membot 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +81 -24
- package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
- package/scripts/apply-transformers-patch.sh +35 -0
- package/src/cli.ts +70 -0
- package/src/commands/check-update.ts +69 -0
- package/src/commands/mcpx.ts +112 -0
- package/src/commands/reindex.ts +53 -0
- package/src/commands/serve.ts +58 -0
- package/src/commands/upgrade.ts +220 -0
- package/src/config/loader.ts +100 -0
- package/src/config/schemas.ts +39 -0
- package/src/constants.ts +42 -0
- package/src/context.ts +80 -0
- package/src/db/blobs.ts +53 -0
- package/src/db/chunks.ts +176 -0
- package/src/db/connection.ts +173 -0
- package/src/db/files.ts +325 -0
- package/src/db/migrations/001-init.ts +63 -0
- package/src/db/migrations/002-fts.ts +12 -0
- package/src/db/migrations.ts +45 -0
- package/src/errors.ts +87 -0
- package/src/ingest/chunker.ts +117 -0
- package/src/ingest/converter/docx.ts +15 -0
- package/src/ingest/converter/html.ts +20 -0
- package/src/ingest/converter/image.ts +71 -0
- package/src/ingest/converter/index.ts +119 -0
- package/src/ingest/converter/llm.ts +66 -0
- package/src/ingest/converter/ocr.ts +51 -0
- package/src/ingest/converter/pdf.ts +38 -0
- package/src/ingest/converter/text.ts +8 -0
- package/src/ingest/describer.ts +72 -0
- package/src/ingest/embedder.ts +83 -0
- package/src/ingest/fetcher.ts +280 -0
- package/src/ingest/ingest.ts +444 -0
- package/src/ingest/local-reader.ts +64 -0
- package/src/ingest/search-text.ts +18 -0
- package/src/ingest/source-resolver.ts +186 -0
- package/src/mcp/instructions.ts +34 -0
- package/src/mcp/server.ts +101 -0
- package/src/mount/commander.ts +174 -0
- package/src/mount/mcp.ts +111 -0
- package/src/mount/zod-to-cli.ts +158 -0
- package/src/operations/add.ts +69 -0
- package/src/operations/diff.ts +105 -0
- package/src/operations/index.ts +38 -0
- package/src/operations/info.ts +95 -0
- package/src/operations/list.ts +87 -0
- package/src/operations/move.ts +83 -0
- package/src/operations/prune.ts +80 -0
- package/src/operations/read.ts +102 -0
- package/src/operations/refresh.ts +72 -0
- package/src/operations/remove.ts +35 -0
- package/src/operations/search.ts +72 -0
- package/src/operations/tree.ts +103 -0
- package/src/operations/types.ts +81 -0
- package/src/operations/versions.ts +78 -0
- package/src/operations/write.ts +77 -0
- package/src/output/formatter.ts +68 -0
- package/src/output/logger.ts +114 -0
- package/src/output/progress.ts +78 -0
- package/src/output/tty.ts +91 -0
- package/src/refresh/runner.ts +296 -0
- package/src/refresh/scheduler.ts +54 -0
- package/src/sdk.ts +27 -0
- package/src/search/hybrid.ts +100 -0
- package/src/search/keyword.ts +62 -0
- package/src/search/semantic.ts +56 -0
- package/src/update/background.ts +73 -0
- package/src/update/cache.ts +40 -0
- package/src/update/checker.ts +117 -0
- package/.claude/settings.local.json +0 -7
- package/CLAUDE.md +0 -139
- package/docs/plan.md +0 -905
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import { tmpdir } from "node:os";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { dim, green, red, yellow } from "ansis";
|
|
4
|
+
import { $ } from "bun";
|
|
5
|
+
import type { Command } from "commander";
|
|
6
|
+
import { createSpinner } from "nanospinner";
|
|
7
|
+
import pkg from "../../package.json" with { type: "json" };
|
|
8
|
+
import { clearUpdateCache, loadUpdateCache, saveUpdateCache } from "../update/cache.ts";
|
|
9
|
+
import {
|
|
10
|
+
checkForUpdate,
|
|
11
|
+
detectInstallMethod,
|
|
12
|
+
type InstallMethod,
|
|
13
|
+
needsCheck,
|
|
14
|
+
type UpdateCache,
|
|
15
|
+
} from "../update/checker.ts";
|
|
16
|
+
|
|
17
|
+
// "owner/repo" slug derived from package.json's repository.url
// (strips the leading https://github.com/ and any trailing .git).
const GITHUB_REPO = pkg.repository.url.replace(/^https:\/\/github\.com\//, "").replace(/\.git$/, "");
|
|
18
|
+
|
|
19
|
+
/** Build the platform-specific release artifact name (e.g. `membot-linux-x64`, `membot-windows-arm64.exe`). */
|
|
20
|
+
function platformArtifactName(): string {
|
|
21
|
+
let os: string;
|
|
22
|
+
let ext = "";
|
|
23
|
+
switch (process.platform) {
|
|
24
|
+
case "darwin":
|
|
25
|
+
os = "darwin";
|
|
26
|
+
break;
|
|
27
|
+
case "win32":
|
|
28
|
+
os = "windows";
|
|
29
|
+
ext = ".exe";
|
|
30
|
+
break;
|
|
31
|
+
default:
|
|
32
|
+
os = "linux";
|
|
33
|
+
break;
|
|
34
|
+
}
|
|
35
|
+
const arch = process.arch === "arm64" ? "arm64" : "x64";
|
|
36
|
+
return `membot-${os}-${arch}${ext}`;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/** Run `bun install -g` or `npm install -g` and return whether it succeeded. */
|
|
40
|
+
async function upgradeWithPackageManager(command: string, args: string[]): Promise<boolean> {
|
|
41
|
+
const result = await $`${command} ${args}`.nothrow();
|
|
42
|
+
return result.exitCode === 0;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Download the platform binary for `latestVersion` from GitHub releases and replace
|
|
47
|
+
* the running executable in place. Falls back to `sudo mv` if the target is non-writable.
|
|
48
|
+
*/
|
|
49
|
+
async function upgradeFromBinary(latestVersion: string): Promise<boolean> {
|
|
50
|
+
const artifact = platformArtifactName();
|
|
51
|
+
const tag = `v${latestVersion}`;
|
|
52
|
+
const url = `https://github.com/${GITHUB_REPO}/releases/download/${tag}/${artifact}`;
|
|
53
|
+
|
|
54
|
+
const tmpPath = join(tmpdir(), `membot-upgrade-${Date.now()}`);
|
|
55
|
+
const targetPath = process.execPath;
|
|
56
|
+
|
|
57
|
+
try {
|
|
58
|
+
const res = await fetch(url);
|
|
59
|
+
if (!res.ok) {
|
|
60
|
+
console.error(red(`Failed to download binary: HTTP ${res.status}`));
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const bytes = await res.arrayBuffer();
|
|
65
|
+
await Bun.write(tmpPath, bytes);
|
|
66
|
+
|
|
67
|
+
await $`chmod +x ${tmpPath}`.quiet();
|
|
68
|
+
|
|
69
|
+
const mv = await $`mv ${tmpPath} ${targetPath}`.quiet().nothrow();
|
|
70
|
+
|
|
71
|
+
if (mv.exitCode !== 0) {
|
|
72
|
+
console.log(dim("Requires elevated permissions..."));
|
|
73
|
+
const sudo = await $`sudo mv ${tmpPath} ${targetPath}`.nothrow();
|
|
74
|
+
if (sudo.exitCode !== 0) {
|
|
75
|
+
console.error(red("Failed to install binary. Try running with sudo."));
|
|
76
|
+
return false;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return true;
|
|
81
|
+
} catch (err) {
|
|
82
|
+
console.error(red(`Failed to upgrade binary: ${err}`));
|
|
83
|
+
await $`rm -f ${tmpPath}`.quiet().nothrow();
|
|
84
|
+
return false;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
 * Register `membot upgrade`. Detects the install method (npm/bun/binary/local-dev),
 * uses cached update info if fresh, then performs the appropriate self-update.
 * Emits structured JSON when `--json` is set. Exits with code 1 on any failure.
 */
export function registerUpgradeCommand(program: Command) {
  program
    .command("upgrade")
    .description("Upgrade membot to the latest version")
    .action(async () => {
      // --json and other global flags live on the parent program, not the subcommand.
      const opts = program.opts();
      const json = !!(opts.json as boolean | undefined);
      const isTTY = process.stderr.isTTY ?? false;

      // Spinner goes to stderr (stdout is reserved for the JSON payload) and only
      // when interactive and not in JSON mode.
      const spinner =
        !json && isTTY ? createSpinner("Checking for updates...", { stream: process.stderr }).start() : null;

      try {
        const cache = await loadUpdateCache();
        let latestVersion: string;
        let hasUpdate: boolean;

        // Reuse the cached check when it is still fresh; otherwise hit the
        // registry and persist the result for the next invocation.
        if (!needsCheck(cache) && cache) {
          latestVersion = cache.latestVersion;
          hasUpdate = cache.hasUpdate;
        } else {
          const info = await checkForUpdate(pkg.version);
          latestVersion = info.latestVersion;
          hasUpdate = info.hasUpdate;

          const newCache: UpdateCache = {
            lastCheckAt: new Date().toISOString(),
            latestVersion,
            hasUpdate,
            changelog: info.changelog,
          };
          await saveUpdateCache(newCache);
        }

        // Nothing to do: report and return (exit code 0).
        if (!hasUpdate) {
          spinner?.stop();
          if (json) {
            console.log(
              JSON.stringify({
                upgraded: false,
                currentVersion: pkg.version,
                message: "Already up to date",
              }),
            );
          } else {
            console.log(green(`membot is already up to date (v${pkg.version})`));
          }
          return;
        }

        const method: InstallMethod = detectInstallMethod();
        spinner?.update({
          text: `Upgrading from v${pkg.version} to v${latestVersion} (${method})...`,
        });

        let success = false;

        // Each branch stops the spinner first so the underlying tool's own
        // output (bun/npm/sudo prompts) is not garbled by spinner redraws.
        switch (method) {
          case "bun":
            spinner?.stop();
            success = await upgradeWithPackageManager("bun", ["install", "-g", `${pkg.name}@${latestVersion}`]);
            break;

          case "npm":
            spinner?.stop();
            success = await upgradeWithPackageManager("npm", ["install", "-g", `${pkg.name}@${latestVersion}`]);
            break;

          case "binary":
            spinner?.stop();
            success = await upgradeFromBinary(latestVersion);
            break;

          // Source checkouts are never auto-upgraded; print instructions and return.
          case "local-dev":
            spinner?.stop();
            if (json) {
              console.log(
                JSON.stringify({
                  upgraded: false,
                  currentVersion: pkg.version,
                  latestVersion,
                  installMethod: "local-dev",
                  message: "Running from source. Use `git pull && bun install` to update.",
                }),
              );
            } else {
              console.log(yellow("Running from source. Use `git pull && bun install` to update."));
            }
            return;
        }

        if (success) {
          // Invalidate the cache so the next run re-checks against the new version.
          await clearUpdateCache();
          if (json) {
            console.log(
              JSON.stringify({
                upgraded: true,
                previousVersion: pkg.version,
                newVersion: latestVersion,
                installMethod: method,
              }),
            );
          } else {
            console.log(green(`Successfully upgraded membot: v${pkg.version} → v${latestVersion}`));
          }
        } else {
          if (json) {
            console.log(
              JSON.stringify({
                upgraded: false,
                currentVersion: pkg.version,
                latestVersion,
                installMethod: method,
                message: "Upgrade failed",
              }),
            );
          } else {
            console.error(red("Upgrade failed. See errors above."));
          }
          process.exit(1);
        }
      } catch (err) {
        spinner?.error({ text: "Upgrade failed" });
        console.error(String(err));
        process.exit(1);
      }
    });
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { mkdir } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { defaultMembotHome, ENV, FILES } from "../constants.ts";
|
|
4
|
+
import { asHelpful, HelpfulError } from "../errors.ts";
|
|
5
|
+
import { type MembotConfig, MembotConfigSchema } from "./schemas.ts";
|
|
6
|
+
|
|
7
|
+
/** Options for {@link loadConfig}. */
export interface LoadConfigOptions {
  // Value of the CLI `--config` flag; when set it takes precedence over
  // MEMBOT_HOME and the default ~/.membot as the data directory.
  configFlag?: string;
}
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Resolve, read, and validate `~/.membot/config.json`. The directory is
|
|
13
|
+
* created if missing. Environment variables (ANTHROPIC_API_KEY) take
|
|
14
|
+
* precedence over the on-disk values for sensitive fields.
|
|
15
|
+
*/
|
|
16
|
+
export async function loadConfig(options: LoadConfigOptions = {}): Promise<{
|
|
17
|
+
config: MembotConfig;
|
|
18
|
+
dataDir: string;
|
|
19
|
+
configPath: string;
|
|
20
|
+
}> {
|
|
21
|
+
const dataDir = resolveDataDir(options.configFlag);
|
|
22
|
+
await mkdir(dataDir, { recursive: true });
|
|
23
|
+
|
|
24
|
+
const configPath = resolve(dataDir, FILES.CONFIG_JSON);
|
|
25
|
+
let raw: unknown = {};
|
|
26
|
+
const file = Bun.file(configPath);
|
|
27
|
+
if (await file.exists()) {
|
|
28
|
+
try {
|
|
29
|
+
raw = JSON.parse(await file.text());
|
|
30
|
+
} catch (err) {
|
|
31
|
+
throw asHelpful(
|
|
32
|
+
err,
|
|
33
|
+
`while parsing ${configPath}`,
|
|
34
|
+
`Fix the JSON in ${configPath}, or delete it to regenerate defaults.`,
|
|
35
|
+
"input_error",
|
|
36
|
+
);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
let config: MembotConfig;
|
|
41
|
+
try {
|
|
42
|
+
config = MembotConfigSchema.parse(raw);
|
|
43
|
+
} catch (err) {
|
|
44
|
+
throw asHelpful(
|
|
45
|
+
err,
|
|
46
|
+
`while validating ${configPath}`,
|
|
47
|
+
`Check ${configPath} against the documented schema, or delete it to regenerate defaults.`,
|
|
48
|
+
"input_error",
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const envKey = process.env[ENV.ANTHROPIC_API_KEY];
|
|
53
|
+
if (envKey?.trim()) {
|
|
54
|
+
config = { ...config, llm: { ...config.llm, anthropic_api_key: envKey } };
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
if (config.data_dir !== dataDir) {
|
|
58
|
+
config = { ...config, data_dir: dataDir };
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return { config, dataDir, configPath };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Pick the membot data directory. Precedence: explicit `--config` flag,
|
|
66
|
+
* then `MEMBOT_HOME` env var, then `~/.membot`. The chosen path is later
|
|
67
|
+
* created (recursive mkdir) and stamped back into `config.data_dir`.
|
|
68
|
+
*/
|
|
69
|
+
function resolveDataDir(flag?: string): string {
|
|
70
|
+
if (flag?.trim()) return resolve(flag);
|
|
71
|
+
const env = process.env[ENV.HOME];
|
|
72
|
+
if (env?.trim()) return resolve(env);
|
|
73
|
+
return defaultMembotHome();
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Persist config to disk, with the Anthropic API key blanked out — the env
|
|
78
|
+
* var (`ANTHROPIC_API_KEY`) is the source of truth, never the file. Writing
|
|
79
|
+
* the key to disk would land it in shell history, dotfile syncs, and
|
|
80
|
+
* accidental commits.
|
|
81
|
+
*/
|
|
82
|
+
export async function saveConfig(configPath: string, config: MembotConfig): Promise<void> {
|
|
83
|
+
const safe: MembotConfig = {
|
|
84
|
+
...config,
|
|
85
|
+
llm: { ...config.llm, anthropic_api_key: "" },
|
|
86
|
+
};
|
|
87
|
+
await Bun.write(configPath, `${JSON.stringify(safe, null, 2)}\n`);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Tree-shaking guard. Not called at runtime — its presence keeps the module
|
|
92
|
+
* from being eliminated by aggressive bundlers when only types are imported.
|
|
93
|
+
*/
|
|
94
|
+
export function _ensureExportedSentinel(): never {
|
|
95
|
+
throw new HelpfulError({
|
|
96
|
+
kind: "internal_error",
|
|
97
|
+
message: "sentinel called",
|
|
98
|
+
hint: "This function exists only for tree-shaking sanity checks.",
|
|
99
|
+
});
|
|
100
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { DEFAULTS, defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
|
|
3
|
+
|
|
4
|
+
/** Chunking strategy and size budget used when splitting documents into chunks. */
export const ChunkerConfigSchema = z.object({
  mode: z.enum(["deterministic", "llm"]).default(DEFAULTS.CHUNKER_MODE),
  target_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_TARGET_CHARS),
  max_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_MAX_CHARS),
});

/** Anthropic credentials plus the model name used by each LLM-backed stage. */
export const LlmConfigSchema = z.object({
  // Empty by default — the ANTHROPIC_API_KEY env var overrides this at load
  // time, and saveConfig blanks it before writing to disk.
  anthropic_api_key: z.string().default(""),
  converter_model: z.string().default(DEFAULTS.CONVERTER_MODEL),
  chunker_model: z.string().default(DEFAULTS.CHUNKER_MODEL),
  describer_model: z.string().default(DEFAULTS.DESCRIBER_MODEL),
  vision_model: z.string().default(DEFAULTS.VISION_MODEL),
});

/** Location of the mcpx client config ("" = fall back to the env var / client default). */
export const McpxConfigSchema = z.object({
  config_path: z.string().default(""),
});

/** Background refresh daemon cadence. */
export const DaemonConfigSchema = z.object({
  tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
});

/**
 * Top-level schema for ~/.membot/config.json. Every field carries a default,
 * so `MembotConfigSchema.parse({})` yields a complete config. Section
 * defaults are thunks (`() => Schema.parse({})`) so each parse produces a
 * fresh object instead of a shared mutable instance.
 */
export const MembotConfigSchema = z.object({
  data_dir: z.string().default(defaultMembotHome()),
  embedding_model: z.string().default(EMBEDDING_MODEL),
  embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
  chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
  llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
  mcpx: McpxConfigSchema.default(() => McpxConfigSchema.parse({})),
  daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
  // null = no automatic refresh unless a source configures its own frequency.
  default_refresh_frequency_sec: z.number().int().positive().nullable().default(null),
});

export type MembotConfig = z.infer<typeof MembotConfigSchema>;
export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
export type LlmConfig = z.infer<typeof LlmConfigSchema>;
|
package/src/constants.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { homedir } from "node:os";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Default data directory: `~/.membot`. Override via $MEMBOT_HOME or `--config`. */
|
|
5
|
+
export function defaultMembotHome(): string {
|
|
6
|
+
const env = process.env.MEMBOT_HOME;
|
|
7
|
+
if (env?.trim()) return env;
|
|
8
|
+
return join(homedir(), ".membot");
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/** Environment variable names read by membot (values are the actual env var keys). */
export const ENV = {
  HOME: "MEMBOT_HOME",
  CONFIG: "MEMBOT_CONFIG",
  DEBUG: "MEMBOT_DEBUG",
  ANTHROPIC_API_KEY: "ANTHROPIC_API_KEY",
  // NOTE(review): the key says MCPX but the env var is "MCP_CONFIG_PATH"
  // (no X) — confirm this mismatch is intentional.
  MCPX_CONFIG_PATH: "MCP_CONFIG_PATH",
  NO_UPDATE_CHECK: "MEMBOT_NO_UPDATE_CHECK",
} as const;

// Local embedding model and the width of the vectors it produces.
export const EMBEDDING_MODEL = "Xenova/bge-small-en-v1.5";
export const EMBEDDING_DIMENSION = 384;

/** Tunable defaults referenced by the config schemas and runtime code. */
export const DEFAULTS = {
  CHUNKER_MODE: "deterministic" as const,
  CHUNKER_TARGET_CHARS: 4_000,
  CHUNKER_MAX_CHARS: 15_000,
  DAEMON_TICK_SEC: 60,
  HTTP_TIMEOUT_MS: 30_000,
  // All four LLM stages default to the same Haiku snapshot.
  CONVERTER_MODEL: "claude-haiku-4-5-20251001",
  CHUNKER_MODEL: "claude-haiku-4-5-20251001",
  DESCRIBER_MODEL: "claude-haiku-4-5-20251001",
  VISION_MODEL: "claude-haiku-4-5-20251001",
  // Re-check for updates at most once per day; give the check 5s to respond.
  UPDATE_CHECK_INTERVAL_MS: 24 * 60 * 60 * 1000,
  UPDATE_CHECK_TIMEOUT_MS: 5_000,
} as const;

/** Well-known file and directory names inside the data directory. */
export const FILES = {
  CONFIG_JSON: "config.json",
  INDEX_DUCKDB: "index.duckdb",
  MODELS_DIR: "models",
  LOGS_DIR: "logs",
} as const;
|
package/src/context.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { McpxClient } from "@evantahler/mcpx";
|
|
3
|
+
import { loadConfig } from "./config/loader.ts";
|
|
4
|
+
import type { MembotConfig } from "./config/schemas.ts";
|
|
5
|
+
import { ENV, FILES } from "./constants.ts";
|
|
6
|
+
import { type DbConnection, openDb } from "./db/connection.ts";
|
|
7
|
+
import { logger } from "./output/logger.ts";
|
|
8
|
+
import type { Progress } from "./output/progress.ts";
|
|
9
|
+
import { createProgress } from "./output/progress.ts";
|
|
10
|
+
import { detectMode, setMode } from "./output/tty.ts";
|
|
11
|
+
|
|
12
|
+
/** Shared dependencies handed to every operation handler. */
export interface AppContext {
  // Validated config with env overrides already applied by loadConfig.
  config: MembotConfig;
  // Resolved data directory (flag > MEMBOT_HOME > ~/.membot).
  dataDir: string;
  // Absolute path of config.json inside dataDir.
  configPath: string;
  // Open DuckDB connection; openDb runs migrations on first open.
  db: DbConnection;
  logger: typeof logger;
  progress: Progress;
  // null when the mcpx client could not be constructed (see maybeMcpx).
  mcpx: McpxClient | null;
}

/** Global CLI flags that shape context construction and output mode. */
export interface BuildContextOptions {
  // Value of --config; overrides the data directory location.
  configFlag?: string;
  json?: boolean;
  verbose?: boolean;
  noColor?: boolean;
  // NOTE(review): accepted but not read by buildContext in this file — confirm intended use.
  noInteractive?: boolean;
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Build the AppContext used by every operation handler. Initializes:
|
|
32
|
+
* - output mode (TTY/JSON/color detection — frozen for the rest of the run)
|
|
33
|
+
* - config (~/.membot/config.json with env overrides)
|
|
34
|
+
* - DuckDB connection (~/.membot/index.duckdb), running migrations on first open
|
|
35
|
+
* - mcpx client (lazy — opened on first remote fetch; null when no servers)
|
|
36
|
+
*/
|
|
37
|
+
export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
|
|
38
|
+
setMode(detectMode({ json: options.json, verbose: options.verbose, noColor: options.noColor }));
|
|
39
|
+
|
|
40
|
+
const { config, dataDir, configPath } = await loadConfig({ configFlag: options.configFlag });
|
|
41
|
+
const dbPath = join(dataDir, FILES.INDEX_DUCKDB);
|
|
42
|
+
const db = await openDb(dbPath);
|
|
43
|
+
|
|
44
|
+
const mcpx = await maybeMcpx(config);
|
|
45
|
+
|
|
46
|
+
return {
|
|
47
|
+
config,
|
|
48
|
+
dataDir,
|
|
49
|
+
configPath,
|
|
50
|
+
db,
|
|
51
|
+
logger,
|
|
52
|
+
progress: createProgress(),
|
|
53
|
+
mcpx,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
async function maybeMcpx(config: MembotConfig): Promise<McpxClient | null> {
|
|
58
|
+
const configDir = config.mcpx.config_path || process.env[ENV.MCPX_CONFIG_PATH];
|
|
59
|
+
try {
|
|
60
|
+
const client = new McpxClient(configDir ? { configDir } : {});
|
|
61
|
+
return client;
|
|
62
|
+
} catch {
|
|
63
|
+
return null;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export async function closeContext(ctx: AppContext): Promise<void> {
|
|
68
|
+
try {
|
|
69
|
+
await ctx.db.close();
|
|
70
|
+
} catch {
|
|
71
|
+
// best effort
|
|
72
|
+
}
|
|
73
|
+
if (ctx.mcpx) {
|
|
74
|
+
try {
|
|
75
|
+
await ctx.mcpx.close();
|
|
76
|
+
} catch {
|
|
77
|
+
// best effort
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
package/src/db/blobs.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { DbConnection } from "./connection.ts";
|
|
2
|
+
|
|
3
|
+
/** One row of the content-addressed `blobs` table. */
export interface BlobRow {
  // Hex SHA-256 of `bytes`; serves as the primary key (see ON CONFLICT in upsertBlob).
  sha256: string;
  mime_type: string;
  size_bytes: number;
  // Raw content, stored as a DuckDB BLOB.
  bytes: Uint8Array;
}
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Insert a content-addressed blob, doing nothing when the sha256 already
|
|
12
|
+
* exists. Uses an explicit `?::BLOB` cast because DuckDB can't infer the
|
|
13
|
+
* column type from a JS Uint8Array on its own.
|
|
14
|
+
*/
|
|
15
|
+
export async function upsertBlob(db: DbConnection, blob: BlobRow): Promise<void> {
|
|
16
|
+
await db.queryRun(
|
|
17
|
+
`INSERT INTO blobs (sha256, mime_type, size_bytes, bytes)
|
|
18
|
+
VALUES (?1, ?2, ?3, ?4::BLOB)
|
|
19
|
+
ON CONFLICT (sha256) DO NOTHING`,
|
|
20
|
+
blob.sha256,
|
|
21
|
+
blob.mime_type,
|
|
22
|
+
blob.size_bytes,
|
|
23
|
+
blob.bytes,
|
|
24
|
+
);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Fetch a blob by sha256, or null. Used when serving `membot_read bytes=true`. */
|
|
28
|
+
export async function readBlob(db: DbConnection, sha256: string): Promise<BlobRow | null> {
|
|
29
|
+
const row = await db.queryGet<{
|
|
30
|
+
sha256: string;
|
|
31
|
+
mime_type: string;
|
|
32
|
+
size_bytes: number;
|
|
33
|
+
bytes: Uint8Array;
|
|
34
|
+
}>(`SELECT sha256, mime_type, size_bytes, bytes FROM blobs WHERE sha256 = ?1`, sha256);
|
|
35
|
+
if (!row) return null;
|
|
36
|
+
return {
|
|
37
|
+
sha256: row.sha256,
|
|
38
|
+
mime_type: row.mime_type,
|
|
39
|
+
size_bytes: Number(row.size_bytes),
|
|
40
|
+
bytes: row.bytes,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
 * Drop blobs whose sha256 isn't referenced by any file row.
 * NOTE(review): the subquery keeps every non-NULL blob_sha256 in `files`,
 * including any tombstoned rows — if tombstones retain their blob_sha256,
 * those blobs survive GC; confirm that matches the intended retention policy.
 * The `IS NOT NULL` filter also keeps the `NOT IN` comparison NULL-safe.
 */
export async function gcOrphanBlobs(db: DbConnection): Promise<{ removed: number }> {
  const result = await db.queryRun(
    `DELETE FROM blobs
     WHERE sha256 NOT IN (
       SELECT DISTINCT blob_sha256 FROM files WHERE blob_sha256 IS NOT NULL
     )`,
  );
  // queryRun reports the affected-row count via `changes`.
  return { removed: result.changes };
}
|