membot 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.claude/skills/membot.md +137 -0
  2. package/.cursor/rules/membot.mdc +137 -0
  3. package/README.md +131 -0
  4. package/package.json +83 -24
  5. package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
  6. package/scripts/apply-transformers-patch.sh +35 -0
  7. package/src/cli.ts +72 -0
  8. package/src/commands/check-update.ts +69 -0
  9. package/src/commands/mcpx.ts +112 -0
  10. package/src/commands/reindex.ts +53 -0
  11. package/src/commands/serve.ts +58 -0
  12. package/src/commands/skill.ts +131 -0
  13. package/src/commands/upgrade.ts +220 -0
  14. package/src/config/loader.ts +100 -0
  15. package/src/config/schemas.ts +39 -0
  16. package/src/constants.ts +42 -0
  17. package/src/context.ts +80 -0
  18. package/src/db/blobs.ts +53 -0
  19. package/src/db/chunks.ts +176 -0
  20. package/src/db/connection.ts +173 -0
  21. package/src/db/files.ts +325 -0
  22. package/src/db/migrations/001-init.ts +63 -0
  23. package/src/db/migrations/002-fts.ts +12 -0
  24. package/src/db/migrations.ts +45 -0
  25. package/src/errors.ts +87 -0
  26. package/src/ingest/chunker.ts +117 -0
  27. package/src/ingest/converter/docx.ts +15 -0
  28. package/src/ingest/converter/html.ts +20 -0
  29. package/src/ingest/converter/image.ts +71 -0
  30. package/src/ingest/converter/index.ts +119 -0
  31. package/src/ingest/converter/llm.ts +66 -0
  32. package/src/ingest/converter/ocr.ts +51 -0
  33. package/src/ingest/converter/pdf.ts +38 -0
  34. package/src/ingest/converter/text.ts +8 -0
  35. package/src/ingest/describer.ts +72 -0
  36. package/src/ingest/embedder.ts +98 -0
  37. package/src/ingest/fetcher.ts +280 -0
  38. package/src/ingest/ingest.ts +444 -0
  39. package/src/ingest/local-reader.ts +64 -0
  40. package/src/ingest/search-text.ts +18 -0
  41. package/src/ingest/source-resolver.ts +186 -0
  42. package/src/mcp/instructions.ts +34 -0
  43. package/src/mcp/server.ts +101 -0
  44. package/src/mount/commander.ts +174 -0
  45. package/src/mount/mcp.ts +111 -0
  46. package/src/mount/zod-to-cli.ts +158 -0
  47. package/src/operations/add.ts +69 -0
  48. package/src/operations/diff.ts +105 -0
  49. package/src/operations/index.ts +38 -0
  50. package/src/operations/info.ts +95 -0
  51. package/src/operations/list.ts +87 -0
  52. package/src/operations/move.ts +83 -0
  53. package/src/operations/prune.ts +80 -0
  54. package/src/operations/read.ts +102 -0
  55. package/src/operations/refresh.ts +72 -0
  56. package/src/operations/remove.ts +35 -0
  57. package/src/operations/search.ts +72 -0
  58. package/src/operations/tree.ts +103 -0
  59. package/src/operations/types.ts +81 -0
  60. package/src/operations/versions.ts +78 -0
  61. package/src/operations/write.ts +77 -0
  62. package/src/output/formatter.ts +68 -0
  63. package/src/output/logger.ts +114 -0
  64. package/src/output/progress.ts +78 -0
  65. package/src/output/tty.ts +91 -0
  66. package/src/refresh/runner.ts +296 -0
  67. package/src/refresh/scheduler.ts +54 -0
  68. package/src/sdk.ts +27 -0
  69. package/src/search/hybrid.ts +100 -0
  70. package/src/search/keyword.ts +62 -0
  71. package/src/search/semantic.ts +56 -0
  72. package/src/types/text-modules.d.ts +9 -0
  73. package/src/update/background.ts +73 -0
  74. package/src/update/cache.ts +40 -0
  75. package/src/update/checker.ts +117 -0
  76. package/.claude/settings.local.json +0 -7
  77. package/CLAUDE.md +0 -139
  78. package/docs/plan.md +0 -905
@@ -0,0 +1,78 @@
1
+ import { logger } from "./logger.ts";
2
+ import { isSilent, useSpinner } from "./tty.ts";
3
+
4
+ /**
5
+ * Minimal progress reporter for multi-entry operations (directory/glob ingest,
6
+ * batch refresh). Operations call `start(total)`, then `tick(label)` for each
7
+ * entry, then `done(summary)`.
8
+ *
9
+ * Interactive: replaces a single spinner line as work happens.
10
+ * Non-interactive: emits `info` lines per entry.
11
+ */
12
+ export interface Progress {
13
+ start(total: number, label?: string): void;
14
+ tick(label: string): void;
15
+ done(summary?: string): void;
16
+ fail(summary?: string): void;
17
+ info(msg: string): void;
18
+ }
19
+
20
+ /**
21
+ * Build a `Progress` reporter whose mode is decided once, at call time, from
22
+ * the current TTY state. Use one per multi-entry operation.
23
+ */
24
+ export function createProgress(): Progress {
25
+ let total = 0;
26
+ let count = 0;
27
+ let spinner: ReturnType<typeof logger.startSpinner> | null = null;
28
+
29
+ const interactive = useSpinner();
30
+ const silent = isSilent();
31
+
32
+ return {
33
+ start(t: number, label?: string) {
34
+ total = t;
35
+ count = 0;
36
+ if (silent) return;
37
+ if (interactive) {
38
+ spinner = logger.startSpinner(label ? `${label} (0/${total})` : `0/${total}`);
39
+ } else if (label) {
40
+ logger.info(label);
41
+ }
42
+ },
43
+ tick(label: string) {
44
+ count += 1;
45
+ if (silent) return;
46
+ if (interactive && spinner) {
47
+ spinner.update(`${count}/${total} — ${label}`);
48
+ } else {
49
+ logger.info(`[${count}/${total}] ${label}`);
50
+ }
51
+ },
52
+ done(summary?: string) {
53
+ if (silent) return;
54
+ if (interactive && spinner) {
55
+ spinner.success(summary ?? `${count}/${total} done`);
56
+ spinner = null;
57
+ } else if (summary) {
58
+ logger.info(summary);
59
+ }
60
+ },
61
+ fail(summary?: string) {
62
+ if (silent) {
63
+ if (summary) logger.warn(summary);
64
+ return;
65
+ }
66
+ if (interactive && spinner) {
67
+ spinner.error(summary ?? `failed at ${count}/${total}`);
68
+ spinner = null;
69
+ } else if (summary) {
70
+ logger.warn(summary);
71
+ }
72
+ },
73
+ info(msg: string) {
74
+ if (silent) return;
75
+ logger.info(msg);
76
+ },
77
+ };
78
+ }
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Single source of truth for whether the CLI is running interactively. All
3
+ * spinner / color / progress decisions go through these helpers — operations
4
+ * never inspect process.stdout themselves.
5
+ *
6
+ * Mode resolution (read once at startup, then frozen via setMode):
7
+ * stdout.isTTY && stderr.isTTY && !json → interactive
8
+ * anything else → non-interactive
9
+ * CI=true → forces non-interactive
10
+ * --no-color or NO_COLOR → disables ANSI even if interactive
11
+ * FORCE_COLOR → forces ANSI on regardless
12
+ * CI=true OR NODE_ENV=test OR → silent (suppresses advisory
13
+ * MEMBOT_SILENT=1 info / progress lines)
14
+ * --verbose → overrides silent
15
+ */
16
+
17
+ export interface OutputMode {
18
+ interactive: boolean;
19
+ color: boolean;
20
+ json: boolean;
21
+ verbose: boolean;
22
+ silent: boolean;
23
+ }
24
+
25
+ let mode: OutputMode | null = null;
26
+
27
+ export interface DetectModeOptions {
28
+ json?: boolean;
29
+ noColor?: boolean;
30
+ forceColor?: boolean;
31
+ verbose?: boolean;
32
+ }
33
+
34
+ /** Compute the active output mode from env + flags. Idempotent. */
35
+ export function detectMode(opts: DetectModeOptions = {}): OutputMode {
36
+ const json = !!opts.json;
37
+ const verbose = !!opts.verbose;
38
+ const stdoutTty = !!(process.stdout.isTTY ?? false);
39
+ const stderrTty = !!(process.stderr.isTTY ?? false);
40
+ const ci = process.env.CI === "true" || process.env.CI === "1";
41
+
42
+ const interactive = !json && !ci && stdoutTty && stderrTty;
43
+
44
+ const noColorEnv = !!process.env.NO_COLOR;
45
+ const forceColor = !!opts.forceColor || !!process.env.FORCE_COLOR;
46
+ const noColorFlag = !!opts.noColor;
47
+
48
+ let color: boolean;
49
+ if (forceColor) color = true;
50
+ else if (noColorFlag || noColorEnv || json) color = false;
51
+ else color = stderrTty; // colors target stderr (logs) and stdout (formatted output)
52
+
53
+ const testEnv = process.env.NODE_ENV === "test";
54
+ const explicitSilent = process.env.MEMBOT_SILENT === "1" || process.env.MEMBOT_SILENT === "true";
55
+ const silent = !verbose && !json && (ci || testEnv || explicitSilent);
56
+
57
+ return { interactive, color, json, verbose, silent };
58
+ }
59
+
60
+ export function setMode(m: OutputMode): void {
61
+ mode = m;
62
+ }
63
+
64
+ export function getMode(): OutputMode {
65
+ if (!mode) mode = detectMode();
66
+ return mode;
67
+ }
68
+
69
+ export function isInteractive(): boolean {
70
+ return getMode().interactive;
71
+ }
72
+
73
+ export function useColor(): boolean {
74
+ return getMode().color;
75
+ }
76
+
77
+ export function useSpinner(): boolean {
78
+ return getMode().interactive && !getMode().verbose;
79
+ }
80
+
81
+ export function isJson(): boolean {
82
+ return getMode().json;
83
+ }
84
+
85
+ export function isVerbose(): boolean {
86
+ return getMode().verbose;
87
+ }
88
+
89
+ export function isSilent(): boolean {
90
+ return getMode().silent;
91
+ }
@@ -0,0 +1,296 @@
1
+ import type { McpxClient } from "@evantahler/mcpx";
2
+ import type { AppContext } from "../context.ts";
3
+ import { upsertBlob } from "../db/blobs.ts";
4
+ import { insertChunksForVersion, rebuildFts } from "../db/chunks.ts";
5
+ import { getCurrent, insertVersion, millisIso, updateRefreshStatus } from "../db/files.ts";
6
+ import { HelpfulError } from "../errors.ts";
7
+ import { chunkDeterministic } from "../ingest/chunker.ts";
8
+ import { convert } from "../ingest/converter/index.ts";
9
+ import { describe } from "../ingest/describer.ts";
10
+ import { embed } from "../ingest/embedder.ts";
11
+ import { fetchRemote } from "../ingest/fetcher.ts";
12
+ import { mimeFromPath, readLocalFile, sha256Hex } from "../ingest/local-reader.ts";
13
+ import { buildSearchText } from "../ingest/search-text.ts";
14
+
15
+ export interface RefreshOutcome {
16
+ logical_path: string;
17
+ status: "ok" | "unchanged" | "failed";
18
+ new_version_id?: string;
19
+ error?: string;
20
+ }
21
+
22
+ /**
23
+ * Refresh one logical_path. Re-reads its source (local stat+sha or remote
24
+ * via the persisted mcpx invocation), and creates a new version only if
25
+ * the source bytes changed. Always updates `refreshed_at` and
26
+ * `last_refresh_status` on the row. Returns a per-path outcome — never
27
+ * throws unless the path doesn't exist.
28
+ */
29
+ export async function refreshOne(ctx: AppContext, logicalPath: string, force = false): Promise<RefreshOutcome> {
30
+ const cur = await getCurrent(ctx.db, logicalPath);
31
+ if (!cur) {
32
+ throw new HelpfulError({
33
+ kind: "not_found",
34
+ message: `no current version for ${logicalPath}`,
35
+ hint: `Run \`membot ls\` to see available paths, or ingest with \`membot add\`.`,
36
+ });
37
+ }
38
+
39
+ if (cur.source_type === "inline") {
40
+ return { logical_path: logicalPath, status: "unchanged" };
41
+ }
42
+
43
+ try {
44
+ if (cur.source_type === "local") {
45
+ return await refreshLocal(ctx, cur, force);
46
+ }
47
+ if (cur.source_type === "remote") {
48
+ return await refreshRemote(ctx, cur, force);
49
+ }
50
+ } catch (err) {
51
+ const message = err instanceof Error ? err.message : String(err);
52
+ await updateRefreshStatus(ctx.db, logicalPath, cur.version_id, {
53
+ refreshed_at: new Date().toISOString(),
54
+ last_refresh_status: `failed:${message}`,
55
+ });
56
+ return { logical_path: logicalPath, status: "failed", error: message };
57
+ }
58
+ return { logical_path: logicalPath, status: "unchanged" };
59
+ }
60
+
61
+ interface CurrentRow {
62
+ logical_path: string;
63
+ version_id: string;
64
+ source_type: string;
65
+ source_path: string | null;
66
+ source_mtime_ms: number | null;
67
+ source_sha256: string | null;
68
+ mime_type: string | null;
69
+ fetcher: string | null;
70
+ fetcher_server: string | null;
71
+ fetcher_tool: string | null;
72
+ fetcher_args: Record<string, unknown> | null;
73
+ refresh_frequency_sec: number | null;
74
+ }
75
+
76
+ /** Local-file refresh: stat-then-sha gate before re-running the pipeline. */
77
+ async function refreshLocal(ctx: AppContext, cur: CurrentRow, force: boolean): Promise<RefreshOutcome> {
78
+ if (!cur.source_path) {
79
+ throw new HelpfulError({
80
+ kind: "input_error",
81
+ message: `local row ${cur.logical_path} has no source_path`,
82
+ hint: "This row likely came from an inline write. Re-ingest with `membot add` if you want refreshing.",
83
+ });
84
+ }
85
+ const local = await readLocalFile(cur.source_path);
86
+
87
+ if (!force && cur.source_sha256 === local.sha256) {
88
+ await updateRefreshStatus(ctx.db, cur.logical_path, cur.version_id, {
89
+ refreshed_at: new Date().toISOString(),
90
+ last_refresh_status: "unchanged",
91
+ });
92
+ return { logical_path: cur.logical_path, status: "unchanged" };
93
+ }
94
+
95
+ const versionId = await runPipelineForRefresh(ctx, {
96
+ logicalPath: cur.logical_path,
97
+ bytes: local.bytes,
98
+ mime: local.mimeType,
99
+ source: cur.source_path,
100
+ sourceType: "local",
101
+ sourcePath: cur.source_path,
102
+ sourceMtimeMs: local.mtimeMs,
103
+ sourceSha: local.sha256,
104
+ fetcher: "local",
105
+ fetcherServer: null,
106
+ fetcherTool: null,
107
+ fetcherArgs: null,
108
+ refreshSec: cur.refresh_frequency_sec,
109
+ });
110
+ return { logical_path: cur.logical_path, status: "ok", new_version_id: versionId };
111
+ }
112
+
113
+ /** Remote refresh: replay the persisted mcpx invocation, or plain HTTP. */
114
+ async function refreshRemote(ctx: AppContext, cur: CurrentRow, force: boolean): Promise<RefreshOutcome> {
115
+ if (!cur.source_path) {
116
+ throw new HelpfulError({
117
+ kind: "input_error",
118
+ message: `remote row ${cur.logical_path} has no source_path`,
119
+ hint: "Inspect with `membot info` and consider re-ingesting.",
120
+ });
121
+ }
122
+ const fetched = await replayFetch(cur, ctx.mcpx);
123
+
124
+ if (!force && cur.source_sha256 === fetched.sha256) {
125
+ await updateRefreshStatus(ctx.db, cur.logical_path, cur.version_id, {
126
+ refreshed_at: new Date().toISOString(),
127
+ last_refresh_status: "unchanged",
128
+ });
129
+ return { logical_path: cur.logical_path, status: "unchanged" };
130
+ }
131
+
132
+ const versionId = await runPipelineForRefresh(ctx, {
133
+ logicalPath: cur.logical_path,
134
+ bytes: fetched.bytes,
135
+ mime: fetched.mimeType,
136
+ source: cur.source_path,
137
+ sourceType: "remote",
138
+ sourcePath: cur.source_path,
139
+ sourceMtimeMs: null,
140
+ sourceSha: fetched.sha256,
141
+ fetcher: cur.fetcher === "mcpx" ? "mcpx" : "http",
142
+ fetcherServer: fetched.fetcherServer,
143
+ fetcherTool: fetched.fetcherTool,
144
+ fetcherArgs: fetched.fetcherArgs,
145
+ refreshSec: cur.refresh_frequency_sec,
146
+ });
147
+ return { logical_path: cur.logical_path, status: "ok", new_version_id: versionId };
148
+ }
149
+
150
+ /**
151
+ * Re-fetch a remote source. When the row recorded an mcpx invocation,
152
+ * call it directly with the same args (no agent re-routing); otherwise
153
+ * fall back to plain HTTP. The choice is deterministic — same row always
154
+ * produces the same fetch path.
155
+ */
156
+ async function replayFetch(
157
+ cur: CurrentRow,
158
+ mcpx: McpxClient | null,
159
+ ): Promise<{
160
+ bytes: Uint8Array;
161
+ sha256: string;
162
+ mimeType: string;
163
+ fetcherServer: string | null;
164
+ fetcherTool: string | null;
165
+ fetcherArgs: Record<string, unknown> | null;
166
+ }> {
167
+ if (cur.fetcher === "mcpx" && cur.fetcher_server && cur.fetcher_tool && mcpx) {
168
+ const args = cur.fetcher_args ?? {};
169
+ const result = await mcpx.exec(cur.fetcher_server, cur.fetcher_tool, args);
170
+ const text = extractText(result);
171
+ const bytes = new TextEncoder().encode(text);
172
+ return {
173
+ bytes,
174
+ sha256: sha256Hex(bytes),
175
+ mimeType: "text/markdown",
176
+ fetcherServer: cur.fetcher_server,
177
+ fetcherTool: cur.fetcher_tool,
178
+ fetcherArgs: args,
179
+ };
180
+ }
181
+ const r = await fetchRemote(cur.source_path ?? "", { hint: "http" });
182
+ return {
183
+ bytes: r.bytes,
184
+ sha256: r.sha256,
185
+ mimeType: r.mimeType,
186
+ fetcherServer: null,
187
+ fetcherTool: null,
188
+ fetcherArgs: null,
189
+ };
190
+ }
191
+
192
+ /** Pull a string out of whatever shape an mcpx tool happens to return. */
193
+ function extractText(result: unknown): string {
194
+ if (typeof result === "string") return result;
195
+ if (result && typeof result === "object") {
196
+ const r = result as Record<string, unknown>;
197
+ if (typeof r.text === "string") return r.text;
198
+ if (typeof r.content === "string") return r.content;
199
+ if (typeof r.markdown === "string") return r.markdown;
200
+ if (Array.isArray(r.content)) {
201
+ const out: string[] = [];
202
+ for (const c of r.content) {
203
+ if (c && typeof c === "object") {
204
+ const inner = c as Record<string, unknown>;
205
+ if (typeof inner.text === "string") out.push(inner.text);
206
+ }
207
+ }
208
+ if (out.length > 0) return out.join("\n\n");
209
+ }
210
+ }
211
+ try {
212
+ return JSON.stringify(result);
213
+ } catch {
214
+ return "";
215
+ }
216
+ }
217
+
218
/** Inputs for one refresh pipeline run — a fully resolved source. */
interface PipelineParams {
  logicalPath: string;
  bytes: Uint8Array; // raw source bytes as read/fetched
  mime: string; // mime type of `bytes`
  source: string; // source string handed to convert()
  sourceType: "local" | "remote";
  sourcePath: string | null;
  sourceMtimeMs: number | null; // local files only; null for remote
  sourceSha: string; // sha256 of `bytes`
  fetcher: "local" | "http" | "mcpx";
  fetcherServer: string | null; // mcpx invocations only
  fetcherTool: string | null; // mcpx invocations only
  fetcherArgs: Record<string, unknown> | null; // mcpx invocations only
  refreshSec: number | null; // carried over from the previous version
}

/**
 * Re-run convert → describe → chunk → embed and write a fresh version
 * row. Mirrors `ingest.ts`'s pipeline; kept separate so refresh-specific
 * fields (`change_note='refresh: source updated'`) aren't accidentally
 * applied to first-time ingests.
 *
 * Returns the new version_id (time-based, via `millisIso`).
 */
async function runPipelineForRefresh(ctx: AppContext, p: PipelineParams): Promise<string> {
  // Persist the raw source bytes, content-addressed by their sha256.
  await upsertBlob(ctx.db, {
    sha256: p.sourceSha,
    mime_type: p.mime,
    size_bytes: p.bytes.byteLength,
    bytes: p.bytes,
  });

  // Convert the bytes to markdown, then derive a description, deterministic
  // chunks, per-chunk search text, and embeddings — in that order.
  const conversion = await convert(p.bytes, p.mime, p.source, ctx.config.llm);
  const markdown = conversion.markdown;
  const description = await describe(p.logicalPath, p.mime, markdown, ctx.config.llm);
  const chunks = chunkDeterministic(markdown, ctx.config.chunker);
  const searchTexts = chunks.map((c) => buildSearchText(p.logicalPath, description, c.content));
  const embeddings = await embed(searchTexts, ctx.config.embedding_model);

  // content_sha256 covers the converted markdown, while blob/source shas
  // cover the original bytes.
  const versionId = millisIso(Date.now());
  const contentSha = sha256Hex(new TextEncoder().encode(markdown));
  await insertVersion(ctx.db, {
    logical_path: p.logicalPath,
    version_id: versionId,
    source_type: p.sourceType,
    source_path: p.sourcePath,
    source_mtime_ms: p.sourceMtimeMs,
    source_sha256: p.sourceSha,
    blob_sha256: p.sourceSha,
    content_sha256: contentSha,
    content: markdown,
    description,
    mime_type: p.mime,
    size_bytes: p.bytes.byteLength,
    fetcher: p.fetcher,
    fetcher_server: p.fetcherServer,
    fetcher_tool: p.fetcherTool,
    fetcher_args: p.fetcherArgs,
    refresh_frequency_sec: p.refreshSec,
    refreshed_at: new Date().toISOString(),
    last_refresh_status: "ok",
    change_note: "refresh: source updated",
  });

  await insertChunksForVersion(
    ctx.db,
    p.logicalPath,
    versionId,
    chunks.map((c, i) => ({
      chunk_index: c.index,
      chunk_content: c.content,
      // Defensive fallbacks: recompute search text / zero-fill the embedding
      // if the parallel arrays ever disagree in length.
      search_text: searchTexts[i] ?? buildSearchText(p.logicalPath, description, c.content),
      embedding: embeddings[i] ?? new Array(embeddings[0]?.length ?? 0).fill(0),
    })),
  );

  // FTS index is rebuilt wholesale after every version write.
  await rebuildFts(ctx.db);
  return versionId;
}

export { mimeFromPath };
@@ -0,0 +1,54 @@
1
+ import type { AppContext } from "../context.ts";
2
+ import { listDueRefreshes } from "../db/files.ts";
3
+ import { logger } from "../output/logger.ts";
4
+ import { type RefreshOutcome, refreshOne } from "./runner.ts";
5
+
6
+ /**
7
+ * One scheduler tick: refresh every row whose `refresh_frequency_sec` has
8
+ * elapsed since `refreshed_at`. Errors on individual rows are logged and
9
+ * the loop continues so one bad source doesn't halt the daemon.
10
+ */
11
+ export async function runDueRefreshes(ctx: AppContext): Promise<RefreshOutcome[]> {
12
+ const due = await listDueRefreshes(ctx.db);
13
+ const out: RefreshOutcome[] = [];
14
+ for (const row of due) {
15
+ try {
16
+ const r = await refreshOne(ctx, row.logical_path);
17
+ out.push(r);
18
+ if (r.status === "ok") logger.info(`refresh: ${row.logical_path} → new version ${r.new_version_id}`);
19
+ } catch (err) {
20
+ const msg = err instanceof Error ? err.message : String(err);
21
+ logger.warn(`refresh: ${row.logical_path} failed (${msg})`);
22
+ out.push({ logical_path: row.logical_path, status: "failed", error: msg });
23
+ }
24
+ }
25
+ return out;
26
+ }
27
+
28
+ /**
29
+ * Long-running daemon loop. Calls `runDueRefreshes` every `tick_interval_sec`
30
+ * (from config). Returns a stop function the caller can use to terminate
31
+ * the daemon (e.g. on SIGINT).
32
+ */
33
+ export function startDaemon(ctx: AppContext, tickSec: number): () => void {
34
+ const intervalMs = Math.max(1, tickSec) * 1000;
35
+ let stopped = false;
36
+
37
+ const loop = async () => {
38
+ if (stopped) return;
39
+ try {
40
+ await runDueRefreshes(ctx);
41
+ } catch (err) {
42
+ logger.warn(`daemon: tick failed (${err instanceof Error ? err.message : String(err)})`);
43
+ }
44
+ if (!stopped) setTimeout(loop, intervalMs);
45
+ };
46
+
47
+ logger.info(`daemon: started, tick interval ${tickSec}s`);
48
+ setTimeout(loop, intervalMs);
49
+
50
+ return () => {
51
+ stopped = true;
52
+ logger.info("daemon: stopping");
53
+ };
54
+ }
package/src/sdk.ts ADDED
@@ -0,0 +1,27 @@
1
+ // SDK entrypoint for embedding membot in other apps. Re-exports the core
2
+ // surfaces — context, errors, operations, search, ingest, refresh — so
3
+ // callers don't need to depend on internal file paths.
4
+
5
+ export { loadConfig, saveConfig } from "./config/loader.ts";
6
+ export type { ChunkerConfig, LlmConfig, MembotConfig } from "./config/schemas.ts";
7
+ export { defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "./constants.ts";
8
+ export type { AppContext, BuildContextOptions } from "./context.ts";
9
+ export { buildContext, closeContext } from "./context.ts";
10
+ export type { ErrorKind, HelpfulErrorArgs } from "./errors.ts";
11
+ export { asHelpful, HelpfulError, isHelpfulError, mapKindToExit } from "./errors.ts";
12
+ export type { Chunk } from "./ingest/chunker.ts";
13
+ export { chunkDeterministic } from "./ingest/chunker.ts";
14
+ export { embed, embedSingle } from "./ingest/embedder.ts";
15
+ export type { FetchedRemote, FetchOptions } from "./ingest/fetcher.ts";
16
+ export { fetchRemote } from "./ingest/fetcher.ts";
17
+ export type { IngestEntryResult, IngestInput, IngestResult } from "./ingest/ingest.ts";
18
+ export { ingest } from "./ingest/ingest.ts";
19
+ export { buildMcpServer, startHttpServer, startStdioServer } from "./mcp/server.ts";
20
+ export { OPERATIONS } from "./operations/index.ts";
21
+ export type { CliMetadata, Operation } from "./operations/types.ts";
22
+ export { composeDescription, defaultCliName, defineOperation } from "./operations/types.ts";
23
+ export { refreshOne } from "./refresh/runner.ts";
24
+ export { runDueRefreshes, startDaemon } from "./refresh/scheduler.ts";
25
+ export { fuseRRF } from "./search/hybrid.ts";
26
+ export { searchKeyword } from "./search/keyword.ts";
27
+ export { searchSemantic } from "./search/semantic.ts";
@@ -0,0 +1,100 @@
1
+ import type { KeywordHit } from "./keyword.ts";
2
+ import type { SemanticHit } from "./semantic.ts";
3
+
4
+ export interface FusedHit {
5
+ logical_path: string;
6
+ version_id: string;
7
+ chunk_index: number;
8
+ snippet: string;
9
+ score: number;
10
+ semantic_score: number | null;
11
+ keyword_score: number | null;
12
+ }
13
+
14
+ const SNIPPET_MAX = 300;
15
+
16
+ /**
17
+ * Reciprocal-rank fusion of semantic and keyword hit lists. Each result is
18
+ * keyed by `(logical_path, version_id, chunk_index)` so the same chunk
19
+ * appearing in both lists gets one fused score = sum of its RRF scores.
20
+ */
21
+ export function fuseRRF(
22
+ semantic: SemanticHit[],
23
+ keyword: KeywordHit[],
24
+ options: { k?: number; limit: number },
25
+ ): FusedHit[] {
26
+ const k = options.k ?? 60;
27
+ const merged = new Map<
28
+ string,
29
+ {
30
+ logical_path: string;
31
+ version_id: string;
32
+ chunk_index: number;
33
+ snippet: string;
34
+ rrf: number;
35
+ semantic_score: number | null;
36
+ keyword_score: number | null;
37
+ }
38
+ >();
39
+
40
+ const keyOf = (lp: string, v: string, ci: number) => `${lp}::${v}::${ci}`;
41
+
42
+ for (let i = 0; i < semantic.length; i++) {
43
+ const hit = semantic[i];
44
+ if (!hit) continue;
45
+ const key = keyOf(hit.logical_path, hit.version_id, hit.chunk_index);
46
+ const rrf = 1 / (k + i + 1);
47
+ const existing = merged.get(key);
48
+ if (existing) {
49
+ existing.rrf += rrf;
50
+ existing.semantic_score = round(hit.score);
51
+ } else {
52
+ merged.set(key, {
53
+ logical_path: hit.logical_path,
54
+ version_id: hit.version_id,
55
+ chunk_index: hit.chunk_index,
56
+ snippet: hit.chunk_content.slice(0, SNIPPET_MAX),
57
+ rrf,
58
+ semantic_score: round(hit.score),
59
+ keyword_score: null,
60
+ });
61
+ }
62
+ }
63
+
64
+ for (let i = 0; i < keyword.length; i++) {
65
+ const hit = keyword[i];
66
+ if (!hit) continue;
67
+ const key = keyOf(hit.logical_path, hit.version_id, hit.chunk_index);
68
+ const rrf = 1 / (k + i + 1);
69
+ const existing = merged.get(key);
70
+ if (existing) {
71
+ existing.rrf += rrf;
72
+ existing.keyword_score = round(hit.score);
73
+ } else {
74
+ merged.set(key, {
75
+ logical_path: hit.logical_path,
76
+ version_id: hit.version_id,
77
+ chunk_index: hit.chunk_index,
78
+ snippet: hit.chunk_content.slice(0, SNIPPET_MAX),
79
+ rrf,
80
+ semantic_score: null,
81
+ keyword_score: round(hit.score),
82
+ });
83
+ }
84
+ }
85
+
86
+ const all = [...merged.values()].sort((a, b) => b.rrf - a.rrf).slice(0, options.limit);
87
+ return all.map((h) => ({
88
+ logical_path: h.logical_path,
89
+ version_id: h.version_id,
90
+ chunk_index: h.chunk_index,
91
+ snippet: h.snippet,
92
+ score: round(h.rrf),
93
+ semantic_score: h.semantic_score,
94
+ keyword_score: h.keyword_score,
95
+ }));
96
+ }
97
+
98
+ function round(n: number): number {
99
+ return Math.round(n * 10000) / 10000;
100
+ }