membot 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +3 -0
- package/.cursor/rules/membot.mdc +3 -0
- package/README.md +7 -0
- package/package.json +1 -1
- package/src/cli.ts +11 -0
- package/src/config/schemas.ts +33 -0
- package/src/constants.ts +23 -0
- package/src/context.ts +24 -0
- package/src/ingest/concurrency.ts +60 -0
- package/src/ingest/describer.ts +49 -3
- package/src/ingest/embed-worker.ts +74 -0
- package/src/ingest/embedder-pool.ts +391 -0
- package/src/ingest/embedder.ts +40 -2
- package/src/ingest/ingest.ts +277 -67
- package/src/operations/add.ts +139 -99
- package/src/operations/index.ts +2 -0
- package/src/operations/refresh.ts +61 -34
- package/src/operations/stats.ts +342 -0
- package/src/operations/write.ts +48 -40
- package/src/output/formatter.ts +21 -0
- package/src/output/logger.ts +36 -0
- package/src/output/progress.ts +408 -46
- package/src/refresh/scheduler.ts +22 -13
package/src/operations/add.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { resolveEmbeddingWorkers } from "../context.ts";
|
|
3
|
+
import { withEmbedderPool } from "../ingest/embedder-pool.ts";
|
|
2
4
|
import {
|
|
3
5
|
countResolvedEntries,
|
|
4
6
|
type IngestCallbacks,
|
|
@@ -7,7 +9,9 @@ import {
|
|
|
7
9
|
ingestResolved,
|
|
8
10
|
} from "../ingest/ingest.ts";
|
|
9
11
|
import { type ResolvedSource, resolveSource } from "../ingest/source-resolver.ts";
|
|
10
|
-
import { colors } from "../output/formatter.ts";
|
|
12
|
+
import { colors, formatBytes } from "../output/formatter.ts";
|
|
13
|
+
import { pieFor } from "../output/progress.ts";
|
|
14
|
+
import { isInteractive } from "../output/tty.ts";
|
|
11
15
|
import { defineOperation } from "./types.ts";
|
|
12
16
|
|
|
13
17
|
const FetcherKindEnum = z.enum(["downloader", "local", "inline"]);
|
|
@@ -76,6 +80,7 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
|
|
|
76
80
|
error: z.string().optional(),
|
|
77
81
|
mime_type: z.string().nullable(),
|
|
78
82
|
size_bytes: z.number(),
|
|
83
|
+
chunk_count: z.number().nullable(),
|
|
79
84
|
fetcher: FetcherKindEnum,
|
|
80
85
|
source_sha256: z.string(),
|
|
81
86
|
}),
|
|
@@ -90,116 +95,145 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
|
|
|
90
95
|
aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m", force: "-f" },
|
|
91
96
|
},
|
|
92
97
|
console_formatter: (result) => {
|
|
93
|
-
const lines = result.ingested.map((e) => {
|
|
94
|
-
if (e.status === "ok") {
|
|
95
|
-
return `${colors.green("✓")} ${colors.cyan(e.logical_path)} ${colors.dim(`(${e.fetcher}, ${e.size_bytes}B)`)}`;
|
|
96
|
-
}
|
|
97
|
-
if (e.status === "unchanged") {
|
|
98
|
-
return `${colors.dim("≡")} ${colors.cyan(e.logical_path)} ${colors.dim("(unchanged)")}`;
|
|
99
|
-
}
|
|
100
|
-
return `${colors.red("✗")} ${e.source_path} ${colors.dim(e.error ?? "")}`;
|
|
101
|
-
});
|
|
102
98
|
const parts: string[] = [colors.green(`added ${result.ok}`)];
|
|
103
99
|
if (result.unchanged > 0) parts.push(colors.dim(`unchanged ${result.unchanged}`));
|
|
104
100
|
if (result.failed > 0) parts.push(colors.red(`failed ${result.failed}`));
|
|
105
|
-
|
|
101
|
+
const summary = parts.join(", ");
|
|
102
|
+
|
|
103
|
+
// In interactive mode, every entry was already streamed to stderr via
|
|
104
|
+
// progress.entry() during ingest; printing the same list to stdout
|
|
105
|
+
// here would just duplicate the scrollback. Non-interactive callers
|
|
106
|
+
// (JSON, piped stdout, CI) don't see the live stream, so they still
|
|
107
|
+
// get the full per-entry list as the operation's stdout payload.
|
|
108
|
+
if (isInteractive()) return summary;
|
|
109
|
+
|
|
110
|
+
const lines = result.ingested.map(formatEntryLine);
|
|
111
|
+
return `${lines.join("\n")}\n${summary}`;
|
|
106
112
|
},
|
|
107
113
|
handler: async (input, ctx) => {
|
|
108
|
-
|
|
109
|
-
|
|
114
|
+
// Spin up an ephemeral embedder pool for the whole `add` command —
|
|
115
|
+
// `withEmbedderPool` handles the workers=1 short-circuit and disposes
|
|
116
|
+
// the children when the closure returns (see embedder-pool.ts). Inside
|
|
117
|
+
// the closure, every embed() call from the ingest pipeline transparently
|
|
118
|
+
// fans out to the subprocess pool.
|
|
119
|
+
const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
|
|
120
|
+
return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
|
|
121
|
+
const { sources, ...rest } = input;
|
|
122
|
+
const followSymlinks = rest.follow_symlinks ?? true;
|
|
110
123
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
// Phase 1: resolve every source upfront so the shared progress bar
|
|
125
|
+
// knows its total. A resolve failure (bad path, glob with no base) is
|
|
126
|
+
// captured per-source so one bad arg doesn't abort the whole batch.
|
|
127
|
+
type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
|
|
128
|
+
const outcomes: ResolveOutcome[] = [];
|
|
129
|
+
for (const source of sources) {
|
|
130
|
+
try {
|
|
131
|
+
const resolved = await resolveSource(source, {
|
|
132
|
+
include: rest.include,
|
|
133
|
+
exclude: rest.exclude,
|
|
134
|
+
followSymlinks,
|
|
135
|
+
});
|
|
136
|
+
outcomes.push({ source, resolved });
|
|
137
|
+
} catch (err) {
|
|
138
|
+
outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
|
|
139
|
+
}
|
|
126
140
|
}
|
|
127
|
-
}
|
|
128
141
|
|
|
129
|
-
|
|
142
|
+
const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
|
|
130
143
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
144
|
+
const aggregated: IngestResult = {
|
|
145
|
+
ingested: [],
|
|
146
|
+
total: 0,
|
|
147
|
+
ok: 0,
|
|
148
|
+
unchanged: 0,
|
|
149
|
+
failed: 0,
|
|
150
|
+
};
|
|
138
151
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
152
|
+
ctx.progress.start(total, "ingest");
|
|
153
|
+
const callbacks: IngestCallbacks = {
|
|
154
|
+
// Counter advances on COMPLETION so concurrent prep doesn't race the
|
|
155
|
+
// bar to 100% before any file is fully persisted. The per-worker
|
|
156
|
+
// status section (one line per active worker) shows file + step in
|
|
157
|
+
// real time, prefixed with a pie glyph that fills as the per-file
|
|
158
|
+
// pipeline progresses. `setWorkers(n)` resizes the section whenever
|
|
159
|
+
// a new ingest source kicks off with its own pool size.
|
|
160
|
+
onWorkerCount: (n) => ctx.progress.setWorkers(n),
|
|
161
|
+
onEntryStart: (label, workerId) => {
|
|
162
|
+
if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(undefined)} ${label}`);
|
|
163
|
+
ctx.progress.setLabel(label);
|
|
164
|
+
},
|
|
165
|
+
onEntryComplete: (entry, workerId) => {
|
|
166
|
+
if (workerId !== undefined) ctx.progress.workerSet(workerId, "");
|
|
167
|
+
ctx.progress.tick(entry.logical_path);
|
|
168
|
+
ctx.progress.entry(formatEntryLine(entry));
|
|
169
|
+
},
|
|
170
|
+
onEntryProgress: (label, sublabel, workerId) => {
|
|
171
|
+
if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(sublabel)} ${label} — ${sublabel}`);
|
|
172
|
+
ctx.progress.update(sublabel);
|
|
173
|
+
},
|
|
174
|
+
onChunks: (n) => ctx.progress.addChunks(n),
|
|
175
|
+
};
|
|
145
176
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
177
|
+
for (const outcome of outcomes) {
|
|
178
|
+
if ("error" in outcome) {
|
|
179
|
+
const failed: IngestEntryResult = {
|
|
180
|
+
source_path: outcome.source,
|
|
181
|
+
logical_path: outcome.source,
|
|
182
|
+
version_id: null,
|
|
183
|
+
status: "failed",
|
|
184
|
+
error: outcome.error.message,
|
|
185
|
+
mime_type: null,
|
|
186
|
+
size_bytes: 0,
|
|
187
|
+
chunk_count: null,
|
|
188
|
+
fetcher: "local",
|
|
189
|
+
source_sha256: "",
|
|
190
|
+
};
|
|
191
|
+
callbacks.onEntryStart?.(outcome.source);
|
|
192
|
+
callbacks.onEntryComplete?.(failed);
|
|
193
|
+
aggregated.ingested.push(failed);
|
|
194
|
+
aggregated.total += 1;
|
|
195
|
+
aggregated.failed += 1;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
166
198
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
199
|
+
try {
|
|
200
|
+
const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
|
|
201
|
+
aggregated.ingested.push(...r.ingested);
|
|
202
|
+
aggregated.total += r.total;
|
|
203
|
+
aggregated.ok += r.ok;
|
|
204
|
+
aggregated.unchanged += r.unchanged;
|
|
205
|
+
aggregated.failed += r.failed;
|
|
206
|
+
} catch (err) {
|
|
207
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
208
|
+
const failed: IngestEntryResult = {
|
|
209
|
+
source_path: outcome.source,
|
|
210
|
+
logical_path: outcome.source,
|
|
211
|
+
version_id: null,
|
|
212
|
+
status: "failed",
|
|
213
|
+
error: message,
|
|
214
|
+
mime_type: null,
|
|
215
|
+
size_bytes: 0,
|
|
216
|
+
chunk_count: null,
|
|
217
|
+
fetcher: "local",
|
|
218
|
+
source_sha256: "",
|
|
219
|
+
};
|
|
220
|
+
callbacks.onEntryStart?.(outcome.source);
|
|
221
|
+
callbacks.onEntryComplete?.(failed);
|
|
222
|
+
aggregated.ingested.push(failed);
|
|
223
|
+
aggregated.total += 1;
|
|
224
|
+
aggregated.failed += 1;
|
|
225
|
+
} finally {
|
|
226
|
+
// Release the DB lock between sources so other consumers (a
|
|
227
|
+
// concurrent CLI call, the daemon, or a separate MCP server)
|
|
228
|
+
// can wedge in. The next source's first DB call reopens.
|
|
229
|
+
await ctx.db.release();
|
|
230
|
+
}
|
|
197
231
|
}
|
|
198
|
-
}
|
|
199
232
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
233
|
+
const summary = formatSummary(aggregated);
|
|
234
|
+
ctx.progress.done(summary);
|
|
235
|
+
return aggregated;
|
|
236
|
+
});
|
|
203
237
|
},
|
|
204
238
|
});
|
|
205
239
|
|
|
@@ -207,11 +241,17 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
|
|
|
207
241
|
* Render the persistent stderr line shown for one completed entry. Mirrors
|
|
208
242
|
* the glyphs used by the final `console_formatter` so users see the same
|
|
209
243
|
* status indicators twice (once during ingest on stderr, once in the final
|
|
210
|
-
* stdout summary).
|
|
244
|
+
* stdout summary). Successful entries show source kind, humanized byte
|
|
245
|
+
* size, and chunk count so the user can spot oddly small / oddly large
|
|
246
|
+
* files at a glance.
|
|
211
247
|
*/
|
|
212
248
|
function formatEntryLine(entry: IngestEntryResult): string {
|
|
213
249
|
if (entry.status === "ok") {
|
|
214
|
-
|
|
250
|
+
const parts: string[] = [entry.fetcher, formatBytes(entry.size_bytes)];
|
|
251
|
+
if (entry.chunk_count !== null) {
|
|
252
|
+
parts.push(`${entry.chunk_count} chunk${entry.chunk_count === 1 ? "" : "s"}`);
|
|
253
|
+
}
|
|
254
|
+
return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${parts.join(", ")})`)}`;
|
|
215
255
|
}
|
|
216
256
|
if (entry.status === "unchanged") {
|
|
217
257
|
return `${colors.dim("≡")} ${colors.cyan(entry.logical_path)} ${colors.dim("(unchanged)")}`;
|
package/src/operations/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { readOperation } from "./read.ts";
|
|
|
8
8
|
import { refreshOperation } from "./refresh.ts";
|
|
9
9
|
import { removeOperation } from "./remove.ts";
|
|
10
10
|
import { searchOperation } from "./search.ts";
|
|
11
|
+
import { statsOperation } from "./stats.ts";
|
|
11
12
|
import { treeOperation } from "./tree.ts";
|
|
12
13
|
import type { Operation } from "./types.ts";
|
|
13
14
|
import { versionsOperation } from "./versions.ts";
|
|
@@ -28,6 +29,7 @@ export const OPERATIONS: Operation<any, any>[] = [
|
|
|
28
29
|
readOperation,
|
|
29
30
|
searchOperation,
|
|
30
31
|
infoOperation,
|
|
32
|
+
statsOperation,
|
|
31
33
|
versionsOperation,
|
|
32
34
|
diffOperation,
|
|
33
35
|
writeOperation,
|
package/src/operations/refresh.ts
CHANGED
|
@@ -1,9 +1,31 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { resolveEmbeddingWorkers } from "../context.ts";
|
|
2
3
|
import { listDueRefreshes } from "../db/files.ts";
|
|
4
|
+
import { withEmbedderPool } from "../ingest/embedder-pool.ts";
|
|
3
5
|
import { colors } from "../output/formatter.ts";
|
|
6
|
+
import { isInteractive } from "../output/tty.ts";
|
|
4
7
|
import { refreshOne } from "../refresh/runner.ts";
|
|
5
8
|
import { defineOperation } from "./types.ts";
|
|
6
9
|
|
|
10
|
+
interface RefreshEntry {
|
|
11
|
+
logical_path: string;
|
|
12
|
+
status: "ok" | "unchanged" | "failed";
|
|
13
|
+
new_version_id?: string;
|
|
14
|
+
error?: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/** Render one refresh result as a persistent stderr / final-summary line. */
|
|
18
|
+
function formatEntryLine(p: RefreshEntry): string {
|
|
19
|
+
if (p.status === "ok") {
|
|
20
|
+
const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
|
|
21
|
+
return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
|
|
22
|
+
}
|
|
23
|
+
if (p.status === "unchanged") {
|
|
24
|
+
return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
|
|
25
|
+
}
|
|
26
|
+
return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
|
|
27
|
+
}
|
|
28
|
+
|
|
7
29
|
export const refreshOperation = defineOperation({
|
|
8
30
|
name: "membot_refresh",
|
|
9
31
|
cliName: "refresh",
|
|
@@ -29,44 +51,49 @@ export const refreshOperation = defineOperation({
|
|
|
29
51
|
let updated = 0;
|
|
30
52
|
let unchanged = 0;
|
|
31
53
|
let failed = 0;
|
|
32
|
-
const
|
|
33
|
-
if (p.status === "ok")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
if (p.status === "unchanged") {
|
|
39
|
-
unchanged++;
|
|
40
|
-
return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
|
|
41
|
-
}
|
|
42
|
-
failed++;
|
|
43
|
-
return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
|
|
44
|
-
});
|
|
54
|
+
for (const p of result.processed) {
|
|
55
|
+
if (p.status === "ok") updated++;
|
|
56
|
+
else if (p.status === "unchanged") unchanged++;
|
|
57
|
+
else failed++;
|
|
58
|
+
}
|
|
45
59
|
const parts = [colors.green(`updated ${updated}`), colors.dim(`unchanged ${unchanged}`)];
|
|
46
60
|
if (failed) parts.push(colors.red(`failed ${failed}`));
|
|
47
|
-
|
|
61
|
+
const summary = parts.join(", ");
|
|
62
|
+
|
|
63
|
+
// In interactive mode the per-entry results were already streamed to
|
|
64
|
+
// stderr via progress.entry() during the run; printing the same list
|
|
65
|
+
// to stdout would just duplicate the scrollback. Non-interactive
|
|
66
|
+
// callers (JSON, piped, CI) still get the full list.
|
|
67
|
+
if (isInteractive()) return summary;
|
|
68
|
+
|
|
69
|
+
const lines = result.processed.map(formatEntryLine);
|
|
70
|
+
return `${lines.join("\n")}\n${summary}`;
|
|
48
71
|
},
|
|
49
72
|
handler: async (input, ctx) => {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
const
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
73
|
+
// Per-command embedder pool: workers come up at the start of the
|
|
74
|
+
// refresh sweep and are killed before we return, so a manual
|
|
75
|
+
// `membot refresh` doesn't leave subprocesses around.
|
|
76
|
+
const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
|
|
77
|
+
return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
|
|
78
|
+
const targets = input.logical_path
|
|
79
|
+
? [input.logical_path]
|
|
80
|
+
: (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
|
|
81
|
+
const out: RefreshEntry[] = [];
|
|
82
|
+
ctx.progress.start(targets.length, "refresh");
|
|
83
|
+
for (const path of targets) {
|
|
84
|
+
ctx.progress.setLabel(path);
|
|
85
|
+
let entry: RefreshEntry;
|
|
86
|
+
try {
|
|
87
|
+
entry = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
|
|
88
|
+
} catch (err) {
|
|
89
|
+
entry = { logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) };
|
|
90
|
+
}
|
|
91
|
+
out.push(entry);
|
|
92
|
+
ctx.progress.tick(path);
|
|
93
|
+
ctx.progress.entry(formatEntryLine(entry));
|
|
67
94
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
|
|
96
|
+
return { processed: out, count: out.length };
|
|
97
|
+
});
|
|
71
98
|
},
|
|
72
99
|
});
|