membot 0.7.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  import { z } from "zod";
2
+ import { resolveEmbeddingWorkers } from "../context.ts";
3
+ import { withEmbedderPool } from "../ingest/embedder-pool.ts";
2
4
  import {
3
5
  countResolvedEntries,
4
6
  type IngestCallbacks,
@@ -7,7 +9,9 @@ import {
7
9
  ingestResolved,
8
10
  } from "../ingest/ingest.ts";
9
11
  import { type ResolvedSource, resolveSource } from "../ingest/source-resolver.ts";
10
- import { colors } from "../output/formatter.ts";
12
+ import { colors, formatBytes } from "../output/formatter.ts";
13
+ import { pieFor } from "../output/progress.ts";
14
+ import { isInteractive } from "../output/tty.ts";
11
15
  import { defineOperation } from "./types.ts";
12
16
 
13
17
  const FetcherKindEnum = z.enum(["downloader", "local", "inline"]);
@@ -76,6 +80,7 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
76
80
  error: z.string().optional(),
77
81
  mime_type: z.string().nullable(),
78
82
  size_bytes: z.number(),
83
+ chunk_count: z.number().nullable(),
79
84
  fetcher: FetcherKindEnum,
80
85
  source_sha256: z.string(),
81
86
  }),
@@ -90,116 +95,145 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
90
95
  aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m", force: "-f" },
91
96
  },
92
97
  console_formatter: (result) => {
93
- const lines = result.ingested.map((e) => {
94
- if (e.status === "ok") {
95
- return `${colors.green("✓")} ${colors.cyan(e.logical_path)} ${colors.dim(`(${e.fetcher}, ${e.size_bytes}B)`)}`;
96
- }
97
- if (e.status === "unchanged") {
98
- return `${colors.dim("≡")} ${colors.cyan(e.logical_path)} ${colors.dim("(unchanged)")}`;
99
- }
100
- return `${colors.red("✗")} ${e.source_path} ${colors.dim(e.error ?? "")}`;
101
- });
102
98
  const parts: string[] = [colors.green(`added ${result.ok}`)];
103
99
  if (result.unchanged > 0) parts.push(colors.dim(`unchanged ${result.unchanged}`));
104
100
  if (result.failed > 0) parts.push(colors.red(`failed ${result.failed}`));
105
- return `${lines.join("\n")}\n${parts.join(", ")}`;
101
+ const summary = parts.join(", ");
102
+
103
+ // In interactive mode, every entry was already streamed to stderr via
104
+ // progress.entry() during ingest; printing the same list to stdout
105
+ // here would just duplicate the scrollback. Non-interactive callers
106
+ // (JSON, piped stdout, CI) don't see the live stream, so they still
107
+ // get the full per-entry list as the operation's stdout payload.
108
+ if (isInteractive()) return summary;
109
+
110
+ const lines = result.ingested.map(formatEntryLine);
111
+ return `${lines.join("\n")}\n${summary}`;
106
112
  },
107
113
  handler: async (input, ctx) => {
108
- const { sources, ...rest } = input;
109
- const followSymlinks = rest.follow_symlinks ?? true;
114
+ // Spin up an ephemeral embedder pool for the whole `add` command —
115
+ // `withEmbedderPool` handles the workers=1 short-circuit and disposes
116
+ // the children when the closure returns (see embedder-pool.ts). Inside
117
+ // the closure, every embed() call from the ingest pipeline transparently
118
+ // fans out to the subprocess pool.
119
+ const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
120
+ return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
121
+ const { sources, ...rest } = input;
122
+ const followSymlinks = rest.follow_symlinks ?? true;
110
123
 
111
- // Phase 1: resolve every source upfront so the shared progress bar
112
- // knows its total. A resolve failure (bad path, glob with no base) is
113
- // captured per-source so one bad arg doesn't abort the whole batch.
114
- type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
115
- const outcomes: ResolveOutcome[] = [];
116
- for (const source of sources) {
117
- try {
118
- const resolved = await resolveSource(source, {
119
- include: rest.include,
120
- exclude: rest.exclude,
121
- followSymlinks,
122
- });
123
- outcomes.push({ source, resolved });
124
- } catch (err) {
125
- outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
124
+ // Phase 1: resolve every source upfront so the shared progress bar
125
+ // knows its total. A resolve failure (bad path, glob with no base) is
126
+ // captured per-source so one bad arg doesn't abort the whole batch.
127
+ type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
128
+ const outcomes: ResolveOutcome[] = [];
129
+ for (const source of sources) {
130
+ try {
131
+ const resolved = await resolveSource(source, {
132
+ include: rest.include,
133
+ exclude: rest.exclude,
134
+ followSymlinks,
135
+ });
136
+ outcomes.push({ source, resolved });
137
+ } catch (err) {
138
+ outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
139
+ }
126
140
  }
127
- }
128
141
 
129
- const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
142
+ const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
130
143
 
131
- const aggregated: IngestResult = {
132
- ingested: [],
133
- total: 0,
134
- ok: 0,
135
- unchanged: 0,
136
- failed: 0,
137
- };
144
+ const aggregated: IngestResult = {
145
+ ingested: [],
146
+ total: 0,
147
+ ok: 0,
148
+ unchanged: 0,
149
+ failed: 0,
150
+ };
138
151
 
139
- ctx.progress.start(total, "ingest");
140
- const callbacks: IngestCallbacks = {
141
- onEntryStart: (label) => ctx.progress.tick(label),
142
- onEntryComplete: (entry) => ctx.progress.entry(formatEntryLine(entry)),
143
- onEntryProgress: (_label, sublabel) => ctx.progress.update(sublabel),
144
- };
152
+ ctx.progress.start(total, "ingest");
153
+ const callbacks: IngestCallbacks = {
154
+ // Counter advances on COMPLETION so concurrent prep doesn't race the
155
+ // bar to 100% before any file is fully persisted. The per-worker
156
+ // status section (one line per active worker) shows file + step in
157
+ // real time, prefixed with a pie glyph that fills as the per-file
158
+ // pipeline progresses. `setWorkers(n)` resizes the section whenever
159
+ // a new ingest source kicks off with its own pool size.
160
+ onWorkerCount: (n) => ctx.progress.setWorkers(n),
161
+ onEntryStart: (label, workerId) => {
162
+ if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(undefined)} ${label}`);
163
+ ctx.progress.setLabel(label);
164
+ },
165
+ onEntryComplete: (entry, workerId) => {
166
+ if (workerId !== undefined) ctx.progress.workerSet(workerId, "");
167
+ ctx.progress.tick(entry.logical_path);
168
+ ctx.progress.entry(formatEntryLine(entry));
169
+ },
170
+ onEntryProgress: (label, sublabel, workerId) => {
171
+ if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(sublabel)} ${label} — ${sublabel}`);
172
+ ctx.progress.update(sublabel);
173
+ },
174
+ onChunks: (n) => ctx.progress.addChunks(n),
175
+ };
145
176
 
146
- for (const outcome of outcomes) {
147
- if ("error" in outcome) {
148
- const failed: IngestEntryResult = {
149
- source_path: outcome.source,
150
- logical_path: outcome.source,
151
- version_id: null,
152
- status: "failed",
153
- error: outcome.error.message,
154
- mime_type: null,
155
- size_bytes: 0,
156
- fetcher: "local",
157
- source_sha256: "",
158
- };
159
- callbacks.onEntryStart?.(outcome.source);
160
- callbacks.onEntryComplete?.(failed);
161
- aggregated.ingested.push(failed);
162
- aggregated.total += 1;
163
- aggregated.failed += 1;
164
- continue;
165
- }
177
+ for (const outcome of outcomes) {
178
+ if ("error" in outcome) {
179
+ const failed: IngestEntryResult = {
180
+ source_path: outcome.source,
181
+ logical_path: outcome.source,
182
+ version_id: null,
183
+ status: "failed",
184
+ error: outcome.error.message,
185
+ mime_type: null,
186
+ size_bytes: 0,
187
+ chunk_count: null,
188
+ fetcher: "local",
189
+ source_sha256: "",
190
+ };
191
+ callbacks.onEntryStart?.(outcome.source);
192
+ callbacks.onEntryComplete?.(failed);
193
+ aggregated.ingested.push(failed);
194
+ aggregated.total += 1;
195
+ aggregated.failed += 1;
196
+ continue;
197
+ }
166
198
 
167
- try {
168
- const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
169
- aggregated.ingested.push(...r.ingested);
170
- aggregated.total += r.total;
171
- aggregated.ok += r.ok;
172
- aggregated.unchanged += r.unchanged;
173
- aggregated.failed += r.failed;
174
- } catch (err) {
175
- const message = err instanceof Error ? err.message : String(err);
176
- const failed: IngestEntryResult = {
177
- source_path: outcome.source,
178
- logical_path: outcome.source,
179
- version_id: null,
180
- status: "failed",
181
- error: message,
182
- mime_type: null,
183
- size_bytes: 0,
184
- fetcher: "local",
185
- source_sha256: "",
186
- };
187
- callbacks.onEntryStart?.(outcome.source);
188
- callbacks.onEntryComplete?.(failed);
189
- aggregated.ingested.push(failed);
190
- aggregated.total += 1;
191
- aggregated.failed += 1;
192
- } finally {
193
- // Release the DB lock between sources so other consumers (a
194
- // concurrent CLI call, the daemon, or a separate MCP server)
195
- // can wedge in. The next source's first DB call reopens.
196
- await ctx.db.release();
199
+ try {
200
+ const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
201
+ aggregated.ingested.push(...r.ingested);
202
+ aggregated.total += r.total;
203
+ aggregated.ok += r.ok;
204
+ aggregated.unchanged += r.unchanged;
205
+ aggregated.failed += r.failed;
206
+ } catch (err) {
207
+ const message = err instanceof Error ? err.message : String(err);
208
+ const failed: IngestEntryResult = {
209
+ source_path: outcome.source,
210
+ logical_path: outcome.source,
211
+ version_id: null,
212
+ status: "failed",
213
+ error: message,
214
+ mime_type: null,
215
+ size_bytes: 0,
216
+ chunk_count: null,
217
+ fetcher: "local",
218
+ source_sha256: "",
219
+ };
220
+ callbacks.onEntryStart?.(outcome.source);
221
+ callbacks.onEntryComplete?.(failed);
222
+ aggregated.ingested.push(failed);
223
+ aggregated.total += 1;
224
+ aggregated.failed += 1;
225
+ } finally {
226
+ // Release the DB lock between sources so other consumers (a
227
+ // concurrent CLI call, the daemon, or a separate MCP server)
228
+ // can wedge in. The next source's first DB call reopens.
229
+ await ctx.db.release();
230
+ }
197
231
  }
198
- }
199
232
 
200
- const summary = formatSummary(aggregated);
201
- ctx.progress.done(summary);
202
- return aggregated;
233
+ const summary = formatSummary(aggregated);
234
+ ctx.progress.done(summary);
235
+ return aggregated;
236
+ });
203
237
  },
204
238
  });
205
239
 
@@ -207,11 +241,17 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
207
241
  * Render the persistent stderr line shown for one completed entry. Mirrors
208
242
  * the glyphs used by the final `console_formatter` so users see the same
209
243
  * status indicators twice (once during ingest on stderr, once in the final
210
- * stdout summary).
244
+ * stdout summary). Successful entries show source kind, humanized byte
245
+ * size, and chunk count so the user can spot oddly small / oddly large
246
+ * files at a glance.
211
247
  */
212
248
  function formatEntryLine(entry: IngestEntryResult): string {
213
249
  if (entry.status === "ok") {
214
- return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${entry.fetcher}, ${entry.size_bytes}B)`)}`;
250
+ const parts: string[] = [entry.fetcher, formatBytes(entry.size_bytes)];
251
+ if (entry.chunk_count !== null) {
252
+ parts.push(`${entry.chunk_count} chunk${entry.chunk_count === 1 ? "" : "s"}`);
253
+ }
254
+ return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${parts.join(", ")})`)}`;
215
255
  }
216
256
  if (entry.status === "unchanged") {
217
257
  return `${colors.dim("≡")} ${colors.cyan(entry.logical_path)} ${colors.dim("(unchanged)")}`;
@@ -8,6 +8,7 @@ import { readOperation } from "./read.ts";
8
8
  import { refreshOperation } from "./refresh.ts";
9
9
  import { removeOperation } from "./remove.ts";
10
10
  import { searchOperation } from "./search.ts";
11
+ import { statsOperation } from "./stats.ts";
11
12
  import { treeOperation } from "./tree.ts";
12
13
  import type { Operation } from "./types.ts";
13
14
  import { versionsOperation } from "./versions.ts";
@@ -28,6 +29,7 @@ export const OPERATIONS: Operation<any, any>[] = [
28
29
  readOperation,
29
30
  searchOperation,
30
31
  infoOperation,
32
+ statsOperation,
31
33
  versionsOperation,
32
34
  diffOperation,
33
35
  writeOperation,
@@ -1,9 +1,31 @@
1
1
  import { z } from "zod";
2
+ import { resolveEmbeddingWorkers } from "../context.ts";
2
3
  import { listDueRefreshes } from "../db/files.ts";
4
+ import { withEmbedderPool } from "../ingest/embedder-pool.ts";
3
5
  import { colors } from "../output/formatter.ts";
6
+ import { isInteractive } from "../output/tty.ts";
4
7
  import { refreshOne } from "../refresh/runner.ts";
5
8
  import { defineOperation } from "./types.ts";
6
9
 
10
+ interface RefreshEntry {
11
+ logical_path: string;
12
+ status: "ok" | "unchanged" | "failed";
13
+ new_version_id?: string;
14
+ error?: string;
15
+ }
16
+
17
+ /** Render one refresh result as a persistent stderr / final-summary line. */
18
+ function formatEntryLine(p: RefreshEntry): string {
19
+ if (p.status === "ok") {
20
+ const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
21
+ return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
22
+ }
23
+ if (p.status === "unchanged") {
24
+ return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
25
+ }
26
+ return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
27
+ }
28
+
7
29
  export const refreshOperation = defineOperation({
8
30
  name: "membot_refresh",
9
31
  cliName: "refresh",
@@ -29,44 +51,49 @@ export const refreshOperation = defineOperation({
29
51
  let updated = 0;
30
52
  let unchanged = 0;
31
53
  let failed = 0;
32
- const lines = result.processed.map((p) => {
33
- if (p.status === "ok") {
34
- updated++;
35
- const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
36
- return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
37
- }
38
- if (p.status === "unchanged") {
39
- unchanged++;
40
- return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
41
- }
42
- failed++;
43
- return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
44
- });
54
+ for (const p of result.processed) {
55
+ if (p.status === "ok") updated++;
56
+ else if (p.status === "unchanged") unchanged++;
57
+ else failed++;
58
+ }
45
59
  const parts = [colors.green(`updated ${updated}`), colors.dim(`unchanged ${unchanged}`)];
46
60
  if (failed) parts.push(colors.red(`failed ${failed}`));
47
- return `${lines.join("\n")}\n${parts.join(", ")}`;
61
+ const summary = parts.join(", ");
62
+
63
+ // In interactive mode the per-entry results were already streamed to
64
+ // stderr via progress.entry() during the run; printing the same list
65
+ // to stdout would just duplicate the scrollback. Non-interactive
66
+ // callers (JSON, piped, CI) still get the full list.
67
+ if (isInteractive()) return summary;
68
+
69
+ const lines = result.processed.map(formatEntryLine);
70
+ return `${lines.join("\n")}\n${summary}`;
48
71
  },
49
72
  handler: async (input, ctx) => {
50
- const targets = input.logical_path
51
- ? [input.logical_path]
52
- : (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
53
- const out: Array<{
54
- logical_path: string;
55
- status: "ok" | "unchanged" | "failed";
56
- new_version_id?: string;
57
- error?: string;
58
- }> = [];
59
- ctx.progress.start(targets.length, "refresh");
60
- for (const path of targets) {
61
- ctx.progress.tick(path);
62
- try {
63
- const r = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
64
- out.push(r);
65
- } catch (err) {
66
- out.push({ logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) });
73
+ // Per-command embedder pool: workers come up at the start of the
74
+ // refresh sweep and are killed before we return, so a manual
75
+ // `membot refresh` doesn't leave subprocesses around.
76
+ const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
77
+ return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
78
+ const targets = input.logical_path
79
+ ? [input.logical_path]
80
+ : (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
81
+ const out: RefreshEntry[] = [];
82
+ ctx.progress.start(targets.length, "refresh");
83
+ for (const path of targets) {
84
+ ctx.progress.setLabel(path);
85
+ let entry: RefreshEntry;
86
+ try {
87
+ entry = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
88
+ } catch (err) {
89
+ entry = { logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) };
90
+ }
91
+ out.push(entry);
92
+ ctx.progress.tick(path);
93
+ ctx.progress.entry(formatEntryLine(entry));
67
94
  }
68
- }
69
- ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
70
- return { processed: out, count: out.length };
95
+ ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
96
+ return { processed: out, count: out.length };
97
+ });
71
98
  },
72
99
  });