membot 0.7.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
- import { logger } from "./logger.ts";
2
- import { isSilent, useSpinner } from "./tty.ts";
1
+ import { bold, dim } from "ansis";
2
+ import { type LiveArea, logger } from "./logger.ts";
3
+ import { isSilent, useColor, useSpinner } from "./tty.ts";
3
4
 
4
5
  /**
5
6
  * Progress reporter for multi-entry operations (directory/glob ingest, batch
@@ -8,13 +9,23 @@ import { isSilent, useSpinner } from "./tty.ts";
8
9
  * `entry(line)` (writes a persistent stderr line that survives the spinner),
9
10
  * then `done(summary)`.
10
11
  *
11
- * Interactive: replaces a single spinner line as work happens, with an ASCII
12
- * bar like `[████░░░░░░] 4/15 (26%) relative/path.md`.
13
- * Non-interactive: emits `info` lines per `tick` and per `entry`.
12
+ * Interactive: a multi-line live area on stderr — top is the bar with
13
+ * counts, ETA, and chunk total; below it, one line per active worker showing
14
+ * which file and which step it's currently on. Updates redraw in place via
15
+ * ANSI escapes.
16
+ *
17
+ * Non-interactive: emits `info` lines per `tick` and per `entry` and
18
+ * silently ignores worker / chunk updates so CI logs don't get spammed.
14
19
  */
15
20
  export interface Progress {
16
21
  start(total: number, label?: string): void;
17
22
  tick(label: string): void;
23
+ /**
24
+ * Replace the spinner's main label without advancing the counter. Used to
25
+ * show which entry is currently being worked on while sub-step progress
26
+ * (the suffix) updates independently. No-op in non-interactive modes.
27
+ */
28
+ setLabel(label: string): void;
18
29
  /**
19
30
  * Re-render the active spinner with the most recent `tick` label plus an
20
31
  * extra suffix (e.g. "embedding 32/168") without advancing the counter.
@@ -22,6 +33,23 @@ export interface Progress {
22
33
  * deliberately TTY-only so CI logs don't get one line per inner batch.
23
34
  */
24
35
  update(suffix: string): void;
36
+ /**
37
+ * Resize the worker section of the multi-line display to `n` slots. Each
38
+ * slot is then addressable via `workerSet(workerId, line)`. Pass 0 to
39
+ * collapse the worker section (single-line bar only). No-op in
40
+ * non-interactive modes.
41
+ */
42
+ setWorkers(n: number): void;
43
+ /**
44
+ * Set worker `workerId`'s status line (e.g. "doc.md — embedding 12/30").
45
+ * Empty string marks the slot idle. No-op in non-interactive modes.
46
+ */
47
+ workerSet(workerId: number, line: string): void;
48
+ /**
49
+ * Increment the cumulative chunk count rendered on the top line. Called
50
+ * by ingest workers after persisting each file. No-op in non-interactive.
51
+ */
52
+ addChunks(n: number): void;
25
53
  entry(line: string): void;
26
54
  done(summary?: string): void;
27
55
  fail(summary?: string): void;
@@ -30,6 +58,49 @@ export interface Progress {
30
58
 
31
59
  const BAR_WIDTH = 20;
32
60
  const LABEL_MAX = 60;
61
+ const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
62
+ const FRAME_INTERVAL_MS = 80;
63
+ const PIE_FRAMES = ["◯", "◔", "◐", "◕", "●"] as const;
64
+
65
+ /**
66
+ * Map a pipeline step name to a single-character "pie" indicator showing
67
+ * roughly how far along the per-file pipeline is. The full path is
68
+ * read → unchanged check → convert → describe → chunk → embed → persist;
69
+ * each step lights up another quarter. Embed is the slow one and reports
70
+ * its own `embedding X/Y` sub-progress, which we render with a finer-
71
+ * grained pie based on the X/Y ratio.
72
+ */
73
+ export function pieFor(step: string | undefined): string {
74
+ if (!step) return PIE_FRAMES[0];
75
+ const m = step.match(/^embedding\s+(\d+)\s*\/\s*(\d+)/);
76
+ if (m) {
77
+ const done = Number(m[1]);
78
+ const total = Number(m[2]);
79
+ if (total > 0) return pieFromRatio(done / total);
80
+ }
81
+ switch (step) {
82
+ case "reading":
83
+ return PIE_FRAMES[0];
84
+ case "converting":
85
+ return PIE_FRAMES[1];
86
+ case "describing":
87
+ return PIE_FRAMES[2];
88
+ case "chunking":
89
+ return PIE_FRAMES[3];
90
+ case "persisting":
91
+ return PIE_FRAMES[4];
92
+ default:
93
+ return PIE_FRAMES[0];
94
+ }
95
+ }
96
+
97
+ function pieFromRatio(r: number): string {
98
+ if (r < 0.125) return PIE_FRAMES[0];
99
+ if (r < 0.375) return PIE_FRAMES[1];
100
+ if (r < 0.625) return PIE_FRAMES[2];
101
+ if (r < 0.875) return PIE_FRAMES[3];
102
+ return PIE_FRAMES[4];
103
+ }
33
104
 
34
105
  /**
35
106
  * Render a fixed-width ASCII progress bar. Uses block-drawing characters in
@@ -51,54 +122,351 @@ function truncateLabel(label: string, max = LABEL_MAX): string {
51
122
  return `…${label.slice(label.length - max + 1)}`;
52
123
  }
53
124
 
125
+ /**
126
+ * Cap a (possibly ANSI-styled) string at `width` *visible* columns. ANSI
127
+ * escape sequences are passed through verbatim — they don't count toward
128
+ * width — and a `\x1b[0m` reset is appended so any open formatting closes
129
+ * cleanly even if we cut mid-styled-substring. Critical for the multi-line
130
+ * live area: if a line wraps to two terminal rows, our cursor math (one
131
+ * `\x1b[1A` per logical line) leaves wrap residue behind on every clear,
132
+ * which is what produces the "double-write / scrolling" artifact.
133
+ */
134
+ export function clipToWidth(s: string, width: number): string {
135
+ if (width <= 0) return "\x1b[0m";
136
+ let visible = 0;
137
+ let i = 0;
138
+ let out = "";
139
+ while (i < s.length) {
140
+ if (s[i] === "\x1b" && s[i + 1] === "[") {
141
+ let j = i + 2;
142
+ while (j < s.length && s[j] !== "m") j++;
143
+ if (j < s.length) {
144
+ out += s.slice(i, j + 1);
145
+ i = j + 1;
146
+ continue;
147
+ }
148
+ }
149
+ if (visible >= width) break;
150
+ out += s[i];
151
+ visible++;
152
+ i++;
153
+ }
154
+ return `${out}\x1b[0m`;
155
+ }
156
+
157
+ /** Best-effort terminal width; falls back to 80 when stderr is not a TTY. */
158
+ function terminalWidth(): number {
159
+ const cols = process.stderr.columns;
160
+ if (typeof cols === "number" && cols > 0) return cols;
161
+ return 80;
162
+ }
163
+
164
+ /**
165
+ * Format a millisecond duration as a short human string: `47s`, `2m13s`,
166
+ * `1h12m`. Used for the ETA on the top line.
167
+ */
168
+ function formatDuration(ms: number): string {
169
+ if (!Number.isFinite(ms) || ms < 0) return "?";
170
+ const sec = Math.round(ms / 1000);
171
+ if (sec < 60) return `${sec}s`;
172
+ const min = Math.floor(sec / 60);
173
+ const remSec = sec % 60;
174
+ if (min < 60) return remSec === 0 ? `${min}m` : `${min}m${remSec}s`;
175
+ const hr = Math.floor(min / 60);
176
+ const remMin = min % 60;
177
+ return remMin === 0 ? `${hr}h` : `${hr}h${remMin}m`;
178
+ }
179
+
180
+ /**
181
+ * Multi-line live area on stderr. One main line (spinner glyph + bar +
182
+ * counts + ETA + chunk total + label + sub-step suffix), then `workerCount`
183
+ * worker status lines below it. Re-renders on every state change and on a
184
+ * recurring interval so the spinner glyph keeps animating during long
185
+ * operations. Implements `LiveArea` so the logger can clear/redraw the
186
+ * block around stray info/warn lines.
187
+ */
188
+ class MultiLineLiveArea implements LiveArea {
189
+ private mainLabel = "";
190
+ private mainSuffix = "";
191
+ private workerLines: string[] = [];
192
+ private total = 0;
193
+ private count = 0;
194
+ private chunks = 0;
195
+ private startedAt = 0;
196
+ private linesWritten = 0;
197
+ private frame = 0;
198
+ private interval: ReturnType<typeof setInterval> | null = null;
199
+ private active = false;
200
+ private color: boolean;
201
+
202
+ constructor(color: boolean) {
203
+ this.color = color;
204
+ }
205
+
206
+ start(total: number, label: string): void {
207
+ this.total = total;
208
+ this.count = 0;
209
+ this.chunks = 0;
210
+ this.startedAt = Date.now();
211
+ this.mainLabel = label;
212
+ this.mainSuffix = "";
213
+ this.workerLines = [];
214
+ this.linesWritten = 0;
215
+ this.frame = 0;
216
+ this.active = true;
217
+ logger.setActiveLiveArea(this);
218
+ this.render();
219
+ this.interval = setInterval(() => {
220
+ this.frame = (this.frame + 1) % SPINNER_FRAMES.length;
221
+ this.render();
222
+ }, FRAME_INTERVAL_MS);
223
+ }
224
+
225
+ tick(label: string): void {
226
+ this.count += 1;
227
+ this.mainLabel = label;
228
+ this.mainSuffix = "";
229
+ this.render();
230
+ }
231
+
232
+ setLabel(label: string): void {
233
+ this.mainLabel = label;
234
+ this.render();
235
+ }
236
+
237
+ setSuffix(suffix: string): void {
238
+ this.mainSuffix = suffix;
239
+ this.render();
240
+ }
241
+
242
+ setWorkerCount(n: number): void {
243
+ this.workerLines = new Array(Math.max(0, n)).fill("");
244
+ this.render();
245
+ }
246
+
247
+ setWorker(id: number, line: string): void {
248
+ while (this.workerLines.length <= id) this.workerLines.push("");
249
+ this.workerLines[id] = line;
250
+ this.render();
251
+ }
252
+
253
+ addChunks(n: number): void {
254
+ this.chunks += n;
255
+ this.render();
256
+ }
257
+
258
+ stop(finalLine: string | undefined, glyphPrefix: string): void {
259
+ if (!this.active) return;
260
+ this.active = false;
261
+ if (this.interval) clearInterval(this.interval);
262
+ this.interval = null;
263
+ this.clear();
264
+ logger.setActiveLiveArea(null);
265
+ if (finalLine) {
266
+ process.stderr.write(`${glyphPrefix}${finalLine}\n`);
267
+ }
268
+ }
269
+
270
+ clear(): void {
271
+ if (this.linesWritten === 0) return;
272
+ // Cursor sits at the end of the last rendered line. Walk up, clearing
273
+ // each row, ending at column 0 of the original top line.
274
+ process.stderr.write("\r");
275
+ for (let i = 0; i < this.linesWritten; i++) {
276
+ process.stderr.write("\x1b[2K");
277
+ if (i < this.linesWritten - 1) process.stderr.write("\x1b[1A");
278
+ }
279
+ this.linesWritten = 0;
280
+ }
281
+
282
+ render(): void {
283
+ if (!this.active) return;
284
+ this.clear();
285
+ const lines = this.composeLines();
286
+ for (let i = 0; i < lines.length; i++) {
287
+ if (i > 0) process.stderr.write("\n");
288
+ process.stderr.write(lines[i] ?? "");
289
+ }
290
+ this.linesWritten = lines.length;
291
+ }
292
+
293
+ private composeLines(): string[] {
294
+ // One column shy of the terminal so the trailing char doesn't trigger
295
+ // a soft wrap on every render — without this, long bar/worker lines
296
+ // occupy two visible rows and `clear()`'s one-up-per-line cursor walk
297
+ // leaves wrap residue, which surfaces as duplicate bars scrolling up
298
+ // the screen as files complete.
299
+ const width = Math.max(20, terminalWidth() - 1);
300
+ const lines: string[] = [clipToWidth(this.bold(this.composeMainLine()), width)];
301
+ if (this.workerLines.length > 0) {
302
+ // Separator under the bar so the per-worker section reads as a
303
+ // distinct block — without this, the first worker line snugs up
304
+ // against the bar and the bar's tail (label/suffix) bleeds into
305
+ // the worker grid visually.
306
+ lines.push(this.dim("─".repeat(width)));
307
+ }
308
+ for (const w of this.workerLines) {
309
+ const raw = w ? ` ${truncateLabel(w, LABEL_MAX + 20)}` : "";
310
+ // Worker rows are de-emphasized so the bold bar stays the focal
311
+ // point; the pie glyph + filename + step still read clearly in
312
+ // dim type.
313
+ lines.push(clipToWidth(this.dim(raw), width));
314
+ }
315
+ return lines;
316
+ }
317
+
318
+ private composeMainLine(): string {
319
+ const glyph = SPINNER_FRAMES[this.frame] ?? "·";
320
+ const bar = renderBar(this.count, this.total);
321
+ const pct = this.total > 0 ? Math.floor((this.count / this.total) * 100) : 0;
322
+ const eta = this.computeEta();
323
+ const stats: string[] = [`${this.count}/${this.total} (${pct}%)`];
324
+ if (this.chunks > 0) stats.push(`${this.chunks} chunks`);
325
+ const elapsedMs = Date.now() - this.startedAt;
326
+ if (elapsedMs > 0) stats.push(`elapsed ${formatDuration(elapsedMs)}`);
327
+ if (eta) stats.push(`ETA ${eta}`);
328
+ const statsStr = this.dim(stats.join(" · "));
329
+ // When per-worker lines are active, the in-flight file/step lives in
330
+ // the worker grid — duplicating it on the bar would just be noise.
331
+ // In single-line mode (workers = 0) we keep the label/suffix tail so
332
+ // short ingests still show what's happening.
333
+ const showTail = this.workerLines.length === 0;
334
+ const labelTail = showTail && this.mainLabel ? ` ${truncateLabel(this.mainLabel)}` : "";
335
+ const suffixTail = showTail && this.mainSuffix ? ` ${this.dim(`— ${this.mainSuffix}`)}` : "";
336
+ return `${glyph} ${bar} ${statsStr}${labelTail}${suffixTail}`;
337
+ }
338
+
339
+ /**
340
+ * Compose the final summary tail appended on `done()` — the per-batch
341
+ * totals the user asked for: file count, chunk count, elapsed time.
342
+ * Emitted only when there's something interesting to show (count > 0).
343
+ */
344
+ totalsSummary(): string {
345
+ if (this.count <= 0) return "";
346
+ const parts = [`${this.count} files`];
347
+ if (this.chunks > 0) parts.push(`${this.chunks} chunks`);
348
+ const elapsedMs = Date.now() - this.startedAt;
349
+ parts.push(`${formatDuration(elapsedMs)} elapsed`);
350
+ return parts.join(" · ");
351
+ }
352
+
353
+ private computeEta(): string | null {
354
+ if (this.count <= 0 || this.total <= 0) return null;
355
+ if (this.count >= this.total) return null;
356
+ const elapsed = Date.now() - this.startedAt;
357
+ const remainingMs = (elapsed * (this.total - this.count)) / this.count;
358
+ return formatDuration(remainingMs);
359
+ }
360
+
361
+ private dim(text: string): string {
362
+ return this.color ? dim(text) : text;
363
+ }
364
+
365
+ private bold(text: string): string {
366
+ return this.color ? bold(text) : text;
367
+ }
368
+ }
369
+
54
370
  /**
55
371
  * Build a `Progress` reporter whose mode is decided once, at call time, from
56
372
  * the current TTY state. Use one per multi-entry operation.
57
373
  */
58
374
  export function createProgress(): Progress {
59
- let total = 0;
60
- let count = 0;
61
- let lastLabel = "";
62
- let spinner: ReturnType<typeof logger.startSpinner> | null = null;
63
-
64
375
  const interactive = useSpinner();
65
376
  const silent = isSilent();
66
377
 
67
- const renderSpinnerText = (label: string, suffix?: string): string => {
68
- const bar = renderBar(count, total);
69
- const pct = total > 0 ? Math.floor((count / total) * 100) : 0;
70
- const labelTail = label ? ` — ${truncateLabel(label)}` : "";
71
- const suffixTail = suffix ? ` — ${suffix}` : "";
72
- return `${bar} ${count}/${total} (${pct}%)${labelTail}${suffixTail}`;
378
+ if (!interactive || silent) {
379
+ return createNonInteractiveProgress(silent);
380
+ }
381
+
382
+ const live = new MultiLineLiveArea(useColor());
383
+ let lastSummary: string | undefined;
384
+ let total = 0;
385
+ let count = 0;
386
+
387
+ return {
388
+ start(t: number, label?: string) {
389
+ total = t;
390
+ count = 0;
391
+ live.start(t, label ?? "");
392
+ },
393
+ tick(label: string) {
394
+ count += 1;
395
+ live.tick(label);
396
+ },
397
+ setLabel(label: string) {
398
+ live.setLabel(label);
399
+ },
400
+ update(suffix: string) {
401
+ live.setSuffix(suffix);
402
+ },
403
+ setWorkers(n: number) {
404
+ live.setWorkerCount(n);
405
+ },
406
+ workerSet(workerId: number, line: string) {
407
+ live.setWorker(workerId, line);
408
+ },
409
+ addChunks(n: number) {
410
+ live.addChunks(n);
411
+ },
412
+ entry(line: string) {
413
+ logger.info(line);
414
+ },
415
+ done(summary?: string) {
416
+ const base = summary ?? `${count}/${total} done`;
417
+ const totals = live.totalsSummary();
418
+ lastSummary = totals ? `${base} · ${totals}` : base;
419
+ live.stop(lastSummary, useColor() ? `${SPINNER_FRAMES[0]} ` : "✓ ");
420
+ },
421
+ fail(summary?: string) {
422
+ const base = summary ?? `failed at ${count}/${total}`;
423
+ const totals = live.totalsSummary();
424
+ lastSummary = totals ? `${base} · ${totals}` : base;
425
+ live.stop(lastSummary, "✗ ");
426
+ },
427
+ info(msg: string) {
428
+ logger.info(msg);
429
+ },
73
430
  };
431
+ }
74
432
 
433
+ /**
434
+ * Stripped-down progress reporter for non-TTY / silent contexts: emits one
435
+ * line per tick + entry, drops every sub-step / worker / chunk update so CI
436
+ * logs don't blow up.
437
+ */
438
+ function createNonInteractiveProgress(silent: boolean): Progress {
439
+ let total = 0;
440
+ let count = 0;
441
+ let chunks = 0;
442
+ let startedAt = 0;
443
+ const totalsTail = (): string => {
444
+ if (count <= 0) return "";
445
+ const parts = [`${count} files`];
446
+ if (chunks > 0) parts.push(`${chunks} chunks`);
447
+ parts.push(`${formatDuration(Date.now() - startedAt)} elapsed`);
448
+ return parts.join(" · ");
449
+ };
75
450
  return {
76
451
  start(t: number, label?: string) {
77
452
  total = t;
78
453
  count = 0;
79
- lastLabel = label ?? "";
454
+ chunks = 0;
455
+ startedAt = Date.now();
80
456
  if (silent) return;
81
- if (interactive) {
82
- const initial = renderSpinnerText(lastLabel);
83
- spinner = logger.startSpinner(initial);
84
- } else if (label) {
85
- logger.info(`${label}: 0/${total}`);
86
- }
457
+ if (label) logger.info(`${label}: 0/${total}`);
87
458
  },
88
459
  tick(label: string) {
89
460
  count += 1;
90
- lastLabel = label;
91
461
  if (silent) return;
92
- if (interactive && spinner) {
93
- spinner.update(renderSpinnerText(label));
94
- } else {
95
- logger.info(`[${count}/${total}] ${label}`);
96
- }
462
+ logger.info(`[${count}/${total}] ${label}`);
97
463
  },
98
- update(suffix: string) {
99
- if (silent) return;
100
- if (!interactive || !spinner) return;
101
- spinner.update(renderSpinnerText(lastLabel, suffix));
464
+ setLabel() {},
465
+ update() {},
466
+ setWorkers() {},
467
+ workerSet() {},
468
+ addChunks(n: number) {
469
+ chunks += n;
102
470
  },
103
471
  entry(line: string) {
104
472
  if (silent) return;
@@ -106,24 +474,18 @@ export function createProgress(): Progress {
106
474
  },
107
475
  done(summary?: string) {
108
476
  if (silent) return;
109
- if (interactive && spinner) {
110
- spinner.success(summary ?? `${count}/${total} done`);
111
- spinner = null;
112
- } else if (summary) {
113
- logger.info(summary);
114
- }
477
+ const tail = totalsTail();
478
+ const line = summary ? (tail ? `${summary} · ${tail}` : summary) : tail;
479
+ if (line) logger.info(line);
115
480
  },
116
481
  fail(summary?: string) {
482
+ const tail = totalsTail();
483
+ const line = summary ? (tail ? `${summary} · ${tail}` : summary) : tail;
117
484
  if (silent) {
118
- if (summary) logger.warn(summary);
485
+ if (line) logger.warn(line);
119
486
  return;
120
487
  }
121
- if (interactive && spinner) {
122
- spinner.error(summary ?? `failed at ${count}/${total}`);
123
- spinner = null;
124
- } else if (summary) {
125
- logger.warn(summary);
126
- }
488
+ if (line) logger.warn(line);
127
489
  },
128
490
  info(msg: string) {
129
491
  if (silent) return;
@@ -1,5 +1,6 @@
1
- import type { AppContext } from "../context.ts";
1
+ import { type AppContext, resolveEmbeddingWorkers } from "../context.ts";
2
2
  import { listDueRefreshes } from "../db/files.ts";
3
+ import { withEmbedderPool } from "../ingest/embedder-pool.ts";
3
4
  import { logger } from "../output/logger.ts";
4
5
  import { type RefreshOutcome, refreshOne } from "./runner.ts";
5
6
 
@@ -7,22 +8,30 @@ import { type RefreshOutcome, refreshOne } from "./runner.ts";
7
8
  * One scheduler tick: refresh every row whose `refresh_frequency_sec` has
8
9
  * elapsed since `refreshed_at`. Errors on individual rows are logged and
9
10
  * the loop continues so one bad source doesn't halt the daemon.
11
+ *
12
+ * The embedder worker pool is per-tick: spun up only if there are due rows,
13
+ * torn down before the tick returns. The daemon never holds idle workers
14
+ * between ticks (which can be minutes apart).
10
15
  */
11
16
  export async function runDueRefreshes(ctx: AppContext): Promise<RefreshOutcome[]> {
12
17
  const due = await listDueRefreshes(ctx.db);
13
- const out: RefreshOutcome[] = [];
14
- for (const row of due) {
15
- try {
16
- const r = await refreshOne(ctx, row.logical_path);
17
- out.push(r);
18
- if (r.status === "ok") logger.info(`refresh: ${row.logical_path} → new version ${r.new_version_id}`);
19
- } catch (err) {
20
- const msg = err instanceof Error ? err.message : String(err);
21
- logger.warn(`refresh: ${row.logical_path} failed (${msg})`);
22
- out.push({ logical_path: row.logical_path, status: "failed", error: msg });
18
+ if (due.length === 0) return [];
19
+ const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
20
+ return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
21
+ const out: RefreshOutcome[] = [];
22
+ for (const row of due) {
23
+ try {
24
+ const r = await refreshOne(ctx, row.logical_path);
25
+ out.push(r);
26
+ if (r.status === "ok") logger.info(`refresh: ${row.logical_path} → new version ${r.new_version_id}`);
27
+ } catch (err) {
28
+ const msg = err instanceof Error ? err.message : String(err);
29
+ logger.warn(`refresh: ${row.logical_path} failed (${msg})`);
30
+ out.push({ logical_path: row.logical_path, status: "failed", error: msg });
31
+ }
23
32
  }
24
- }
25
- return out;
33
+ return out;
34
+ });
26
35
  }
27
36
 
28
37
  /**