@desplega.ai/agent-swarm 1.93.0 → 1.94.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +2 -2
  2. package/openapi.json +180 -1
  3. package/package.json +1 -1
  4. package/src/be/db.ts +63 -7
  5. package/src/be/migrations/090_model_tiers.sql +2 -0
  6. package/src/be/migrations/091_seed_swarm_operations_metrics.sql +12 -0
  7. package/src/be/migrations/092_metrics_dashboard_combobox_filters.sql +68 -0
  8. package/src/be/migrations/093_slack_message_tracking.sql +6 -0
  9. package/src/be/migrations/runner.ts +52 -0
  10. package/src/be/modelsdev-cache.json +2060 -198
  11. package/src/be/scripts/boot-reembed.ts +74 -0
  12. package/src/be/scripts/db.ts +19 -3
  13. package/src/be/seed/index.ts +1 -1
  14. package/src/be/seed/registry.ts +2 -2
  15. package/src/be/seed/runner.ts +5 -5
  16. package/src/be/seed/types.ts +6 -1
  17. package/src/be/seed-pricing.ts +1 -0
  18. package/src/be/seed-scripts/index.ts +3 -2
  19. package/src/commands/runner.ts +83 -13
  20. package/src/http/index.ts +13 -2
  21. package/src/http/metrics.ts +55 -6
  22. package/src/http/schedules.ts +16 -15
  23. package/src/http/script-runs.ts +7 -1
  24. package/src/http/scripts.ts +147 -1
  25. package/src/http/tasks.ts +7 -0
  26. package/src/model-tiers.ts +140 -0
  27. package/src/providers/claude-managed-models.ts +9 -0
  28. package/src/providers/opencode-adapter.ts +1 -0
  29. package/src/providers/pi-mono-adapter.ts +78 -6
  30. package/src/scheduler/scheduler.ts +22 -34
  31. package/src/server-user.ts +8 -2
  32. package/src/slack/responses.ts +39 -11
  33. package/src/slack/watcher.ts +121 -8
  34. package/src/tests/agents-list-model-display.test.ts +13 -0
  35. package/src/tests/aws-error-classifier.test.ts +148 -0
  36. package/src/tests/claude-managed-adapter.test.ts +12 -0
  37. package/src/tests/context-window.test.ts +7 -0
  38. package/src/tests/http-api-integration.test.ts +19 -0
  39. package/src/tests/metrics-http.test.ts +137 -3
  40. package/src/tests/migration-046-budgets.test.ts +33 -0
  41. package/src/tests/migration-runner-regressions.test.ts +69 -0
  42. package/src/tests/model-control.test.ts +162 -46
  43. package/src/tests/opencode-adapter.test.ts +9 -0
  44. package/src/tests/pi-mono-adapter.test.ts +319 -0
  45. package/src/tests/providers/pi-cost.test.ts +9 -0
  46. package/src/tests/runner-fallback-output.test.ts +50 -0
  47. package/src/tests/scripts-boot-reembed.test.ts +163 -0
  48. package/src/tests/scripts-embeddings.test.ts +90 -0
  49. package/src/tests/seed.test.ts +26 -1
  50. package/src/tests/session-costs-model-key-normalize.test.ts +2 -0
  51. package/src/tests/slack-watcher.test.ts +66 -0
  52. package/src/tests/workflow-agent-task.test.ts +5 -2
  53. package/src/tests/workflow-validation-port-routing.test.ts +181 -0
  54. package/src/tools/memory-get.ts +11 -0
  55. package/src/tools/memory-search.ts +18 -0
  56. package/src/tools/schedules/create-schedule.ts +71 -70
  57. package/src/tools/schedules/update-schedule.ts +43 -31
  58. package/src/tools/send-task.ts +16 -5
  59. package/src/tools/task-action.ts +11 -3
  60. package/src/types.ts +29 -0
  61. package/src/utils/aws-error-classifier.ts +97 -0
  62. package/src/utils/context-window.ts +2 -0
  63. package/src/utils/credentials.test.ts +68 -0
  64. package/src/utils/credentials.ts +44 -3
  65. package/src/utils/pretty-print.ts +25 -10
  66. package/src/workflows/engine.ts +3 -2
  67. package/src/workflows/executors/agent-task.ts +3 -1
@@ -350,3 +350,322 @@ describe("Cost aggregation from SessionStats", () => {
350
350
  expect(cost.numTurns).toBe(0);
351
351
  });
352
352
  });
353
+
354
+ // ============================================================================
355
+ // AWS SDK error detection — event-driven PiMonoSession + classifyAwsSdkError
356
+ //
357
+ // Redesign (2026-06): detection is driven entirely by structured
358
+ // pi-coding-agent events, NOT stderr scraping or auto_retry_start inference:
359
+ // - `message_end` with an assistant `stopReason:'error'` → the ONLY signal
360
+ // for NON-retryable failures, critically AWS auth (ExpiredToken /
361
+ // CredentialsProviderError), which never enter pi's _isRetryableError loop.
362
+ // - `auto_retry_end` with `success:false` + `finalError` → the definitive
363
+ // terminal failure for the RETRYABLE class (throttle / 5xx / timeout).
364
+ // - recovery (`message_end` success, or `auto_retry_end` success:true) clears
365
+ // the tracked error so a recovered turn never surfaces as a false failure.
366
+ // ============================================================================
367
+
368
+ import type { AgentSession } from "@earendil-works/pi-coding-agent";
369
+ import { PiMonoSession } from "../providers/pi-mono-adapter";
370
+ import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
371
+ import { classifyAwsSdkError } from "../utils/aws-error-classifier";
372
+
373
+ function makeSessionConfig(logFile: string): ProviderSessionConfig {
374
+ return {
375
+ prompt: "test prompt",
376
+ systemPrompt: "",
377
+ model: "amazon-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
378
+ role: "worker",
379
+ agentId: "test-agent-id",
380
+ taskId: "test-task-id",
381
+ apiUrl: "http://localhost:3013",
382
+ apiKey: "test-key",
383
+ cwd: "/tmp",
384
+ logFile,
385
+ iteration: 1,
386
+ };
387
+ }
388
+
389
+ type AgentSessionEvent = Parameters<Parameters<AgentSession["subscribe"]>[0]>[0];
390
+
391
+ /** Build a `message_end` event for an assistant turn that ended in error. */
392
+ function errorMessageEnd(errorMessage: string): AgentSessionEvent {
393
+ return {
394
+ type: "message_end",
395
+ message: {
396
+ role: "assistant",
397
+ content: [],
398
+ stopReason: "error",
399
+ errorMessage,
400
+ },
401
+ } as unknown as AgentSessionEvent;
402
+ }
403
+
404
+ /** Build a `message_end` event for a successful assistant turn. */
405
+ function successMessageEnd(text: string): AgentSessionEvent {
406
+ return {
407
+ type: "message_end",
408
+ message: {
409
+ role: "assistant",
410
+ content: [{ type: "text", text }],
411
+ stopReason: "stop",
412
+ },
413
+ } as unknown as AgentSessionEvent;
414
+ }
415
+
416
+ /** Build an `auto_retry_end` event (terminal retryable failure / recovery). */
417
+ function autoRetryEnd(success: boolean, finalError?: string): AgentSessionEvent {
418
+ return {
419
+ type: "auto_retry_end",
420
+ success,
421
+ attempt: 3,
422
+ ...(finalError ? { finalError } : {}),
423
+ } as unknown as AgentSessionEvent;
424
+ }
425
+
426
+ /**
427
+ * Mock AgentSession that replays a fixed list of structured events to its
428
+ * subscribers when `prompt()` is called, then resolves (no throw). This mirrors
429
+ * the real pi-coding-agent: AWS failures arrive as DATA via events, there is no
430
+ * exception to catch at the agent-swarm layer.
431
+ */
432
+ function makeMockAgentSession(opts: {
433
+ events?: AgentSessionEvent[];
434
+ throwError?: string;
435
+ }): AgentSession {
436
+ const listeners: Array<(event: AgentSessionEvent) => void> = [];
437
+ return {
438
+ sessionId: "mock-session-id",
439
+ isStreaming: false,
440
+ model: undefined,
441
+ subscribe(listener: (event: AgentSessionEvent) => void) {
442
+ listeners.push(listener);
443
+ return () => {
444
+ const idx = listeners.indexOf(listener);
445
+ if (idx >= 0) listeners.splice(idx, 1);
446
+ };
447
+ },
448
+ async prompt() {
449
+ for (const event of opts.events ?? []) {
450
+ for (const l of listeners) l(event);
451
+ }
452
+ if (opts.throwError) throw new Error(opts.throwError);
453
+ },
454
+ getContextUsage: () => null,
455
+ getSessionStats: () => ({
456
+ tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
457
+ cost: 0,
458
+ userMessages: 0,
459
+ assistantMessages: 0,
460
+ }),
461
+ abort: async () => {},
462
+ dispose: () => {},
463
+ } as unknown as AgentSession;
464
+ }
465
+
466
+ const tmpLogDir = `/tmp/pi-mono-aws-test-${Date.now()}`;
467
+
468
+ beforeAll(() => {
469
+ mkdirSync(tmpLogDir, { recursive: true });
470
+ });
471
+
472
+ afterAll(() => {
473
+ rmSync(tmpLogDir, { recursive: true, force: true });
474
+ });
475
+
476
+ async function runWithEvents(events: AgentSessionEvent[]): Promise<{
477
+ events: ProviderEvent[];
478
+ result: ProviderResult;
479
+ }> {
480
+ const logFile = join(tmpLogDir, `evt-${Date.now()}-${Math.random().toString(36).slice(2)}.log`);
481
+ const session = new PiMonoSession(
482
+ makeMockAgentSession({ events }),
483
+ makeSessionConfig(logFile),
484
+ false,
485
+ );
486
+ const emitted: ProviderEvent[] = [];
487
+ session.onEvent((e) => emitted.push(e));
488
+ const result = await session.waitForCompletion();
489
+ return { events: emitted, result };
490
+ }
491
+
492
+ function findError(events: ProviderEvent[]): Extract<ProviderEvent, { type: "error" }> | undefined {
493
+ return events.find((e) => e.type === "error") as
494
+ | Extract<ProviderEvent, { type: "error" }>
495
+ | undefined;
496
+ }
497
+
498
+ describe("PiMonoSession — NON-retryable AWS auth via message_end stopReason:'error'", () => {
499
+ // ORIGINAL-BUG REGRESSION TEST. AWS auth errors (ExpiredToken /
500
+ // CredentialsProviderError) are non-retryable: pi's _isRetryableError regex
501
+ // matches throttle/429/5xx/timeout but NOT auth tokens, so they never enter
502
+ // the retry loop. The ONLY structured signal is a `message_end` assistant
503
+ // turn with stopReason:'error'. This is the Commander's original silent-fail.
504
+ test("ExpiredToken stopReason:'error' → type:error category aws-auth + terminal isError", async () => {
505
+ const { events, result } = await runWithEvents([
506
+ errorMessageEnd(
507
+ "ExpiredTokenException: The security token included in the request is expired",
508
+ ),
509
+ ]);
510
+ const errorEvent = findError(events);
511
+ expect(errorEvent).toBeDefined();
512
+ expect(errorEvent?.category).toBe("aws-auth");
513
+ expect(errorEvent?.message).toContain("aws sso login");
514
+ expect(result.isError).toBe(true);
515
+ expect(result.errorCategory).toBe("aws-auth");
516
+ expect(result.exitCode).toBe(1);
517
+ expect(result.failureReason).toContain("aws sso login");
518
+ });
519
+
520
+ test("CredentialsProviderError stopReason:'error' → aws-auth terminal failure", async () => {
521
+ const { events, result } = await runWithEvents([
522
+ errorMessageEnd("CredentialsProviderError: Could not load credentials from any providers"),
523
+ ]);
524
+ expect(findError(events)?.category).toBe("aws-auth");
525
+ expect(result.errorCategory).toBe("aws-auth");
526
+ expect(result.isError).toBe(true);
527
+ });
528
+
529
+ test("AccessDeniedException stopReason:'error' → aws-access terminal failure", async () => {
530
+ const { events, result } = await runWithEvents([
531
+ errorMessageEnd("AccessDeniedException: not authorized to perform: bedrock:InvokeModel"),
532
+ ]);
533
+ expect(findError(events)?.category).toBe("aws-access");
534
+ expect(result.errorCategory).toBe("aws-access");
535
+ });
536
+
537
+ test("ValidationException stopReason:'error' → aws-model terminal failure", async () => {
538
+ const { events, result } = await runWithEvents([
539
+ errorMessageEnd(
540
+ "ValidationException: Invocation of model ID x with on-demand throughput isn't supported",
541
+ ),
542
+ ]);
543
+ expect(findError(events)?.category).toBe("aws-model");
544
+ expect(result.errorCategory).toBe("aws-model");
545
+ });
546
+
547
+ test("non-AWS stopReason:'error' → still terminal failure, no AWS category", async () => {
548
+ const { events, result } = await runWithEvents([
549
+ errorMessageEnd("Some unrecognized provider failure"),
550
+ ]);
551
+ const errorEvent = findError(events);
552
+ // A terminal stopReason:'error' is a genuine failure by definition — it must
553
+ // surface (no silent green), but it carries no AWS category.
554
+ expect(errorEvent).toBeDefined();
555
+ expect(errorEvent?.category).toBeUndefined();
556
+ expect(errorEvent?.message).toContain("Some unrecognized provider failure");
557
+ expect(result.isError).toBe(true);
558
+ expect(result.exitCode).toBe(1);
559
+ expect(result.errorCategory).toBeUndefined();
560
+ });
561
+ });
562
+
563
+ describe("PiMonoSession — RETRYABLE failure via auto_retry_end success:false", () => {
564
+ test("throttle finalError after exhausted retries → aws-throttle terminal failure", async () => {
565
+ const { events, result } = await runWithEvents([
566
+ // Each retry attempt also produces an errored message_end before retrying;
567
+ // the definitive terminal marker is auto_retry_end success:false.
568
+ errorMessageEnd("ThrottlingException: Rate exceeded"),
569
+ autoRetryEnd(false, "ThrottlingException: Rate exceeded"),
570
+ ]);
571
+ const errorEvent = findError(events);
572
+ expect(errorEvent?.category).toBe("aws-throttle");
573
+ expect(result.errorCategory).toBe("aws-throttle");
574
+ expect(result.isError).toBe(true);
575
+ expect(result.exitCode).toBe(1);
576
+ });
577
+
578
+ test("5xx finalError (non-AWS) → terminal failure surfaced, no AWS category", async () => {
579
+ const { events, result } = await runWithEvents([
580
+ autoRetryEnd(false, "provider returned error: 503 service unavailable"),
581
+ ]);
582
+ expect(findError(events)).toBeDefined();
583
+ expect(result.isError).toBe(true);
584
+ expect(result.errorCategory).toBeUndefined();
585
+ });
586
+ });
587
+
588
+ describe("PiMonoSession — recovery clears the tracked error (no false failure)", () => {
589
+ // The never-cleared-on-recovery false-fail bug the redesign eliminates.
590
+ test("errored turn then successful auto_retry_end → success, output, no error", async () => {
591
+ const { events, result } = await runWithEvents([
592
+ errorMessageEnd("ThrottlingException: Rate exceeded"),
593
+ autoRetryEnd(true),
594
+ successMessageEnd("Recovered answer"),
595
+ ]);
596
+ expect(findError(events)).toBeUndefined();
597
+ expect(result.isError).toBe(false);
598
+ expect(result.exitCode).toBe(0);
599
+ expect(result.output).toBe("Recovered answer");
600
+ });
601
+
602
+ test("errored turn then a later successful message_end → success, no error", async () => {
603
+ const { events, result } = await runWithEvents([
604
+ errorMessageEnd("ExpiredTokenException: token expired"),
605
+ successMessageEnd("Final answer after creds refreshed"),
606
+ ]);
607
+ expect(findError(events)).toBeUndefined();
608
+ expect(result.isError).toBe(false);
609
+ expect(result.output).toBe("Final answer after creds refreshed");
610
+ });
611
+
612
+ test("clean success path emits a result event and no error", async () => {
613
+ const { events, result } = await runWithEvents([successMessageEnd("All done")]);
614
+ expect(findError(events)).toBeUndefined();
615
+ expect(events.some((e) => e.type === "result")).toBe(true);
616
+ expect(result.isError).toBe(false);
617
+ expect(result.output).toBe("All done");
618
+ });
619
+ });
620
+
621
+ describe("PiMonoSession — thrown-exception catch path (defense-in-depth)", () => {
622
+ // AWS failures arrive as events, not throws, but a genuine unexpected throw
623
+ // (MCP/transport) must still fail the task; an AWS signature that reaches the
624
+ // catch is still classified.
625
+ async function runWithThrow(message: string) {
626
+ const logFile = join(
627
+ tmpLogDir,
628
+ `throw-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
629
+ );
630
+ const session = new PiMonoSession(
631
+ makeMockAgentSession({ throwError: message }),
632
+ makeSessionConfig(logFile),
633
+ false,
634
+ );
635
+ const emitted: ProviderEvent[] = [];
636
+ session.onEvent((e) => emitted.push(e));
637
+ const result = await session.waitForCompletion();
638
+ return { events: emitted, result };
639
+ }
640
+
641
+ test("thrown ExpiredToken → aws-auth error event + terminal failure", async () => {
642
+ const { events, result } = await runWithThrow(
643
+ "ExpiredTokenException: The security token is expired",
644
+ );
645
+ expect(findError(events)?.category).toBe("aws-auth");
646
+ expect(result.isError).toBe(true);
647
+ expect(result.errorCategory).toBe("aws-auth");
648
+ });
649
+
650
+ test("thrown non-AWS error → no AWS category, still terminal failure", async () => {
651
+ const { events, result } = await runWithThrow("ECONNREFUSED 127.0.0.1:3013");
652
+ expect(findError(events)).toBeUndefined();
653
+ expect(result.isError).toBe(true);
654
+ expect(result.errorCategory).toBeUndefined();
655
+ });
656
+ });
657
+
658
+ describe("classifyAwsSdkError — all 4 categories (quick summary)", () => {
659
+ test("all four categories are reachable", () => {
660
+ const cases: Array<[string, string]> = [
661
+ ["ExpiredTokenException: token expired", "aws-auth"],
662
+ ["ThrottlingException: rate exceeded", "aws-throttle"],
663
+ ["AccessDeniedException: no permission", "aws-access"],
664
+ ["ValidationException: bad model", "aws-model"],
665
+ ];
666
+ for (const [msg, expected] of cases) {
667
+ const r = classifyAwsSdkError(msg);
668
+ expect(r?.category).toBe(expected);
669
+ }
670
+ });
671
+ });
@@ -107,6 +107,15 @@ describe("PiMonoSession — provider tag on CostData", () => {
107
107
  const session = new PiMonoSession(fake, makeConfig(logFile), false);
108
108
  session.onEvent((e) => events.push(e));
109
109
 
110
+ const sessionInit = events.find((e) => e.type === "session_init");
111
+ expect(sessionInit).toBeDefined();
112
+ if (sessionInit?.type === "session_init") {
113
+ expect(sessionInit.provider).toBe("pi");
114
+ expect(sessionInit.harnessVariant).toBe("stock");
115
+ expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
116
+ expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
117
+ }
118
+
110
119
  const result = await session.waitForCompletion();
111
120
 
112
121
  // The load-bearing assertion. Phase 2's API recompute path keys off
@@ -1,7 +1,11 @@
1
1
  import { afterAll, beforeAll, describe, expect, test } from "bun:test";
2
+ import { mkdir, mkdtemp, rm } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
2
5
  import {
3
6
  type ApiConfig,
4
7
  ensureTaskFinished,
8
+ getBridgeFailureDiagnostics,
5
9
  handleStructuredOutputFallback,
6
10
  } from "../commands/runner";
7
11
 
@@ -381,6 +385,27 @@ describe("ensureTaskFinished", () => {
381
385
  expect(lastFinishBody!.failureReason).toBe("Out of memory");
382
386
  });
383
387
 
388
+ test("appends failure diagnostics when exit code is non-zero", async () => {
389
+ resetMocks();
390
+
391
+ await ensureTaskFinished(
392
+ makeConfig(),
393
+ "worker",
394
+ "task-14b",
395
+ 1,
396
+ "Session error (exit code 1): Unknown error",
397
+ undefined,
398
+ "claude",
399
+ "Claude bridge final tmux pane tail (/tmp/run/tmux-pane-final.txt):\nraw pane tail",
400
+ );
401
+
402
+ expect(lastFinishBody).toBeTruthy();
403
+ expect(lastFinishBody!.status).toBe("failed");
404
+ expect(lastFinishBody!.failureReason).toBe(
405
+ "Session error (exit code 1): Unknown error\n\nClaude bridge final tmux pane tail (/tmp/run/tmux-pane-final.txt):\nraw pane tail",
406
+ );
407
+ });
408
+
384
409
  test("truncates long progress to 2000 chars", async () => {
385
410
  resetMocks();
386
411
  const longProgress = "x".repeat(3000);
@@ -399,3 +424,28 @@ describe("ensureTaskFinished", () => {
399
424
  expect((lastFinishBody!.output as string).length).toBe(2000);
400
425
  });
401
426
  });
427
+
428
+ describe("getBridgeFailureDiagnostics", () => {
429
+ test("returns latest tmux pane artifact and 40-line tail", async () => {
430
+ const cwd = await mkdtemp(join(tmpdir(), "runner-bridge-diagnostics-"));
431
+ try {
432
+ const older = join(cwd, ".claude-bridge/runs/2026-01-01T00-00-00-000Z-old");
433
+ const newer = join(cwd, ".claude-bridge/runs/2026-01-01T00-00-01-000Z-new");
434
+ await mkdir(older, { recursive: true });
435
+ await mkdir(newer, { recursive: true });
436
+ await Bun.write(join(older, "tmux-pane-final.txt"), "old pane");
437
+ await Bun.write(
438
+ join(newer, "tmux-pane-final.txt"),
439
+ Array.from({ length: 45 }, (_, i) => `line ${i + 1}`).join("\n"),
440
+ );
441
+
442
+ const diagnostics = await getBridgeFailureDiagnostics(cwd);
443
+
444
+ expect(diagnostics?.artifactPath).toBe(join(newer, "tmux-pane-final.txt"));
445
+ expect(diagnostics?.paneTail?.startsWith("line 6\nline 7")).toBe(true);
446
+ expect(diagnostics?.paneTail?.endsWith("line 45")).toBe(true);
447
+ } finally {
448
+ await rm(cwd, { recursive: true, force: true });
449
+ }
450
+ });
451
+ });
@@ -0,0 +1,163 @@
1
+ import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
2
+ import { unlink } from "node:fs/promises";
3
+ import { closeDb, getDb, initDb } from "../be/db";
4
+ import type { EmbeddingProvider } from "../be/memory/types";
5
+ import { runBootReembedScripts } from "../be/scripts/boot-reembed";
6
+ import { upsertScriptByName } from "../be/scripts/db";
7
+ import { setScriptEmbeddingProviderForTests } from "../be/scripts/embeddings";
8
+
9
+ const TEST_DB_PATH = "./test-scripts-boot-reembed.sqlite";
10
+
11
+ const signatureJson = JSON.stringify({
12
+ argsType: "{ value: string }",
13
+ resultType: "Promise<{ ok: boolean }>",
14
+ description: "",
15
+ });
16
+
17
+ async function clearDb() {
18
+ for (const suffix of ["", "-wal", "-shm"]) {
19
+ try {
20
+ await unlink(TEST_DB_PATH + suffix);
21
+ } catch {}
22
+ }
23
+ }
24
+
25
+ function source(label: string) {
26
+ return `export default async () => ({ label: ${JSON.stringify(label)} });`;
27
+ }
28
+
29
+ class FakeEmbeddingProvider implements EmbeddingProvider {
30
+ readonly name = "test/fake-boot-reembed";
31
+ readonly dimensions = 5;
32
+ readonly calls: string[] = [];
33
+
34
+ async embed(text: string): Promise<Float32Array | null> {
35
+ this.calls.push(text);
36
+ return new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5]);
37
+ }
38
+
39
+ async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
40
+ return Promise.all(texts.map((text) => this.embed(text)));
41
+ }
42
+
43
+ reset(): void {
44
+ this.calls.length = 0;
45
+ }
46
+ }
47
+
48
+ let provider: FakeEmbeddingProvider;
49
+
50
+ function embeddingCount(scriptId: string): number {
51
+ return (
52
+ getDb()
53
+ .prepare<{ count: number }, [string]>(
54
+ "SELECT COUNT(*) as count FROM script_embeddings WHERE scriptId = ?",
55
+ )
56
+ .get(scriptId)?.count ?? 0
57
+ );
58
+ }
59
+
60
+ function totalEmbeddingCount(): number {
61
+ return (
62
+ getDb().prepare<{ count: number }, []>("SELECT COUNT(*) as count FROM script_embeddings").get()
63
+ ?.count ?? 0
64
+ );
65
+ }
66
+
67
+ beforeAll(async () => {
68
+ await clearDb();
69
+ initDb(TEST_DB_PATH);
70
+ });
71
+
72
+ afterAll(async () => {
73
+ setScriptEmbeddingProviderForTests(null);
74
+ closeDb();
75
+ await clearDb();
76
+ });
77
+
78
+ beforeEach(() => {
79
+ getDb().run("DELETE FROM scripts");
80
+ getDb().run("DELETE FROM script_embeddings");
81
+ provider = new FakeEmbeddingProvider();
82
+ setScriptEmbeddingProviderForTests(provider);
83
+ });
84
+
85
+ describe("boot-reembed-scripts", () => {
86
+ test("backfills scripts that were seeded with embeddingMode: skip", async () => {
87
+ const result = await upsertScriptByName({
88
+ name: "skipped-embed",
89
+ scope: "global",
90
+ source: source("skipped"),
91
+ description: "A script seeded without embedding",
92
+ intent: "Test backfill",
93
+ signatureJson,
94
+ embeddingMode: "skip",
95
+ });
96
+ expect(embeddingCount(result.script.id)).toBe(0);
97
+
98
+ provider.reset();
99
+ await runBootReembedScripts();
100
+ expect(embeddingCount(result.script.id)).toBe(1);
101
+ expect(provider.calls).toHaveLength(1);
102
+ });
103
+
104
+ test("no-ops when all scripts already have embeddings", async () => {
105
+ await upsertScriptByName({
106
+ name: "already-embedded",
107
+ scope: "global",
108
+ source: source("embedded"),
109
+ description: "Already has embedding",
110
+ intent: "No-op test",
111
+ signatureJson,
112
+ });
113
+ expect(totalEmbeddingCount()).toBe(1);
114
+
115
+ provider.reset();
116
+ await runBootReembedScripts();
117
+ expect(provider.calls).toHaveLength(0);
118
+ });
119
+
120
+ test("skips scratch scripts during backfill", async () => {
121
+ await upsertScriptByName({
122
+ name: "scratch-no-backfill",
123
+ scope: "agent",
124
+ scopeId: "agent-1",
125
+ source: source("scratch"),
126
+ description: "Scratch script",
127
+ intent: "Should not be backfilled",
128
+ signatureJson,
129
+ isScratch: true,
130
+ });
131
+
132
+ provider.reset();
133
+ await runBootReembedScripts();
134
+ expect(provider.calls).toHaveLength(0);
135
+ });
136
+
137
+ test("backfills only scripts missing embeddings, not those that already have them", async () => {
138
+ const withEmbed = await upsertScriptByName({
139
+ name: "has-embed",
140
+ scope: "global",
141
+ source: source("has"),
142
+ description: "Has embedding",
143
+ intent: "Already embedded",
144
+ signatureJson,
145
+ });
146
+ const withoutEmbed = await upsertScriptByName({
147
+ name: "missing-embed",
148
+ scope: "global",
149
+ source: source("missing"),
150
+ description: "Missing embedding",
151
+ intent: "Needs backfill",
152
+ signatureJson,
153
+ embeddingMode: "skip",
154
+ });
155
+ expect(embeddingCount(withEmbed.script.id)).toBe(1);
156
+ expect(embeddingCount(withoutEmbed.script.id)).toBe(0);
157
+
158
+ provider.reset();
159
+ await runBootReembedScripts();
160
+ expect(provider.calls).toHaveLength(1);
161
+ expect(embeddingCount(withoutEmbed.script.id)).toBe(1);
162
+ });
163
+ });
@@ -268,6 +268,96 @@ describe("script embeddings", () => {
268
268
  expect(topOneHits).toBeGreaterThanOrEqual(4);
269
269
  });
270
270
 
271
+ test("embeddingMode: skip prevents embedding on new script", async () => {
272
+ provider.reset();
273
+ const result = await upsertScriptByName({
274
+ name: "skip-new",
275
+ scope: "agent",
276
+ scopeId: "agent-1",
277
+ source: source("skip-new"),
278
+ description: "Should not embed",
279
+ intent: "Skip mode test",
280
+ signatureJson,
281
+ agentId: "agent-1",
282
+ embeddingMode: "skip",
283
+ });
284
+ expect(result.isNew).toBe(true);
285
+ expect(embeddingCount(result.script.id)).toBe(0);
286
+ expect(provider.calls).toHaveLength(0);
287
+ });
288
+
289
+ test("embeddingMode: skip prevents embedding on source change", async () => {
290
+ const first = await upsertScriptByName({
291
+ name: "skip-update",
292
+ scope: "agent",
293
+ scopeId: "agent-1",
294
+ source: source("v1"),
295
+ description: "Will update",
296
+ intent: "Skip mode update test",
297
+ signatureJson,
298
+ agentId: "agent-1",
299
+ });
300
+ expect(embeddingCount(first.script.id)).toBe(1);
301
+
302
+ provider.reset();
303
+ const second = await upsertScriptByName({
304
+ name: "skip-update",
305
+ scope: "agent",
306
+ scopeId: "agent-1",
307
+ source: source("v2"),
308
+ description: "Updated source",
309
+ intent: "Skip mode update test",
310
+ signatureJson,
311
+ agentId: "agent-1",
312
+ embeddingMode: "skip",
313
+ });
314
+ expect(second.contentDeduped).toBe(false);
315
+ expect(provider.calls).toHaveLength(0);
316
+ });
317
+
318
+ test("embeddingMode: skip prevents embedding on metadata change", async () => {
319
+ await upsertScriptByName({
320
+ name: "skip-meta",
321
+ scope: "agent",
322
+ scopeId: "agent-1",
323
+ source: source("skip-meta"),
324
+ description: "Original description",
325
+ intent: "Original intent",
326
+ signatureJson,
327
+ agentId: "agent-1",
328
+ });
329
+
330
+ provider.reset();
331
+ await upsertScriptByName({
332
+ name: "skip-meta",
333
+ scope: "agent",
334
+ scopeId: "agent-1",
335
+ source: source("skip-meta"),
336
+ description: "Changed description",
337
+ intent: "Changed intent",
338
+ signatureJson,
339
+ agentId: "agent-1",
340
+ embeddingMode: "skip",
341
+ });
342
+ expect(provider.calls).toHaveLength(0);
343
+ });
344
+
345
+ test("embeddingMode defaults to sync (embeds normally)", async () => {
346
+ provider.reset();
347
+ const result = await upsertScriptByName({
348
+ name: "default-sync",
349
+ scope: "agent",
350
+ scopeId: "agent-1",
351
+ source: source("default-sync"),
352
+ description: "Should embed by default",
353
+ intent: "Default mode test",
354
+ signatureJson,
355
+ agentId: "agent-1",
356
+ });
357
+ expect(embeddingCount(result.script.id)).toBe(1);
358
+ expect(provider.calls).toHaveLength(1);
359
+ });
360
+
271
361
  test("reembedAllScripts updates every explicit script", async () => {
272
362
  await upsertFixture({ name: "linear-one", description: "Linear issue parser" });
273
363
  await upsertFixture({ name: "slack-one", description: "Slack message digest" });