@desplega.ai/agent-swarm 1.93.0 → 1.94.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/openapi.json +180 -1
- package/package.json +1 -1
- package/src/be/db.ts +63 -7
- package/src/be/migrations/090_model_tiers.sql +2 -0
- package/src/be/migrations/091_seed_swarm_operations_metrics.sql +12 -0
- package/src/be/migrations/092_metrics_dashboard_combobox_filters.sql +68 -0
- package/src/be/migrations/093_slack_message_tracking.sql +6 -0
- package/src/be/migrations/runner.ts +52 -0
- package/src/be/modelsdev-cache.json +2060 -198
- package/src/be/scripts/boot-reembed.ts +74 -0
- package/src/be/scripts/db.ts +19 -3
- package/src/be/seed/index.ts +1 -1
- package/src/be/seed/registry.ts +2 -2
- package/src/be/seed/runner.ts +5 -5
- package/src/be/seed/types.ts +6 -1
- package/src/be/seed-pricing.ts +1 -0
- package/src/be/seed-scripts/index.ts +3 -2
- package/src/commands/runner.ts +83 -13
- package/src/http/index.ts +13 -2
- package/src/http/metrics.ts +55 -6
- package/src/http/schedules.ts +16 -15
- package/src/http/script-runs.ts +7 -1
- package/src/http/scripts.ts +147 -1
- package/src/http/tasks.ts +7 -0
- package/src/model-tiers.ts +140 -0
- package/src/providers/claude-managed-models.ts +9 -0
- package/src/providers/opencode-adapter.ts +1 -0
- package/src/providers/pi-mono-adapter.ts +78 -6
- package/src/scheduler/scheduler.ts +22 -34
- package/src/server-user.ts +8 -2
- package/src/slack/responses.ts +39 -11
- package/src/slack/watcher.ts +121 -8
- package/src/tests/agents-list-model-display.test.ts +13 -0
- package/src/tests/aws-error-classifier.test.ts +148 -0
- package/src/tests/claude-managed-adapter.test.ts +12 -0
- package/src/tests/context-window.test.ts +7 -0
- package/src/tests/http-api-integration.test.ts +19 -0
- package/src/tests/metrics-http.test.ts +137 -3
- package/src/tests/migration-046-budgets.test.ts +33 -0
- package/src/tests/migration-runner-regressions.test.ts +69 -0
- package/src/tests/model-control.test.ts +162 -46
- package/src/tests/opencode-adapter.test.ts +9 -0
- package/src/tests/pi-mono-adapter.test.ts +319 -0
- package/src/tests/providers/pi-cost.test.ts +9 -0
- package/src/tests/runner-fallback-output.test.ts +50 -0
- package/src/tests/scripts-boot-reembed.test.ts +163 -0
- package/src/tests/scripts-embeddings.test.ts +90 -0
- package/src/tests/seed.test.ts +26 -1
- package/src/tests/session-costs-model-key-normalize.test.ts +2 -0
- package/src/tests/slack-watcher.test.ts +66 -0
- package/src/tests/workflow-agent-task.test.ts +5 -2
- package/src/tests/workflow-validation-port-routing.test.ts +181 -0
- package/src/tools/memory-get.ts +11 -0
- package/src/tools/memory-search.ts +18 -0
- package/src/tools/schedules/create-schedule.ts +71 -70
- package/src/tools/schedules/update-schedule.ts +43 -31
- package/src/tools/send-task.ts +16 -5
- package/src/tools/task-action.ts +11 -3
- package/src/types.ts +29 -0
- package/src/utils/aws-error-classifier.ts +97 -0
- package/src/utils/context-window.ts +2 -0
- package/src/utils/credentials.test.ts +68 -0
- package/src/utils/credentials.ts +44 -3
- package/src/utils/pretty-print.ts +25 -10
- package/src/workflows/engine.ts +3 -2
- package/src/workflows/executors/agent-task.ts +3 -1
|
@@ -350,3 +350,322 @@ describe("Cost aggregation from SessionStats", () => {
|
|
|
350
350
|
expect(cost.numTurns).toBe(0);
|
|
351
351
|
});
|
|
352
352
|
});
|
|
353
|
+
|
|
354
|
+
// ============================================================================
// AWS SDK error detection — event-driven PiMonoSession + classifyAwsSdkError
//
// Redesign (2026-06): detection is driven entirely by structured
// pi-coding-agent events, NOT stderr scraping or auto_retry_start inference:
//   - `message_end` with an assistant `stopReason:'error'` → the ONLY signal
//     for NON-retryable failures, critically AWS auth (ExpiredToken /
//     CredentialsProviderError), which never enter pi's _isRetryableError loop.
//   - `auto_retry_end` with `success:false` + `finalError` → the definitive
//     terminal failure for the RETRYABLE class (throttle / 5xx / timeout).
//   - recovery (`message_end` success, or `auto_retry_end` success:true) clears
//     the tracked error so a recovered turn never surfaces as a false failure.
// ============================================================================
|
|
367
|
+
|
|
368
|
+
import type { AgentSession } from "@earendil-works/pi-coding-agent";
|
|
369
|
+
import { PiMonoSession } from "../providers/pi-mono-adapter";
|
|
370
|
+
import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
|
|
371
|
+
import { classifyAwsSdkError } from "../utils/aws-error-classifier";
|
|
372
|
+
|
|
373
|
+
/**
 * Build the provider session config shared by the AWS error-detection tests.
 *
 * Every field is a fixed placeholder except `logFile`, which each test run
 * supplies so sessions do not write to the same log.
 *
 * @param logFile Path of the log file the session under test should use.
 * @returns A complete `ProviderSessionConfig` for a Bedrock-model worker.
 */
function makeSessionConfig(logFile: string): ProviderSessionConfig {
  return {
    prompt: "test prompt",
    systemPrompt: "",
    model: "amazon-bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
    role: "worker",
    agentId: "test-agent-id",
    taskId: "test-task-id",
    apiUrl: "http://localhost:3013",
    apiKey: "test-key",
    cwd: "/tmp",
    logFile,
    iteration: 1,
  };
}
|
|
388
|
+
|
|
389
|
+
type AgentSessionEvent = Parameters<Parameters<AgentSession["subscribe"]>[0]>[0];
|
|
390
|
+
|
|
391
|
+
/**
 * Build a `message_end` event for an assistant turn that ended in error.
 *
 * @param errorMessage Error text carried on the message.
 * @returns A partial event object cast to `AgentSessionEvent`; only the fields
 *   set here are populated.
 */
function errorMessageEnd(errorMessage: string): AgentSessionEvent {
  return {
    type: "message_end",
    message: {
      role: "assistant",
      content: [],
      stopReason: "error",
      errorMessage,
    },
    // Deliberately partial mock, hence the double cast.
  } as unknown as AgentSessionEvent;
}
|
|
403
|
+
|
|
404
|
+
/**
 * Build a `message_end` event for a successful assistant turn.
 *
 * @param text Text placed in the single text content part of the message.
 * @returns A partial event object cast to `AgentSessionEvent`; only the fields
 *   set here are populated.
 */
function successMessageEnd(text: string): AgentSessionEvent {
  return {
    type: "message_end",
    message: {
      role: "assistant",
      content: [{ type: "text", text }],
      stopReason: "stop",
    },
    // Deliberately partial mock, hence the double cast.
  } as unknown as AgentSessionEvent;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Build an `auto_retry_end` event (terminal retryable failure / recovery).
 *
 * @param success Whether the retry sequence ended in success.
 * @param finalError Final error text; the `finalError` key is omitted from the
 *   event only when this argument is `undefined`, so an empty string is kept.
 * @returns A partial event object (fixed `attempt: 3`) cast to `AgentSessionEvent`.
 */
function autoRetryEnd(success: boolean, finalError?: string): AgentSessionEvent {
  return {
    type: "auto_retry_end",
    success,
    attempt: 3,
    // Check for undefined rather than truthiness so "" is not silently dropped.
    ...(finalError !== undefined ? { finalError } : {}),
  } as unknown as AgentSessionEvent;
}
|
|
425
|
+
|
|
426
|
+
/**
 * Mock AgentSession that replays a fixed list of structured events to its
 * subscribers when `prompt()` is called.
 *
 * Without `throwError`, `prompt()` then resolves (no throw). This mirrors the
 * real pi-coding-agent: AWS failures arrive as DATA via events, there is no
 * exception to catch at the agent-swarm layer. With `throwError` set,
 * `prompt()` rejects with that message after replaying the events, which lets
 * tests exercise the thrown-exception path.
 *
 * @param opts.events Events delivered, in order, to every subscriber.
 * @param opts.throwError When set, message of the `Error` thrown by `prompt()`.
 * @returns A partial object cast to `AgentSession`; only the members defined
 *   here exist.
 */
function makeMockAgentSession(opts: {
  events?: AgentSessionEvent[];
  throwError?: string;
}): AgentSession {
  const listeners: Array<(event: AgentSessionEvent) => void> = [];
  return {
    sessionId: "mock-session-id",
    isStreaming: false,
    model: undefined,
    subscribe(listener: (event: AgentSessionEvent) => void) {
      listeners.push(listener);
      // Return an unsubscribe function.
      return () => {
        const idx = listeners.indexOf(listener);
        if (idx >= 0) listeners.splice(idx, 1);
      };
    },
    async prompt() {
      for (const event of opts.events ?? []) {
        // Dispatch over a snapshot: a listener that unsubscribes while handling
        // an event must not cause the next listener to be skipped.
        for (const listener of [...listeners]) listener(event);
      }
      if (opts.throwError) throw new Error(opts.throwError);
    },
    getContextUsage: () => null,
    getSessionStats: () => ({
      tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      cost: 0,
      userMessages: 0,
      assistantMessages: 0,
    }),
    abort: async () => {},
    dispose: () => {},
  } as unknown as AgentSession;
}
|
|
465
|
+
|
|
466
|
+
// Scratch directory for the per-session log files created by these tests.
// The process id is part of the name so two test processes started in the same
// millisecond cannot share (and then delete) each other's directory.
const tmpLogDir = `/tmp/pi-mono-aws-test-${process.pid}-${Date.now()}`;

beforeAll(() => {
  mkdirSync(tmpLogDir, { recursive: true });
});

afterAll(() => {
  // `force` makes the cleanup a no-op if the directory is already gone.
  rmSync(tmpLogDir, { recursive: true, force: true });
});
|
|
475
|
+
|
|
476
|
+
/**
 * Run a `PiMonoSession` over a mock agent session that replays `events`, and
 * collect what the session produces.
 *
 * @param events Agent-session events the mock replays when prompted.
 * @returns The provider events emitted through `onEvent` (in order) and the
 *   result of `waitForCompletion()`.
 */
async function runWithEvents(events: AgentSessionEvent[]): Promise<{
  events: ProviderEvent[];
  result: ProviderResult;
}> {
  // Unique log file per run so sessions never write to the same path.
  const logFile = join(tmpLogDir, `evt-${Date.now()}-${Math.random().toString(36).slice(2)}.log`);
  const session = new PiMonoSession(
    makeMockAgentSession({ events }),
    makeSessionConfig(logFile),
    false,
  );
  const emitted: ProviderEvent[] = [];
  session.onEvent((e) => emitted.push(e));
  const result = await session.waitForCompletion();
  return { events: emitted, result };
}
|
|
491
|
+
|
|
492
|
+
/**
 * Return the first provider event whose `type` is `"error"`, if any.
 *
 * The callback is a type predicate, so the result is narrowed by the compiler
 * instead of being forced with an unchecked `as` cast.
 *
 * @param events Provider events to search, in emission order.
 * @returns The first error event, or `undefined` when none was emitted.
 */
function findError(events: ProviderEvent[]): Extract<ProviderEvent, { type: "error" }> | undefined {
  return events.find(
    (e): e is Extract<ProviderEvent, { type: "error" }> => e.type === "error",
  );
}
|
|
497
|
+
|
|
498
|
+
describe("PiMonoSession — NON-retryable AWS auth via message_end stopReason:'error'", () => {
  // ORIGINAL-BUG REGRESSION TEST. AWS auth errors (ExpiredToken /
  // CredentialsProviderError) are non-retryable: pi's _isRetryableError regex
  // matches throttle/429/5xx/timeout but NOT auth tokens, so they never enter
  // the retry loop. The ONLY structured signal is a `message_end` assistant
  // turn with stopReason:'error'. This is the Commander's original silent-fail.
  test("ExpiredToken stopReason:'error' → type:error category aws-auth + terminal isError", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd(
        "ExpiredTokenException: The security token included in the request is expired",
      ),
    ]);
    const errorEvent = findError(events);
    expect(errorEvent).toBeDefined();
    expect(errorEvent?.category).toBe("aws-auth");
    expect(errorEvent?.message).toContain("aws sso login");
    expect(result.isError).toBe(true);
    expect(result.errorCategory).toBe("aws-auth");
    expect(result.exitCode).toBe(1);
    expect(result.failureReason).toContain("aws sso login");
  });

  test("CredentialsProviderError stopReason:'error' → aws-auth terminal failure", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd("CredentialsProviderError: Could not load credentials from any providers"),
    ]);
    expect(findError(events)?.category).toBe("aws-auth");
    expect(result.errorCategory).toBe("aws-auth");
    expect(result.isError).toBe(true);
  });

  test("AccessDeniedException stopReason:'error' → aws-access terminal failure", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd("AccessDeniedException: not authorized to perform: bedrock:InvokeModel"),
    ]);
    expect(findError(events)?.category).toBe("aws-access");
    expect(result.errorCategory).toBe("aws-access");
  });

  test("ValidationException stopReason:'error' → aws-model terminal failure", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd(
        "ValidationException: Invocation of model ID x with on-demand throughput isn't supported",
      ),
    ]);
    expect(findError(events)?.category).toBe("aws-model");
    expect(result.errorCategory).toBe("aws-model");
  });

  test("non-AWS stopReason:'error' → still terminal failure, no AWS category", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd("Some unrecognized provider failure"),
    ]);
    const errorEvent = findError(events);
    // A terminal stopReason:'error' is a genuine failure by definition — it must
    // surface (no silent green), but it carries no AWS category.
    expect(errorEvent).toBeDefined();
    expect(errorEvent?.category).toBeUndefined();
    expect(errorEvent?.message).toContain("Some unrecognized provider failure");
    expect(result.isError).toBe(true);
    expect(result.exitCode).toBe(1);
    expect(result.errorCategory).toBeUndefined();
  });
});
|
|
562
|
+
|
|
563
|
+
describe("PiMonoSession — RETRYABLE failure via auto_retry_end success:false", () => {
  test("throttle finalError after exhausted retries → aws-throttle terminal failure", async () => {
    const { events, result } = await runWithEvents([
      // Each retry attempt also produces an errored message_end before retrying;
      // the definitive terminal marker is auto_retry_end success:false.
      errorMessageEnd("ThrottlingException: Rate exceeded"),
      autoRetryEnd(false, "ThrottlingException: Rate exceeded"),
    ]);
    const errorEvent = findError(events);
    expect(errorEvent?.category).toBe("aws-throttle");
    expect(result.errorCategory).toBe("aws-throttle");
    expect(result.isError).toBe(true);
    expect(result.exitCode).toBe(1);
  });

  test("5xx finalError (non-AWS) → terminal failure surfaced, no AWS category", async () => {
    const { events, result } = await runWithEvents([
      autoRetryEnd(false, "provider returned error: 503 service unavailable"),
    ]);
    expect(findError(events)).toBeDefined();
    expect(result.isError).toBe(true);
    expect(result.errorCategory).toBeUndefined();
  });
});
|
|
587
|
+
|
|
588
|
+
describe("PiMonoSession — recovery clears the tracked error (no false failure)", () => {
  // The never-cleared-on-recovery false-fail bug the redesign eliminates.
  test("errored turn then successful auto_retry_end → success, output, no error", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd("ThrottlingException: Rate exceeded"),
      autoRetryEnd(true),
      successMessageEnd("Recovered answer"),
    ]);
    expect(findError(events)).toBeUndefined();
    expect(result.isError).toBe(false);
    expect(result.exitCode).toBe(0);
    expect(result.output).toBe("Recovered answer");
  });

  test("errored turn then a later successful message_end → success, no error", async () => {
    const { events, result } = await runWithEvents([
      errorMessageEnd("ExpiredTokenException: token expired"),
      successMessageEnd("Final answer after creds refreshed"),
    ]);
    expect(findError(events)).toBeUndefined();
    expect(result.isError).toBe(false);
    expect(result.output).toBe("Final answer after creds refreshed");
  });

  test("clean success path emits a result event and no error", async () => {
    const { events, result } = await runWithEvents([successMessageEnd("All done")]);
    expect(findError(events)).toBeUndefined();
    expect(events.some((e) => e.type === "result")).toBe(true);
    expect(result.isError).toBe(false);
    expect(result.output).toBe("All done");
  });
});
|
|
620
|
+
|
|
621
|
+
describe("PiMonoSession — thrown-exception catch path (defense-in-depth)", () => {
  // AWS failures arrive as events, not throws, but a genuine unexpected throw
  // (MCP/transport) must still fail the task; an AWS signature that reaches the
  // catch is still classified.

  /** Run a session whose mock `prompt()` throws `message`; collect events + result. */
  async function runWithThrow(message: string) {
    const logFile = join(
      tmpLogDir,
      `throw-${Date.now()}-${Math.random().toString(36).slice(2)}.log`,
    );
    const session = new PiMonoSession(
      makeMockAgentSession({ throwError: message }),
      makeSessionConfig(logFile),
      false,
    );
    const emitted: ProviderEvent[] = [];
    session.onEvent((e) => emitted.push(e));
    const result = await session.waitForCompletion();
    return { events: emitted, result };
  }

  test("thrown ExpiredToken → aws-auth error event + terminal failure", async () => {
    const { events, result } = await runWithThrow(
      "ExpiredTokenException: The security token is expired",
    );
    expect(findError(events)?.category).toBe("aws-auth");
    expect(result.isError).toBe(true);
    expect(result.errorCategory).toBe("aws-auth");
  });

  test("thrown non-AWS error → no AWS category, still terminal failure", async () => {
    const { events, result } = await runWithThrow("ECONNREFUSED 127.0.0.1:3013");
    expect(findError(events)).toBeUndefined();
    expect(result.isError).toBe(true);
    expect(result.errorCategory).toBeUndefined();
  });
});
|
|
657
|
+
|
|
658
|
+
describe("classifyAwsSdkError — all 4 categories (quick summary)", () => {
  test("all four categories are reachable", () => {
    // [error message, expected category] pairs, one per category.
    const cases: Array<[string, string]> = [
      ["ExpiredTokenException: token expired", "aws-auth"],
      ["ThrottlingException: rate exceeded", "aws-throttle"],
      ["AccessDeniedException: no permission", "aws-access"],
      ["ValidationException: bad model", "aws-model"],
    ];
    for (const [msg, expected] of cases) {
      const r = classifyAwsSdkError(msg);
      expect(r?.category).toBe(expected);
    }
  });
});
|
|
@@ -107,6 +107,15 @@ describe("PiMonoSession — provider tag on CostData", () => {
|
|
|
107
107
|
const session = new PiMonoSession(fake, makeConfig(logFile), false);
|
|
108
108
|
session.onEvent((e) => events.push(e));
|
|
109
109
|
|
|
110
|
+
const sessionInit = events.find((e) => e.type === "session_init");
|
|
111
|
+
expect(sessionInit).toBeDefined();
|
|
112
|
+
if (sessionInit?.type === "session_init") {
|
|
113
|
+
expect(sessionInit.provider).toBe("pi");
|
|
114
|
+
expect(sessionInit.harnessVariant).toBe("stock");
|
|
115
|
+
expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
|
|
116
|
+
expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
|
|
117
|
+
}
|
|
118
|
+
|
|
110
119
|
const result = await session.waitForCompletion();
|
|
111
120
|
|
|
112
121
|
// The load-bearing assertion. Phase 2's API recompute path keys off
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
2
|
+
import { mkdir, mkdtemp, rm } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
2
5
|
import {
|
|
3
6
|
type ApiConfig,
|
|
4
7
|
ensureTaskFinished,
|
|
8
|
+
getBridgeFailureDiagnostics,
|
|
5
9
|
handleStructuredOutputFallback,
|
|
6
10
|
} from "../commands/runner";
|
|
7
11
|
|
|
@@ -381,6 +385,27 @@ describe("ensureTaskFinished", () => {
|
|
|
381
385
|
expect(lastFinishBody!.failureReason).toBe("Out of memory");
|
|
382
386
|
});
|
|
383
387
|
|
|
388
|
+
test("appends failure diagnostics when exit code is non-zero", async () => {
|
|
389
|
+
resetMocks();
|
|
390
|
+
|
|
391
|
+
await ensureTaskFinished(
|
|
392
|
+
makeConfig(),
|
|
393
|
+
"worker",
|
|
394
|
+
"task-14b",
|
|
395
|
+
1,
|
|
396
|
+
"Session error (exit code 1): Unknown error",
|
|
397
|
+
undefined,
|
|
398
|
+
"claude",
|
|
399
|
+
"Claude bridge final tmux pane tail (/tmp/run/tmux-pane-final.txt):\nraw pane tail",
|
|
400
|
+
);
|
|
401
|
+
|
|
402
|
+
expect(lastFinishBody).toBeTruthy();
|
|
403
|
+
expect(lastFinishBody!.status).toBe("failed");
|
|
404
|
+
expect(lastFinishBody!.failureReason).toBe(
|
|
405
|
+
"Session error (exit code 1): Unknown error\n\nClaude bridge final tmux pane tail (/tmp/run/tmux-pane-final.txt):\nraw pane tail",
|
|
406
|
+
);
|
|
407
|
+
});
|
|
408
|
+
|
|
384
409
|
test("truncates long progress to 2000 chars", async () => {
|
|
385
410
|
resetMocks();
|
|
386
411
|
const longProgress = "x".repeat(3000);
|
|
@@ -399,3 +424,28 @@ describe("ensureTaskFinished", () => {
|
|
|
399
424
|
expect((lastFinishBody!.output as string).length).toBe(2000);
|
|
400
425
|
});
|
|
401
426
|
});
|
|
427
|
+
|
|
428
|
+
describe("getBridgeFailureDiagnostics", () => {
  test("returns latest tmux pane artifact and 40-line tail", async () => {
    const cwd = await mkdtemp(join(tmpdir(), "runner-bridge-diagnostics-"));
    try {
      // Two run directories whose names differ only by timestamp; the second is newer.
      const older = join(cwd, ".claude-bridge/runs/2026-01-01T00-00-00-000Z-old");
      const newer = join(cwd, ".claude-bridge/runs/2026-01-01T00-00-01-000Z-new");
      await mkdir(older, { recursive: true });
      await mkdir(newer, { recursive: true });
      await Bun.write(join(older, "tmux-pane-final.txt"), "old pane");
      // 45 lines, so a 40-line tail must begin at "line 6".
      await Bun.write(
        join(newer, "tmux-pane-final.txt"),
        Array.from({ length: 45 }, (_, i) => `line ${i + 1}`).join("\n"),
      );

      const diagnostics = await getBridgeFailureDiagnostics(cwd);

      expect(diagnostics?.artifactPath).toBe(join(newer, "tmux-pane-final.txt"));
      expect(diagnostics?.paneTail?.startsWith("line 6\nline 7")).toBe(true);
      expect(diagnostics?.paneTail?.endsWith("line 45")).toBe(true);
    } finally {
      // Always remove the scratch directory, even when an assertion fails.
      await rm(cwd, { recursive: true, force: true });
    }
  });
});
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { unlink } from "node:fs/promises";
|
|
3
|
+
import { closeDb, getDb, initDb } from "../be/db";
|
|
4
|
+
import type { EmbeddingProvider } from "../be/memory/types";
|
|
5
|
+
import { runBootReembedScripts } from "../be/scripts/boot-reembed";
|
|
6
|
+
import { upsertScriptByName } from "../be/scripts/db";
|
|
7
|
+
import { setScriptEmbeddingProviderForTests } from "../be/scripts/embeddings";
|
|
8
|
+
|
|
9
|
+
// SQLite file used by this test file only; created in beforeAll, removed in afterAll.
const TEST_DB_PATH = "./test-scripts-boot-reembed.sqlite";

// Serialized script signature reused by every fixture script in this file.
const signatureJson = JSON.stringify({
  argsType: "{ value: string }",
  resultType: "Promise<{ ok: boolean }>",
  description: "",
});
|
|
16
|
+
|
|
17
|
+
/**
 * Delete the test database file and its `-wal` / `-shm` companion files.
 *
 * Cleanup is best-effort: a file that cannot be removed (most commonly because
 * it does not exist) is ignored so setup and teardown never fail on it.
 */
async function clearDb(): Promise<void> {
  for (const suffix of ["", "-wal", "-shm"]) {
    try {
      await unlink(TEST_DB_PATH + suffix);
    } catch {
      // Intentionally ignored — see the function comment.
    }
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Produce the source text of a tiny script module whose default export
 * resolves to `{ label }`.
 *
 * The label is embedded with `JSON.stringify`, so quotes and other special
 * characters are escaped into a valid string literal.
 *
 * @param label Value the generated script returns under the `label` key.
 * @returns Script source as a single-line string.
 */
function source(label: string): string {
  return `export default async () => ({ label: ${JSON.stringify(label)} });`;
}
|
|
28
|
+
|
|
29
|
+
/**
 * In-memory embedding provider for tests.
 *
 * It records every text it is asked to embed in `calls` and always returns the
 * same fixed 5-element vector, so tests can assert on how many embeddings were
 * requested without contacting a real provider.
 */
class FakeEmbeddingProvider implements EmbeddingProvider {
  readonly name = "test/fake-boot-reembed";
  readonly dimensions = 5;
  /** Texts passed to `embed`, in call order (batch calls are recorded per text). */
  readonly calls: string[] = [];

  /** Record `text` and return the fixed vector. */
  async embed(text: string): Promise<Float32Array | null> {
    this.calls.push(text);
    return new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5]);
  }

  /** Embed each text through `embed`, so every text is recorded in `calls`. */
  async embedBatch(texts: string[]): Promise<(Float32Array | null)[]> {
    return Promise.all(texts.map((text) => this.embed(text)));
  }

  /** Forget all recorded calls (the array is emptied in place). */
  reset(): void {
    this.calls.length = 0;
  }
}

// Provider instance for the current test; replaced in beforeEach.
let provider: FakeEmbeddingProvider;
|
|
49
|
+
|
|
50
|
+
/**
 * Count the rows in `script_embeddings` that belong to one script.
 *
 * @param scriptId Id matched against the `scriptId` column.
 * @returns The row count, or 0 when the query yields no row.
 */
function embeddingCount(scriptId: string): number {
  return (
    getDb()
      .prepare<{ count: number }, [string]>(
        "SELECT COUNT(*) as count FROM script_embeddings WHERE scriptId = ?",
      )
      .get(scriptId)?.count ?? 0
  );
}
|
|
59
|
+
|
|
60
|
+
/**
 * Count every row in `script_embeddings`.
 *
 * @returns The row count, or 0 when the query yields no row.
 */
function totalEmbeddingCount(): number {
  return (
    getDb().prepare<{ count: number }, []>("SELECT COUNT(*) as count FROM script_embeddings").get()
      ?.count ?? 0
  );
}
|
|
66
|
+
|
|
67
|
+
// Start from a clean database file, then open it.
beforeAll(async () => {
  await clearDb();
  initDb(TEST_DB_PATH);
});

// Remove the test embedding provider, close the database and delete its files.
afterAll(async () => {
  setScriptEmbeddingProviderForTests(null);
  closeDb();
  await clearDb();
});

// Each test starts with empty tables and a fresh provider with no recorded calls.
beforeEach(() => {
  getDb().run("DELETE FROM scripts");
  getDb().run("DELETE FROM script_embeddings");
  provider = new FakeEmbeddingProvider();
  setScriptEmbeddingProviderForTests(provider);
});
|
|
84
|
+
|
|
85
|
+
describe("boot-reembed-scripts", () => {
  test("backfills scripts that were seeded with embeddingMode: skip", async () => {
    const result = await upsertScriptByName({
      name: "skipped-embed",
      scope: "global",
      source: source("skipped"),
      description: "A script seeded without embedding",
      intent: "Test backfill",
      signatureJson,
      embeddingMode: "skip",
    });
    expect(embeddingCount(result.script.id)).toBe(0);

    // Clear recorded calls so only the backfill's embed requests are counted.
    provider.reset();
    await runBootReembedScripts();
    expect(embeddingCount(result.script.id)).toBe(1);
    expect(provider.calls).toHaveLength(1);
  });

  test("no-ops when all scripts already have embeddings", async () => {
    await upsertScriptByName({
      name: "already-embedded",
      scope: "global",
      source: source("embedded"),
      description: "Already has embedding",
      intent: "No-op test",
      signatureJson,
    });
    expect(totalEmbeddingCount()).toBe(1);

    provider.reset();
    await runBootReembedScripts();
    expect(provider.calls).toHaveLength(0);
  });

  test("skips scratch scripts during backfill", async () => {
    await upsertScriptByName({
      name: "scratch-no-backfill",
      scope: "agent",
      scopeId: "agent-1",
      source: source("scratch"),
      description: "Scratch script",
      intent: "Should not be backfilled",
      signatureJson,
      isScratch: true,
    });

    provider.reset();
    await runBootReembedScripts();
    expect(provider.calls).toHaveLength(0);
  });

  test("backfills only scripts missing embeddings, not those that already have them", async () => {
    const withEmbed = await upsertScriptByName({
      name: "has-embed",
      scope: "global",
      source: source("has"),
      description: "Has embedding",
      intent: "Already embedded",
      signatureJson,
    });
    const withoutEmbed = await upsertScriptByName({
      name: "missing-embed",
      scope: "global",
      source: source("missing"),
      description: "Missing embedding",
      intent: "Needs backfill",
      signatureJson,
      embeddingMode: "skip",
    });
    expect(embeddingCount(withEmbed.script.id)).toBe(1);
    expect(embeddingCount(withoutEmbed.script.id)).toBe(0);

    provider.reset();
    await runBootReembedScripts();
    expect(provider.calls).toHaveLength(1);
    expect(embeddingCount(withoutEmbed.script.id)).toBe(1);
  });
});
|
|
@@ -268,6 +268,96 @@ describe("script embeddings", () => {
|
|
|
268
268
|
expect(topOneHits).toBeGreaterThanOrEqual(4);
|
|
269
269
|
});
|
|
270
270
|
|
|
271
|
+
test("embeddingMode: skip prevents embedding on new script", async () => {
|
|
272
|
+
provider.reset();
|
|
273
|
+
const result = await upsertScriptByName({
|
|
274
|
+
name: "skip-new",
|
|
275
|
+
scope: "agent",
|
|
276
|
+
scopeId: "agent-1",
|
|
277
|
+
source: source("skip-new"),
|
|
278
|
+
description: "Should not embed",
|
|
279
|
+
intent: "Skip mode test",
|
|
280
|
+
signatureJson,
|
|
281
|
+
agentId: "agent-1",
|
|
282
|
+
embeddingMode: "skip",
|
|
283
|
+
});
|
|
284
|
+
expect(result.isNew).toBe(true);
|
|
285
|
+
expect(embeddingCount(result.script.id)).toBe(0);
|
|
286
|
+
expect(provider.calls).toHaveLength(0);
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
test("embeddingMode: skip prevents embedding on source change", async () => {
|
|
290
|
+
const first = await upsertScriptByName({
|
|
291
|
+
name: "skip-update",
|
|
292
|
+
scope: "agent",
|
|
293
|
+
scopeId: "agent-1",
|
|
294
|
+
source: source("v1"),
|
|
295
|
+
description: "Will update",
|
|
296
|
+
intent: "Skip mode update test",
|
|
297
|
+
signatureJson,
|
|
298
|
+
agentId: "agent-1",
|
|
299
|
+
});
|
|
300
|
+
expect(embeddingCount(first.script.id)).toBe(1);
|
|
301
|
+
|
|
302
|
+
provider.reset();
|
|
303
|
+
const second = await upsertScriptByName({
|
|
304
|
+
name: "skip-update",
|
|
305
|
+
scope: "agent",
|
|
306
|
+
scopeId: "agent-1",
|
|
307
|
+
source: source("v2"),
|
|
308
|
+
description: "Updated source",
|
|
309
|
+
intent: "Skip mode update test",
|
|
310
|
+
signatureJson,
|
|
311
|
+
agentId: "agent-1",
|
|
312
|
+
embeddingMode: "skip",
|
|
313
|
+
});
|
|
314
|
+
expect(second.contentDeduped).toBe(false);
|
|
315
|
+
expect(provider.calls).toHaveLength(0);
|
|
316
|
+
});
|
|
317
|
+
|
|
318
|
+
test("embeddingMode: skip prevents embedding on metadata change", async () => {
|
|
319
|
+
await upsertScriptByName({
|
|
320
|
+
name: "skip-meta",
|
|
321
|
+
scope: "agent",
|
|
322
|
+
scopeId: "agent-1",
|
|
323
|
+
source: source("skip-meta"),
|
|
324
|
+
description: "Original description",
|
|
325
|
+
intent: "Original intent",
|
|
326
|
+
signatureJson,
|
|
327
|
+
agentId: "agent-1",
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
provider.reset();
|
|
331
|
+
await upsertScriptByName({
|
|
332
|
+
name: "skip-meta",
|
|
333
|
+
scope: "agent",
|
|
334
|
+
scopeId: "agent-1",
|
|
335
|
+
source: source("skip-meta"),
|
|
336
|
+
description: "Changed description",
|
|
337
|
+
intent: "Changed intent",
|
|
338
|
+
signatureJson,
|
|
339
|
+
agentId: "agent-1",
|
|
340
|
+
embeddingMode: "skip",
|
|
341
|
+
});
|
|
342
|
+
expect(provider.calls).toHaveLength(0);
|
|
343
|
+
});
|
|
344
|
+
|
|
345
|
+
test("embeddingMode defaults to sync (embeds normally)", async () => {
|
|
346
|
+
provider.reset();
|
|
347
|
+
const result = await upsertScriptByName({
|
|
348
|
+
name: "default-sync",
|
|
349
|
+
scope: "agent",
|
|
350
|
+
scopeId: "agent-1",
|
|
351
|
+
source: source("default-sync"),
|
|
352
|
+
description: "Should embed by default",
|
|
353
|
+
intent: "Default mode test",
|
|
354
|
+
signatureJson,
|
|
355
|
+
agentId: "agent-1",
|
|
356
|
+
});
|
|
357
|
+
expect(embeddingCount(result.script.id)).toBe(1);
|
|
358
|
+
expect(provider.calls).toHaveLength(1);
|
|
359
|
+
});
|
|
360
|
+
|
|
271
361
|
test("reembedAllScripts updates every explicit script", async () => {
|
|
272
362
|
await upsertFixture({ name: "linear-one", description: "Linear issue parser" });
|
|
273
363
|
await upsertFixture({ name: "slack-one", description: "Slack message digest" });
|