@desplega.ai/agent-swarm 1.87.0 → 1.88.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +2 -1
  2. package/openapi.json +13 -1
  3. package/package.json +5 -5
  4. package/src/be/db.ts +49 -7
  5. package/src/be/migrations/080_skill_system_defaults.sql +8 -0
  6. package/src/be/modelsdev-cache.json +1123 -1034
  7. package/src/be/seed/registry.ts +3 -2
  8. package/src/be/seed-skills/index.ts +172 -0
  9. package/src/cli.tsx +33 -4
  10. package/src/commands/e2b-stack-wizard.tsx +394 -0
  11. package/src/commands/e2b.ts +1352 -53
  12. package/src/commands/onboard/dashboard-url.ts +29 -0
  13. package/src/commands/onboard/steps/post-dashboard.tsx +3 -1
  14. package/src/commands/onboard.tsx +3 -1
  15. package/src/commands/runner.ts +1 -0
  16. package/src/e2b/dispatch.ts +234 -18
  17. package/src/http/memory.ts +13 -1
  18. package/src/http/skills.ts +53 -0
  19. package/src/http/webhooks.ts +75 -0
  20. package/src/integrations/kapso/client.ts +82 -0
  21. package/src/memory/automatic-task-gate.ts +47 -0
  22. package/src/prompts/base-prompt.ts +16 -1
  23. package/src/prompts/session-templates.ts +51 -0
  24. package/src/providers/claude-adapter.ts +19 -0
  25. package/src/providers/codex-adapter.ts +22 -0
  26. package/src/providers/ctx-mode-env.ts +10 -0
  27. package/src/providers/opencode-adapter.ts +50 -1
  28. package/src/slack/blocks.ts +12 -4
  29. package/src/slack/watcher.ts +3 -3
  30. package/src/telemetry.ts +14 -1
  31. package/src/templates.d.ts +4 -0
  32. package/src/tests/base-prompt.test.ts +41 -0
  33. package/src/tests/claude-adapter.test.ts +86 -1
  34. package/src/tests/codex-adapter.test.ts +89 -0
  35. package/src/tests/e2b-dispatch.test.ts +603 -11
  36. package/src/tests/http-api-integration.test.ts +113 -0
  37. package/src/tests/kapso-client.test.ts +74 -1
  38. package/src/tests/kapso-inbound.test.ts +60 -2
  39. package/src/tests/opencode-adapter.test.ts +95 -0
  40. package/src/tests/prompt-template-session.test.ts +4 -2
  41. package/src/tests/self-improvement.test.ts +89 -0
  42. package/src/tests/skill-update-scope.test.ts +88 -1
  43. package/src/tests/slack-blocks.test.ts +15 -0
  44. package/src/tests/system-default-skills.test.ts +119 -0
  45. package/src/tests/telemetry-init.test.ts +86 -0
  46. package/src/tools/skills/skill-delete.ts +14 -0
  47. package/src/tools/skills/skill-update.ts +14 -0
  48. package/src/tools/store-progress.ts +19 -5
  49. package/src/types.ts +1 -0
  50. package/templates/skills/artifacts/config.json +1 -0
  51. package/templates/skills/kv-storage/config.json +1 -0
  52. package/templates/skills/pages/config.json +1 -0
  53. package/templates/skills/scheduled-task-resilience/config.json +1 -0
  54. package/templates/skills/swarm-scripts/SKILL.md +91 -0
  55. package/templates/skills/swarm-scripts/config.json +14 -0
  56. package/templates/skills/swarm-scripts/content.md +86 -0
  57. package/templates/skills/workflow-iterate/config.json +1 -0
  58. package/templates/skills/workflow-structured-output/config.json +1 -0
  59. package/tsconfig.json +2 -1
@@ -1,13 +1,28 @@
1
- import { describe, expect, test } from "bun:test";
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdtempSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import {
6
+ buildDashboardDeepLink,
7
+ type LaunchSpec,
8
+ loadRuntimeEnv,
9
+ parseFlags,
10
+ resolveIntegrationToggles,
11
+ runE2BCommand,
12
+ swarmGroupMembers,
13
+ } from "../commands/e2b";
14
+ import { buildOnboardDashboardUrl } from "../commands/onboard/dashboard-url";
2
15
  import {
3
- buildDetachedShell,
4
16
  buildImageTemplate,
5
17
  buildTemplateArgs,
18
+ buildTrackedShell,
6
19
  deleteTemplate,
7
20
  type E2BSandboxInfo,
8
21
  e2bSdkConnectionOptions,
22
+ sandboxLogPath,
9
23
  sandboxPortHost,
10
24
  setTemplateVisibility,
25
+ ttlRemaining,
11
26
  waitForAgentRegistration,
12
27
  } from "../e2b/dispatch";
13
28
  import {
@@ -144,16 +159,74 @@ describe("E2B dispatch helpers", () => {
144
159
  ).toBe("3013-sbx123.sandboxes.internal:8443");
145
160
  });
146
161
 
147
- test("buildDetachedShell backgrounds command and captures pid without invalid shell chaining", () => {
148
- const shell = buildDetachedShell("/api-entrypoint.sh", "/tmp/api.log", "/tmp/api.pid");
162
+ test("ttlRemaining reads authoritative endAt when present", () => {
163
+ const expiresAt = new Date(Date.now() + 1800 * 1000).toISOString();
164
+ const sandbox: E2BSandboxInfo = {
165
+ sandboxID: "sbx123",
166
+ templateID: "tpl",
167
+ endAt: expiresAt,
168
+ };
169
+
170
+ const ttl = ttlRemaining(sandbox);
171
+ expect(ttl.expiresAt).toBe(expiresAt);
172
+ // ~1800s remaining; allow a small window for wall-clock drift during the test.
173
+ expect(ttl.secondsLeft).toBeGreaterThan(1790);
174
+ expect(ttl.secondsLeft).toBeLessThanOrEqual(1800);
175
+ });
176
+
177
+ test("ttlRemaining falls back to client-side expiresAt and prefers endAt over it", () => {
178
+ const fallback = new Date(Date.now() + 600 * 1000).toISOString();
179
+ const fallbackOnly: E2BSandboxInfo = {
180
+ sandboxID: "sbx456",
181
+ templateID: "tpl",
182
+ expiresAt: fallback,
183
+ };
184
+ const fallbackTtl = ttlRemaining(fallbackOnly);
185
+ expect(fallbackTtl.expiresAt).toBe(fallback);
186
+ expect(fallbackTtl.secondsLeft).toBeGreaterThan(590);
187
+ expect(fallbackTtl.secondsLeft).toBeLessThanOrEqual(600);
188
+
189
+ // endAt is authoritative and wins over the client-side fallback.
190
+ const authoritative = new Date(Date.now() + 3600 * 1000).toISOString();
191
+ const both = ttlRemaining({ ...fallbackOnly, endAt: authoritative });
192
+ expect(both.expiresAt).toBe(authoritative);
193
+ expect(both.secondsLeft).toBeGreaterThan(3590);
194
+ });
195
+
196
+ test("ttlRemaining returns empty for absent endAt/expiresAt and clamps expired to zero", () => {
197
+ expect(ttlRemaining({ sandboxID: "none", templateID: "tpl" })).toEqual({});
198
+ const expired = ttlRemaining({
199
+ sandboxID: "old",
200
+ templateID: "tpl",
201
+ endAt: new Date(Date.now() - 60 * 1000).toISOString(),
202
+ });
203
+ expect(expired.secondsLeft).toBe(0);
204
+ });
205
+
206
+ test("buildTrackedShell pipes the entrypoint through tee to the log path (Phase 5)", () => {
207
+ const logPath = sandboxLogPath("api");
208
+ const shell = buildTrackedShell("/api-entrypoint.sh", logPath);
209
+
210
+ // Phase 5: the entrypoint runs as the SDK BACKGROUND command itself (envd
211
+ // owns/streams it), no longer a detached `nohup … &` grandchild.
212
+ expect(logPath).toBe("/tmp/agent-swarm-e2b-api.log");
213
+ expect(shell).toBe(
214
+ "set -o pipefail; /api-entrypoint.sh 2>&1 | tee /tmp/agent-swarm-e2b-api.log",
215
+ );
216
+ // Must tee to the deterministic file so `swarms logs` can read full history.
217
+ expect(shell).toContain(`tee ${logPath}`);
218
+ // pipefail makes the pipeline exit reflect the entrypoint (not tee) for the
219
+ // early-failure poll in startDetachedProcess.
220
+ expect(shell).toContain("set -o pipefail");
221
+ // The old detach primitives are gone.
222
+ expect(shell).not.toContain("nohup");
223
+ expect(shell).not.toContain("kill -0");
224
+ expect(shell).not.toContain("sleep 2");
225
+ });
149
226
 
150
- expect(shell).toContain("nohup /api-entrypoint.sh >/tmp/api.log 2>&1 </dev/null & pid=$!");
151
- expect(shell).toContain("sleep 2");
152
- expect(shell).toContain('kill -0 "$pid"');
153
- expect(shell).toContain("cat /tmp/api.log >&2");
154
- expect(shell).toContain("pid=$!");
155
- expect(shell).not.toContain("&;");
156
- expect(shell).not.toContain("& &&");
227
+ test("sandboxLogPath is deterministic per E2B role", () => {
228
+ expect(sandboxLogPath("api")).toBe("/tmp/agent-swarm-e2b-api.log");
229
+ expect(sandboxLogPath("worker")).toBe("/tmp/agent-swarm-e2b-worker.log");
157
230
  });
158
231
 
159
232
  test("E2B SDK connection options preserve loaded controller endpoints", () => {
@@ -328,3 +401,522 @@ describe("E2B dispatch helpers", () => {
328
401
  });
329
402
  });
330
403
  });
404
+
405
+ describe("E2B namespaced env scoping", () => {
406
+ const API_SPEC: LaunchSpec = { swarmRole: "api", envScope: "api" };
407
+ const LEAD_SPEC: LaunchSpec = { swarmRole: "worker", agentRole: "lead", envScope: "lead" };
408
+ const WORKER_SPEC: LaunchSpec = { swarmRole: "worker", agentRole: "worker", envScope: "worker" };
409
+ // A dummy MCP base URL — loadRuntimeEnv requires one for non-api roles.
410
+ const API_URL = "https://api.example.com";
411
+
412
+ // Phase 2 layering is precedence-only; --dry-run keeps the swarm-API-key
413
+ // resolution from throwing without touching E2B. We snapshot/restore the
414
+ // forward-key env vars so ambient values can't leak into the assertions.
415
+ const previous: Record<string, string | undefined> = {};
416
+ beforeEach(() => {
417
+ for (const key of ["AGENT_SWARM_API_KEY", "API_KEY", "HARNESS_PROVIDER"]) {
418
+ previous[key] = process.env[key];
419
+ delete process.env[key];
420
+ }
421
+ });
422
+ afterEach(() => {
423
+ for (const [key, value] of Object.entries(previous)) {
424
+ if (value === undefined) delete process.env[key];
425
+ else process.env[key] = value;
426
+ }
427
+ });
428
+
429
+ async function resolveAllScopes(argv: string[]) {
430
+ const flags = parseFlags(["start-stack", ...argv, "--dry-run"]);
431
+ const [api, lead, worker] = await Promise.all([
432
+ loadRuntimeEnv(flags, API_SPEC),
433
+ loadRuntimeEnv(flags, LEAD_SPEC, API_URL),
434
+ loadRuntimeEnv(flags, WORKER_SPEC, API_URL),
435
+ ]);
436
+ return { api, lead, worker };
437
+ }
438
+
439
+ test("--worker-secret lands only in the worker scope", async () => {
440
+ const { api, lead, worker } = await resolveAllScopes(["--worker-secret", "FOO=x"]);
441
+ expect(worker.FOO).toBe("x");
442
+ expect(lead.FOO).toBeUndefined();
443
+ expect(api.FOO).toBeUndefined();
444
+ });
445
+
446
+ test("--lead-secret lands only in the lead scope", async () => {
447
+ const { api, lead, worker } = await resolveAllScopes(["--lead-secret", "K=v"]);
448
+ expect(lead.K).toBe("v");
449
+ expect(worker.K).toBeUndefined();
450
+ expect(api.K).toBeUndefined();
451
+ });
452
+
453
+ test("--api-secret lands only in the api scope", async () => {
454
+ const { api, lead, worker } = await resolveAllScopes(["--api-secret", "ZED=q"]);
455
+ expect(api.ZED).toBe("q");
456
+ expect(lead.ZED).toBeUndefined();
457
+ expect(worker.ZED).toBeUndefined();
458
+ });
459
+
460
+ test("shared --secret applies to all three scopes", async () => {
461
+ const { api, lead, worker } = await resolveAllScopes(["--secret", "BAR=y"]);
462
+ expect(api.BAR).toBe("y");
463
+ expect(lead.BAR).toBe("y");
464
+ expect(worker.BAR).toBe("y");
465
+ });
466
+
467
+ test("scoped --secret layers on top of the shared --secret without replacing it", async () => {
468
+ // Shared sets SHARED + OVERRIDE; worker scope overrides OVERRIDE and adds
469
+ // WORKER_ONLY. The shared value must survive in the non-overridden scopes.
470
+ const { api, lead, worker } = await resolveAllScopes([
471
+ "--secret",
472
+ "SHARED=shared",
473
+ "--secret",
474
+ "OVERRIDE=shared-val",
475
+ "--worker-secret",
476
+ "OVERRIDE=worker-val",
477
+ "--worker-secret",
478
+ "WORKER_ONLY=w",
479
+ ]);
480
+
481
+ expect(api.SHARED).toBe("shared");
482
+ expect(lead.SHARED).toBe("shared");
483
+ expect(worker.SHARED).toBe("shared");
484
+
485
+ expect(worker.OVERRIDE).toBe("worker-val");
486
+ expect(lead.OVERRIDE).toBe("shared-val");
487
+ expect(api.OVERRIDE).toBe("shared-val");
488
+
489
+ expect(worker.WORKER_ONLY).toBe("w");
490
+ expect(lead.WORKER_ONLY).toBeUndefined();
491
+ expect(api.WORKER_ONLY).toBeUndefined();
492
+ });
493
+
494
+ test("scoped --{scope}-env-file layers over the shared --env-file", async () => {
495
+ const dir = mkdtempSync(join(tmpdir(), "e2b-env-scope-"));
496
+ const sharedFile = join(dir, "shared.env");
497
+ const workerFile = join(dir, "worker.env");
498
+ writeFileSync(sharedFile, "SHARED_FILE=base\nFROM_SHARED=keep\n");
499
+ writeFileSync(workerFile, "SHARED_FILE=override\nWORKER_FILE_ONLY=w\n");
500
+
501
+ const { api, lead, worker } = await resolveAllScopes([
502
+ "--env-file",
503
+ sharedFile,
504
+ "--worker-env-file",
505
+ workerFile,
506
+ ]);
507
+
508
+ // Shared file is visible everywhere.
509
+ expect(api.FROM_SHARED).toBe("keep");
510
+ expect(lead.FROM_SHARED).toBe("keep");
511
+ expect(worker.FROM_SHARED).toBe("keep");
512
+
513
+ // Worker-scoped file overrides the shared value only in the worker scope.
514
+ expect(worker.SHARED_FILE).toBe("override");
515
+ expect(lead.SHARED_FILE).toBe("base");
516
+ expect(api.SHARED_FILE).toBe("base");
517
+
518
+ // Worker-only key never bleeds into the other scopes.
519
+ expect(worker.WORKER_FILE_ONLY).toBe("w");
520
+ expect(lead.WORKER_FILE_ONLY).toBeUndefined();
521
+ expect(api.WORKER_FILE_ONLY).toBeUndefined();
522
+ });
523
+
524
+ test("scoped --secret wins over both shared and scoped env-files (precedence order)", async () => {
525
+ const dir = mkdtempSync(join(tmpdir(), "e2b-env-prec-"));
526
+ const sharedFile = join(dir, "shared.env");
527
+ const workerFile = join(dir, "worker.env");
528
+ writeFileSync(sharedFile, "PREC=from-shared-file\n");
529
+ writeFileSync(workerFile, "PREC=from-worker-file\n");
530
+
531
+ const { worker } = await resolveAllScopes([
532
+ "--env-file",
533
+ sharedFile,
534
+ "--worker-env-file",
535
+ workerFile,
536
+ "--secret",
537
+ "PREC=from-shared-secret",
538
+ "--worker-secret",
539
+ "PREC=from-worker-secret",
540
+ ]);
541
+
542
+ // Highest-precedence non-forced layer wins.
543
+ expect(worker.PREC).toBe("from-worker-secret");
544
+ });
545
+
546
+ test("AGENT_ROLE comes from the spec; lead spec yields AGENT_ROLE=lead", async () => {
547
+ const { lead, worker } = await resolveAllScopes([]);
548
+ expect(lead.AGENT_ROLE).toBe("lead");
549
+ expect(worker.AGENT_ROLE).toBe("worker");
550
+ });
551
+
552
+ test("worker spec without an agentRole falls back to the global --agent-role", async () => {
553
+ // start-worker stays identical: WORKER_SPEC carries agentRole:"worker" only
554
+ // in start-stack; the legacy path uses a spec with no agentRole and relies
555
+ // on --agent-role. Mirror that here with an agentRole-less worker spec.
556
+ const flags = parseFlags(["start-worker", "--agent-role", "lead", "--dry-run"]);
557
+ const legacyWorkerSpec: LaunchSpec = { swarmRole: "worker", envScope: "worker" };
558
+ const env = await loadRuntimeEnv(flags, legacyWorkerSpec, API_URL);
559
+ expect(env.AGENT_ROLE).toBe("lead");
560
+ });
561
+
562
+ test("forced API_KEY/AGENT_SWARM_API_KEY win over a user --secret API_KEY", async () => {
563
+ // A user must not be able to break swarm auth by overriding API_KEY via a
564
+ // scoped or shared secret — the forced resolution always applies last.
565
+ const flags = parseFlags([
566
+ "start-api",
567
+ "--api-key",
568
+ "forced-key",
569
+ "--secret",
570
+ "API_KEY=attacker",
571
+ "--dry-run",
572
+ ]);
573
+ const env = await loadRuntimeEnv(flags, API_SPEC);
574
+ expect(env.API_KEY).toBe("forced-key");
575
+ expect(env.AGENT_SWARM_API_KEY).toBe("forced-key");
576
+ });
577
+ });
578
+
579
+ describe("E2B start-stack topology (Phase 3)", () => {
580
+ const previous: Record<string, string | undefined> = {};
581
+ beforeEach(() => {
582
+ for (const key of ["AGENT_SWARM_API_KEY", "API_KEY", "HARNESS_PROVIDER"]) {
583
+ previous[key] = process.env[key];
584
+ delete process.env[key];
585
+ }
586
+ });
587
+ afterEach(() => {
588
+ for (const [key, value] of Object.entries(previous)) {
589
+ if (value === undefined) delete process.env[key];
590
+ else process.env[key] = value;
591
+ }
592
+ });
593
+
594
+ /** Run `e2b <argv>` capturing stdout, then parse the JSON it printed. */
595
+ async function runStackJson(argv: string[]): Promise<Record<string, unknown>> {
596
+ const originalLog = console.log;
597
+ const lines: string[] = [];
598
+ console.log = (...args: unknown[]) => {
599
+ lines.push(args.map(String).join(" "));
600
+ };
601
+ const previousExitCode = process.exitCode;
602
+ try {
603
+ await runE2BCommand(argv);
604
+ } finally {
605
+ console.log = originalLog;
606
+ }
607
+ // A clean dry-run must not set a failure exit code.
608
+ expect(process.exitCode ?? 0).toBe(previousExitCode ?? 0);
609
+ // Phase 4 prepends a "swarm: <slug>" echo before the JSON; parse from the
610
+ // first line that opens the JSON object so the preamble is skipped.
611
+ const jsonStart = lines.findIndex((l) => l.trim().startsWith("{"));
612
+ return JSON.parse(lines.slice(Math.max(jsonStart, 0)).join("\n")) as Record<string, unknown>;
613
+ }
614
+
615
+ test("dry-run stack provisions api + lead + N workers", async () => {
616
+ const payload = await runStackJson([
617
+ "start-stack",
618
+ "--dry-run",
619
+ "--yes",
620
+ "--workers",
621
+ "2",
622
+ "--swarm",
623
+ "test",
624
+ "--json",
625
+ ]);
626
+
627
+ expect(payload.api).toBeDefined();
628
+ expect(payload.lead).toBeDefined();
629
+ expect(Array.isArray(payload.workers)).toBe(true);
630
+ expect((payload.workers as unknown[]).length).toBe(2);
631
+ // The lead is E2B SwarmRole "worker" with AGENT_ROLE lead.
632
+ expect((payload.lead as { role: string }).role).toBe("worker");
633
+ expect((payload.api as { role: string }).role).toBe("api");
634
+ });
635
+
636
+ test("--no-lead keeps the legacy api + workers topology (no lead key)", async () => {
637
+ const payload = await runStackJson([
638
+ "start-stack",
639
+ "--dry-run",
640
+ "--yes",
641
+ "--no-lead",
642
+ "--workers",
643
+ "2",
644
+ "--swarm",
645
+ "test",
646
+ "--json",
647
+ ]);
648
+
649
+ expect(payload.api).toBeDefined();
650
+ expect(payload.lead).toBeUndefined();
651
+ expect(Array.isArray(payload.workers)).toBe(true);
652
+ expect((payload.workers as unknown[]).length).toBe(2);
653
+ });
654
+
655
+ test("rejects a shared explicit --agent-id across multiple workers", async () => {
656
+ // A single explicit --agent-id reused for N>1 workers would collapse them
657
+ // into one agent record (the API reuses the row for an existing X-Agent-ID).
658
+ // The guard must fire before any sandbox is provisioned, even on dry-run.
659
+ // runE2BCommand swallows the throw into a stderr line + exitCode=1, so assert
660
+ // on those rather than on a propagated exception.
661
+ const originalError = console.error;
662
+ const errLines: string[] = [];
663
+ console.error = (...args: unknown[]) => {
664
+ errLines.push(args.map(String).join(" "));
665
+ };
666
+ const previousExitCode = process.exitCode;
667
+ try {
668
+ await runE2BCommand([
669
+ "start-stack",
670
+ "--dry-run",
671
+ "--yes",
672
+ "--workers",
673
+ "2",
674
+ "--swarm",
675
+ "test",
676
+ "--agent-id",
677
+ "fixed-worker",
678
+ "--json",
679
+ ]);
680
+ } finally {
681
+ console.error = originalError;
682
+ }
683
+ expect(process.exitCode).toBe(1);
684
+ process.exitCode = previousExitCode ?? 0;
685
+ expect(errLines.join("\n")).toContain("--agent-id cannot be shared across multiple workers");
686
+ });
687
+
688
+ test("allows an explicit --agent-id for a single-worker stack", async () => {
689
+ // One worker + explicit ID is unambiguous — no collision, so it must pass.
690
+ const payload = await runStackJson([
691
+ "start-stack",
692
+ "--dry-run",
693
+ "--yes",
694
+ "--workers",
695
+ "1",
696
+ "--swarm",
697
+ "test",
698
+ "--agent-id",
699
+ "fixed-worker",
700
+ "--json",
701
+ ]);
702
+ expect((payload.workers as unknown[]).length).toBe(1);
703
+ });
704
+
705
+ test("integration toggles disable only the unlisted/--no-<x> integrations", () => {
706
+ // Default: all on.
707
+ expect(resolveIntegrationToggles(parseFlags(["start-stack"]))).toEqual({
708
+ slack: true,
709
+ github: true,
710
+ jira: true,
711
+ linear: true,
712
+ });
713
+ // --no-slack flips just slack off.
714
+ expect(resolveIntegrationToggles(parseFlags(["start-stack", "--no-slack"]))).toMatchObject({
715
+ slack: false,
716
+ github: true,
717
+ });
718
+ // --integrations is an allowlist: only github stays on.
719
+ expect(
720
+ resolveIntegrationToggles(parseFlags(["start-stack", "--integrations", "github"])),
721
+ ).toEqual({
722
+ slack: false,
723
+ github: true,
724
+ jira: false,
725
+ linear: false,
726
+ });
727
+ });
728
+
729
+ test("integration disables land only on the API runtime scope", async () => {
730
+ const flags = parseFlags([
731
+ "start-stack",
732
+ "--no-slack",
733
+ "--integrations",
734
+ "github",
735
+ "--dry-run",
736
+ "--api-key",
737
+ "k",
738
+ ]);
739
+ const api = await loadRuntimeEnv(flags, { swarmRole: "api", envScope: "api" });
740
+ const worker = await loadRuntimeEnv(
741
+ flags,
742
+ { swarmRole: "worker", agentRole: "worker", envScope: "worker" },
743
+ "https://api.example.com",
744
+ );
745
+
746
+ expect(api.SLACK_DISABLE).toBe("true");
747
+ expect(api.JIRA_DISABLE).toBe("true");
748
+ expect(api.LINEAR_DISABLE).toBe("true");
749
+ // github stayed on via the allowlist.
750
+ expect(api.GITHUB_DISABLE).toBeUndefined();
751
+ // The worker scope never carries these API-side toggles.
752
+ expect(worker.SLACK_DISABLE).toBeUndefined();
753
+ });
754
+ });
755
+
756
+ describe("E2B swarm grouping + deep-link (Phase 4)", () => {
757
+ const previous: Record<string, string | undefined> = {};
758
+ beforeEach(() => {
759
+ for (const key of ["AGENT_SWARM_API_KEY", "API_KEY", "HARNESS_PROVIDER", "APP_URL"]) {
760
+ previous[key] = process.env[key];
761
+ delete process.env[key];
762
+ }
763
+ });
764
+ afterEach(() => {
765
+ for (const [key, value] of Object.entries(previous)) {
766
+ if (value === undefined) delete process.env[key];
767
+ else process.env[key] = value;
768
+ }
769
+ });
770
+
771
+ /** Run `e2b <argv>` capturing stdout lines (no JSON parse). */
772
+ async function runStackLines(argv: string[]): Promise<string[]> {
773
+ const originalLog = console.log;
774
+ const lines: string[] = [];
775
+ console.log = (...args: unknown[]) => {
776
+ lines.push(args.map(String).join(" "));
777
+ };
778
+ try {
779
+ await runE2BCommand(argv);
780
+ } finally {
781
+ console.log = originalLog;
782
+ }
783
+ return lines;
784
+ }
785
+
786
+ test("dry-run stack stamps swarm + swarmRole onto every sandbox's metadata", async () => {
787
+ const lines = await runStackLines([
788
+ "start-stack",
789
+ "--dry-run",
790
+ "--yes",
791
+ "--workers",
792
+ "1",
793
+ "--swarm",
794
+ "demo",
795
+ "--json",
796
+ ]);
797
+ // The "swarm: demo" echo precedes the JSON; parse only the JSON tail.
798
+ const jsonStart = lines.findIndex((l) => l.trim().startsWith("{"));
799
+ const payload = JSON.parse(lines.slice(jsonStart).join("\n")) as {
800
+ api: { sandbox: { metadata: Record<string, string> } };
801
+ lead: { sandbox: { metadata: Record<string, string> } };
802
+ workers: { sandbox: { metadata: Record<string, string> } }[];
803
+ };
804
+
805
+ // Shared slug across all roles.
806
+ expect(payload.api.sandbox.metadata.swarm).toBe("demo");
807
+ expect(payload.lead.sandbox.metadata.swarm).toBe("demo");
808
+ expect(payload.workers[0]?.sandbox.metadata.swarm).toBe("demo");
809
+
810
+ // Distinct grouping roles (lead is E2B role:"worker" but swarmRole:"lead").
811
+ expect(payload.api.sandbox.metadata.swarmRole).toBe("api");
812
+ expect(payload.lead.sandbox.metadata.swarmRole).toBe("lead");
813
+ expect(payload.workers[0]?.sandbox.metadata.swarmRole).toBe("worker");
814
+
815
+ // API carries its port; lead/worker do not (they carry agentId reconstruction-ready data).
816
+ expect(payload.api.sandbox.metadata.apiPort).toBe("3013");
817
+ });
818
+
819
+ test("a stack with no --swarm generates a shared slug and echoes it", async () => {
820
+ const lines = await runStackLines(["start-stack", "--dry-run", "--yes", "--workers", "1"]);
821
+ const swarmLine = lines.find((l) => l.startsWith("swarm: "));
822
+ expect(swarmLine).toBeDefined();
823
+ const slug = swarmLine?.slice("swarm: ".length).trim() ?? "";
824
+ expect(slug).toMatch(/^swarm-[0-9a-f]{6}$/);
825
+ });
826
+
827
+ test("e2b dashboard deep-link uses camelCase params and hides the key by default", () => {
828
+ const masked = buildDashboardDeepLink(
829
+ { apiUrl: "https://api.example.com", apiKey: "super-secret-key", name: "demo" },
830
+ false,
831
+ );
832
+ // camelCase params the SPA reads.
833
+ expect(masked).toContain("apiUrl=https%3A%2F%2Fapi.example.com");
834
+ expect(masked).toContain("name=demo");
835
+ // Key hidden — the real value MUST NOT appear.
836
+ expect(masked).toContain("apiKey=<hidden — pass --reveal-key>");
837
+ expect(masked).not.toContain("super-secret-key");
838
+ // Never snake_case.
839
+ expect(masked).not.toContain("api_url");
840
+ expect(masked).not.toContain("api_key");
841
+ });
842
+
843
+ test("e2b dashboard deep-link embeds the real key only when revealed", () => {
844
+ const revealed = buildDashboardDeepLink(
845
+ { apiUrl: "https://api.example.com", apiKey: "super-secret-key", name: "demo" },
846
+ true,
847
+ );
848
+ expect(revealed).toContain("apiKey=super-secret-key");
849
+ expect(revealed).not.toContain("<hidden");
850
+ expect(revealed).toContain("apiUrl=https%3A%2F%2Fapi.example.com");
851
+ });
852
+
853
+ test("--reveal-key gating: default masks the key in stack output, flag reveals it", async () => {
854
+ process.env.APP_URL = "https://dash.example.com";
855
+ const baseArgs = [
856
+ "start-stack",
857
+ "--dry-run",
858
+ "--yes",
859
+ "--workers",
860
+ "1",
861
+ "--api-key",
862
+ "k3y-s3cr3t-value",
863
+ ];
864
+
865
+ const maskedLines = await runStackLines(baseArgs);
866
+ const maskedDash = maskedLines.find((l) => l.startsWith("dashboard: ")) ?? "";
867
+ expect(maskedDash).toContain("apiKey=<hidden — pass --reveal-key>");
868
+ expect(maskedDash).not.toContain("k3y-s3cr3t-value");
869
+
870
+ const revealedLines = await runStackLines([...baseArgs, "--reveal-key"]);
871
+ const revealedDash = revealedLines.find((l) => l.startsWith("dashboard: ")) ?? "";
872
+ expect(revealedDash).toContain("apiKey=k3y-s3cr3t-value");
873
+ expect(revealedDash).not.toContain("<hidden");
874
+ });
875
+
876
+ test("onboarding dashboard builder emits camelCase apiUrl/apiKey (not snake_case)", () => {
877
+ const url = buildOnboardDashboardUrl({
878
+ apiUrl: "http://localhost:3013",
879
+ apiKey: "onboard-key",
880
+ });
881
+ expect(url).toContain("apiUrl=http%3A%2F%2Flocalhost%3A3013");
882
+ expect(url).toContain("apiKey=onboard-key");
883
+ // The bug we fixed: snake_case is silently ignored by the SPA.
884
+ expect(url).not.toContain("api_url");
885
+ expect(url).not.toContain("api_key");
886
+ expect(url.startsWith("https://app.agent-swarm.dev?")).toBe(true);
887
+ });
888
+
889
+ test("swarmGroupMembers restricts a named swarm to dispatcher-owned sandboxes", () => {
890
+ const sandboxes: E2BSandboxInfo[] = [
891
+ // Ours: matching slug + our launcher tag.
892
+ {
893
+ sandboxID: "ours-api",
894
+ templateID: "tpl",
895
+ metadata: { swarm: "myswarm", launcher: "agent-swarm-e2b", swarmRole: "api" },
896
+ },
897
+ {
898
+ sandboxID: "ours-worker",
899
+ templateID: "tpl",
900
+ metadata: { swarm: "myswarm", launcher: "agent-swarm-e2b", swarmRole: "worker" },
901
+ },
902
+ // Foreign: same slug, but NOT launched by us — must be excluded so
903
+ // `swarms kill/info/logs/add` can never touch it.
904
+ {
905
+ sandboxID: "foreign-collision",
906
+ templateID: "tpl",
907
+ metadata: { swarm: "myswarm" },
908
+ },
909
+ // Ours, but a different swarm — excluded by the slug filter.
910
+ {
911
+ sandboxID: "ours-other",
912
+ templateID: "tpl",
913
+ metadata: { swarm: "otherswarm", launcher: "agent-swarm-e2b" },
914
+ },
915
+ ];
916
+
917
+ const members = swarmGroupMembers(sandboxes, "myswarm");
918
+ expect(members.map((m) => m.sandboxID).sort()).toEqual(["ours-api", "ours-worker"]);
919
+ // The foreign sandbox with a colliding generic `metadata.swarm` is dropped.
920
+ expect(members.some((m) => m.sandboxID === "foreign-collision")).toBe(false);
921
+ });
922
+ });