claude-code-swarm 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +22 -1
- package/.claude-plugin/run-agent-inbox-mcp.sh +76 -0
- package/.claude-plugin/run-minimem-mcp.sh +98 -0
- package/.claude-plugin/run-opentasks-mcp.sh +65 -0
- package/CLAUDE.md +200 -36
- package/README.md +65 -0
- package/e2e/helpers/cleanup.mjs +17 -3
- package/e2e/helpers/map-mock-server.mjs +201 -25
- package/e2e/helpers/sidecar.mjs +222 -0
- package/e2e/helpers/workspace.mjs +2 -1
- package/e2e/tier5-sidecar-inbox.test.mjs +900 -0
- package/e2e/tier6-inbox-mcp.test.mjs +173 -0
- package/e2e/tier6-live-agent.test.mjs +759 -0
- package/e2e/vitest.config.e2e.mjs +1 -1
- package/hooks/hooks.json +15 -8
- package/package.json +13 -1
- package/references/agent-inbox/CLAUDE.md +151 -0
- package/references/agent-inbox/README.md +238 -0
- package/references/agent-inbox/docs/CLAUDE-CODE-SWARM-PROPOSAL.md +137 -0
- package/references/agent-inbox/docs/DESIGN.md +1156 -0
- package/references/agent-inbox/hooks/inbox-hook.mjs +119 -0
- package/references/agent-inbox/hooks/register-hook.mjs +69 -0
- package/references/agent-inbox/package-lock.json +3347 -0
- package/references/agent-inbox/package.json +58 -0
- package/references/agent-inbox/rules/agent-inbox.md +78 -0
- package/references/agent-inbox/src/federation/address.ts +61 -0
- package/references/agent-inbox/src/federation/connection-manager.ts +573 -0
- package/references/agent-inbox/src/federation/delivery-queue.ts +222 -0
- package/references/agent-inbox/src/federation/index.ts +6 -0
- package/references/agent-inbox/src/federation/routing-engine.ts +188 -0
- package/references/agent-inbox/src/federation/trust.ts +71 -0
- package/references/agent-inbox/src/index.ts +390 -0
- package/references/agent-inbox/src/ipc/ipc-server.ts +207 -0
- package/references/agent-inbox/src/jsonrpc/mail-server.ts +382 -0
- package/references/agent-inbox/src/map/map-client.ts +414 -0
- package/references/agent-inbox/src/mcp/mcp-server.ts +272 -0
- package/references/agent-inbox/src/mesh/delivery-bridge.ts +110 -0
- package/references/agent-inbox/src/mesh/mesh-connector.ts +41 -0
- package/references/agent-inbox/src/mesh/mesh-transport.ts +157 -0
- package/references/agent-inbox/src/mesh/type-mapper.ts +239 -0
- package/references/agent-inbox/src/push/notifier.ts +233 -0
- package/references/agent-inbox/src/registry/warm-registry.ts +255 -0
- package/references/agent-inbox/src/router/message-router.ts +175 -0
- package/references/agent-inbox/src/storage/interface.ts +48 -0
- package/references/agent-inbox/src/storage/memory.ts +145 -0
- package/references/agent-inbox/src/storage/sqlite.ts +671 -0
- package/references/agent-inbox/src/traceability/traceability.ts +183 -0
- package/references/agent-inbox/src/types.ts +303 -0
- package/references/agent-inbox/test/federation/address.test.ts +101 -0
- package/references/agent-inbox/test/federation/connection-manager.test.ts +546 -0
- package/references/agent-inbox/test/federation/delivery-queue.test.ts +159 -0
- package/references/agent-inbox/test/federation/integration.test.ts +857 -0
- package/references/agent-inbox/test/federation/routing-engine.test.ts +117 -0
- package/references/agent-inbox/test/federation/sdk-integration.test.ts +744 -0
- package/references/agent-inbox/test/federation/trust.test.ts +89 -0
- package/references/agent-inbox/test/ipc-jsonrpc.test.ts +113 -0
- package/references/agent-inbox/test/ipc-server.test.ts +197 -0
- package/references/agent-inbox/test/mail-server.test.ts +285 -0
- package/references/agent-inbox/test/map-client.test.ts +408 -0
- package/references/agent-inbox/test/mesh/delivery-bridge.test.ts +178 -0
- package/references/agent-inbox/test/mesh/e2e-mesh.test.ts +527 -0
- package/references/agent-inbox/test/mesh/e2e-real-meshpeer.test.ts +629 -0
- package/references/agent-inbox/test/mesh/federation-mesh.test.ts +269 -0
- package/references/agent-inbox/test/mesh/mesh-connector.test.ts +66 -0
- package/references/agent-inbox/test/mesh/mesh-transport.test.ts +191 -0
- package/references/agent-inbox/test/mesh/meshpeer-integration.test.ts +442 -0
- package/references/agent-inbox/test/mesh/mock-mesh.ts +125 -0
- package/references/agent-inbox/test/mesh/mock-meshpeer.ts +266 -0
- package/references/agent-inbox/test/mesh/type-mapper.test.ts +226 -0
- package/references/agent-inbox/test/message-router.test.ts +184 -0
- package/references/agent-inbox/test/push-notifier.test.ts +139 -0
- package/references/agent-inbox/test/registry/warm-registry.test.ts +171 -0
- package/references/agent-inbox/test/sqlite-prefix.test.ts +192 -0
- package/references/agent-inbox/test/sqlite-storage.test.ts +243 -0
- package/references/agent-inbox/test/storage.test.ts +196 -0
- package/references/agent-inbox/test/traceability.test.ts +123 -0
- package/references/agent-inbox/test/wake.test.ts +330 -0
- package/references/agent-inbox/tsconfig.json +20 -0
- package/references/agent-inbox/tsup.config.ts +10 -0
- package/references/agent-inbox/vitest.config.ts +8 -0
- package/references/minimem/.claude/settings.json +7 -0
- package/references/minimem/.sudocode/issues.jsonl +18 -0
- package/references/minimem/.sudocode/specs.jsonl +1 -0
- package/references/minimem/CLAUDE.md +329 -0
- package/references/minimem/README.md +565 -0
- package/references/minimem/claude-plugin/.claude-plugin/plugin.json +10 -0
- package/references/minimem/claude-plugin/.mcp.json +7 -0
- package/references/minimem/claude-plugin/README.md +158 -0
- package/references/minimem/claude-plugin/commands/recall.md +47 -0
- package/references/minimem/claude-plugin/commands/remember.md +41 -0
- package/references/minimem/claude-plugin/hooks/__tests__/hooks.test.ts +272 -0
- package/references/minimem/claude-plugin/hooks/hooks.json +27 -0
- package/references/minimem/claude-plugin/hooks/session-end.sh +86 -0
- package/references/minimem/claude-plugin/hooks/session-start.sh +85 -0
- package/references/minimem/claude-plugin/skills/memory/SKILL.md +108 -0
- package/references/minimem/media/banner.png +0 -0
- package/references/minimem/package-lock.json +5373 -0
- package/references/minimem/package.json +76 -0
- package/references/minimem/scripts/postbuild.js +49 -0
- package/references/minimem/src/__tests__/edge-cases.test.ts +371 -0
- package/references/minimem/src/__tests__/errors.test.ts +265 -0
- package/references/minimem/src/__tests__/helpers.ts +199 -0
- package/references/minimem/src/__tests__/internal.test.ts +407 -0
- package/references/minimem/src/__tests__/knowledge-frontmatter.test.ts +148 -0
- package/references/minimem/src/__tests__/knowledge.test.ts +148 -0
- package/references/minimem/src/__tests__/minimem.integration.test.ts +1127 -0
- package/references/minimem/src/__tests__/session.test.ts +190 -0
- package/references/minimem/src/cli/__tests__/commands.test.ts +760 -0
- package/references/minimem/src/cli/__tests__/contained-layout.test.ts +286 -0
- package/references/minimem/src/cli/commands/__tests__/conflicts.test.ts +141 -0
- package/references/minimem/src/cli/commands/append.ts +76 -0
- package/references/minimem/src/cli/commands/config.ts +262 -0
- package/references/minimem/src/cli/commands/conflicts.ts +415 -0
- package/references/minimem/src/cli/commands/daemon.ts +169 -0
- package/references/minimem/src/cli/commands/index.ts +12 -0
- package/references/minimem/src/cli/commands/init.ts +166 -0
- package/references/minimem/src/cli/commands/mcp.ts +221 -0
- package/references/minimem/src/cli/commands/push-pull.ts +213 -0
- package/references/minimem/src/cli/commands/search.ts +223 -0
- package/references/minimem/src/cli/commands/status.ts +84 -0
- package/references/minimem/src/cli/commands/store.ts +189 -0
- package/references/minimem/src/cli/commands/sync-init.ts +290 -0
- package/references/minimem/src/cli/commands/sync.ts +70 -0
- package/references/minimem/src/cli/commands/upsert.ts +197 -0
- package/references/minimem/src/cli/config.ts +611 -0
- package/references/minimem/src/cli/index.ts +299 -0
- package/references/minimem/src/cli/shared.ts +189 -0
- package/references/minimem/src/cli/sync/__tests__/central.test.ts +152 -0
- package/references/minimem/src/cli/sync/__tests__/conflicts.test.ts +209 -0
- package/references/minimem/src/cli/sync/__tests__/daemon.test.ts +118 -0
- package/references/minimem/src/cli/sync/__tests__/detection.test.ts +207 -0
- package/references/minimem/src/cli/sync/__tests__/integration.test.ts +476 -0
- package/references/minimem/src/cli/sync/__tests__/registry.test.ts +363 -0
- package/references/minimem/src/cli/sync/__tests__/state.test.ts +255 -0
- package/references/minimem/src/cli/sync/__tests__/validation.test.ts +193 -0
- package/references/minimem/src/cli/sync/__tests__/watcher.test.ts +178 -0
- package/references/minimem/src/cli/sync/central.ts +292 -0
- package/references/minimem/src/cli/sync/conflicts.ts +205 -0
- package/references/minimem/src/cli/sync/daemon.ts +407 -0
- package/references/minimem/src/cli/sync/detection.ts +138 -0
- package/references/minimem/src/cli/sync/index.ts +107 -0
- package/references/minimem/src/cli/sync/operations.ts +373 -0
- package/references/minimem/src/cli/sync/registry.ts +279 -0
- package/references/minimem/src/cli/sync/state.ts +358 -0
- package/references/minimem/src/cli/sync/validation.ts +206 -0
- package/references/minimem/src/cli/sync/watcher.ts +237 -0
- package/references/minimem/src/cli/version.ts +34 -0
- package/references/minimem/src/core/index.ts +9 -0
- package/references/minimem/src/core/indexer.ts +628 -0
- package/references/minimem/src/core/searcher.ts +221 -0
- package/references/minimem/src/db/schema.ts +183 -0
- package/references/minimem/src/db/sqlite-vec.ts +24 -0
- package/references/minimem/src/embeddings/__tests__/embeddings.test.ts +431 -0
- package/references/minimem/src/embeddings/batch-gemini.ts +392 -0
- package/references/minimem/src/embeddings/batch-openai.ts +409 -0
- package/references/minimem/src/embeddings/embeddings.ts +434 -0
- package/references/minimem/src/index.ts +132 -0
- package/references/minimem/src/internal.ts +299 -0
- package/references/minimem/src/minimem.ts +1291 -0
- package/references/minimem/src/search/__tests__/hybrid.test.ts +247 -0
- package/references/minimem/src/search/graph.ts +234 -0
- package/references/minimem/src/search/hybrid.ts +151 -0
- package/references/minimem/src/search/search.ts +256 -0
- package/references/minimem/src/server/__tests__/mcp.test.ts +347 -0
- package/references/minimem/src/server/__tests__/tools.test.ts +364 -0
- package/references/minimem/src/server/mcp.ts +326 -0
- package/references/minimem/src/server/tools.ts +720 -0
- package/references/minimem/src/session.ts +460 -0
- package/references/minimem/src/store/__tests__/manifest.test.ts +177 -0
- package/references/minimem/src/store/__tests__/materialize.test.ts +52 -0
- package/references/minimem/src/store/__tests__/store-graph.test.ts +228 -0
- package/references/minimem/src/store/index.ts +27 -0
- package/references/minimem/src/store/manifest.ts +203 -0
- package/references/minimem/src/store/materialize.ts +185 -0
- package/references/minimem/src/store/store-graph.ts +252 -0
- package/references/minimem/tsconfig.json +19 -0
- package/references/minimem/tsup.config.ts +26 -0
- package/references/minimem/vitest.config.ts +29 -0
- package/references/openteams/src/cli/generate.ts +23 -1
- package/references/openteams/src/generators/agent-prompt-generator.test.ts +94 -0
- package/references/openteams/src/generators/agent-prompt-generator.ts +42 -13
- package/references/openteams/src/generators/package-generator.ts +9 -1
- package/references/openteams/src/generators/skill-generator.test.ts +28 -0
- package/references/openteams/src/generators/skill-generator.ts +10 -4
- package/references/skill-tree/.claude/settings.json +6 -0
- package/references/skill-tree/.sudocode/issues.jsonl +19 -0
- package/references/skill-tree/.sudocode/specs.jsonl +3 -0
- package/references/skill-tree/CLAUDE.md +132 -0
- package/references/skill-tree/README.md +396 -0
- package/references/skill-tree/docs/GAPS_v1.md +221 -0
- package/references/skill-tree/docs/INTEGRATION_PLAN.md +467 -0
- package/references/skill-tree/docs/TODOS.md +91 -0
- package/references/skill-tree/docs/anthropic_skill_guide.md +1364 -0
- package/references/skill-tree/docs/design/federated-skill-trees.md +524 -0
- package/references/skill-tree/docs/design/multi-agent-sync.md +759 -0
- package/references/skill-tree/docs/scraper/BRAINSTORM.md +583 -0
- package/references/skill-tree/docs/scraper/POC_PLAN.md +420 -0
- package/references/skill-tree/docs/scraper/README.md +170 -0
- package/references/skill-tree/examples/basic-usage.ts +157 -0
- package/references/skill-tree/package-lock.json +1852 -0
- package/references/skill-tree/package.json +66 -0
- package/references/skill-tree/plan.md +78 -0
- package/references/skill-tree/scraper/README.md +123 -0
- package/references/skill-tree/scraper/docs/DESIGN.md +683 -0
- package/references/skill-tree/scraper/docs/PLAN.md +336 -0
- package/references/skill-tree/scraper/drizzle.config.ts +10 -0
- package/references/skill-tree/scraper/package-lock.json +6329 -0
- package/references/skill-tree/scraper/package.json +68 -0
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-description.md +7 -0
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-name.md +7 -0
- package/references/skill-tree/scraper/test/fixtures/minimal-skill/SKILL.md +27 -0
- package/references/skill-tree/scraper/test/fixtures/skill-json/SKILL.json +21 -0
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/SKILL.md +54 -0
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/_meta.json +24 -0
- package/references/skill-tree/scraper/test/fixtures/valid-skill/SKILL.md +93 -0
- package/references/skill-tree/scraper/test/fixtures/valid-skill/_meta.json +22 -0
- package/references/skill-tree/scraper/tsup.config.ts +14 -0
- package/references/skill-tree/scraper/vitest.config.ts +17 -0
- package/references/skill-tree/scripts/convert-to-vitest.ts +166 -0
- package/references/skill-tree/skills/skill-writer/SKILL.md +339 -0
- package/references/skill-tree/skills/skill-writer/references/examples.md +326 -0
- package/references/skill-tree/skills/skill-writer/references/patterns.md +210 -0
- package/references/skill-tree/skills/skill-writer/references/quality-checklist.md +123 -0
- package/references/skill-tree/test/run-all.ts +106 -0
- package/references/skill-tree/test/utils.ts +128 -0
- package/references/skill-tree/vitest.config.ts +16 -0
- package/references/swarmkit/src/commands/init/phases/configure.ts +0 -22
- package/references/swarmkit/src/commands/init/phases/global-setup.ts +5 -3
- package/references/swarmkit/src/commands/init/wizard.ts +2 -2
- package/references/swarmkit/src/packages/setup.test.ts +53 -7
- package/references/swarmkit/src/packages/setup.ts +37 -1
- package/scripts/bootstrap.mjs +26 -1
- package/scripts/generate-agents.mjs +5 -1
- package/scripts/map-hook.mjs +97 -64
- package/scripts/map-sidecar.mjs +179 -25
- package/scripts/team-loader.mjs +12 -41
- package/skills/swarm/SKILL.md +89 -25
- package/src/__tests__/agent-generator.test.mjs +6 -13
- package/src/__tests__/bootstrap.test.mjs +124 -1
- package/src/__tests__/config.test.mjs +200 -27
- package/src/__tests__/e2e-live-map.test.mjs +536 -0
- package/src/__tests__/e2e-mesh-sidecar.test.mjs +570 -0
- package/src/__tests__/e2e-native-task-hooks.test.mjs +376 -0
- package/src/__tests__/e2e-sidecar-bridge.test.mjs +477 -0
- package/src/__tests__/helpers.mjs +13 -0
- package/src/__tests__/inbox.test.mjs +22 -89
- package/src/__tests__/index.test.mjs +35 -9
- package/src/__tests__/integration.test.mjs +513 -0
- package/src/__tests__/map-events.test.mjs +514 -150
- package/src/__tests__/mesh-connection.test.mjs +308 -0
- package/src/__tests__/opentasks-client.test.mjs +517 -0
- package/src/__tests__/paths.test.mjs +185 -41
- package/src/__tests__/sidecar-client.test.mjs +35 -0
- package/src/__tests__/sidecar-server.test.mjs +124 -0
- package/src/__tests__/skilltree-client.test.mjs +80 -0
- package/src/agent-generator.mjs +104 -33
- package/src/bootstrap.mjs +150 -10
- package/src/config.mjs +81 -17
- package/src/context-output.mjs +58 -8
- package/src/inbox.mjs +9 -54
- package/src/index.mjs +39 -8
- package/src/map-connection.mjs +4 -3
- package/src/map-events.mjs +350 -80
- package/src/mesh-connection.mjs +148 -0
- package/src/opentasks-client.mjs +269 -0
- package/src/paths.mjs +182 -27
- package/src/sessionlog.mjs +14 -9
- package/src/sidecar-client.mjs +81 -27
- package/src/sidecar-server.mjs +175 -16
- package/src/skilltree-client.mjs +173 -0
- package/src/template.mjs +68 -4
- package/vitest.config.mjs +1 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tier 6: Live Agent Integration Tests
|
|
3
|
+
*
|
|
4
|
+
* Full end-to-end tests with a live Claude Code instance, real plugin hooks,
|
|
5
|
+
* mock MAP server, and agent-inbox. Verifies the complete plugin mechanism
|
|
6
|
+
* works with a live agent — hooks fire, MAP events are emitted, inbox
|
|
7
|
+
* messages are received and surfaced to agents.
|
|
8
|
+
*
|
|
9
|
+
* These tests are gated behind the LIVE_AGENT_TEST=1 env var because they:
|
|
10
|
+
* - Require a live Claude Code CLI with an active subscription
|
|
11
|
+
* - Make real LLM API calls ($1-10 per test group)
|
|
12
|
+
* - Take 2-10 minutes per test group
|
|
13
|
+
*
|
|
14
|
+
* The mock MAP server provides the deterministic "external" side:
|
|
15
|
+
* - Captures all agent lifecycle events (spawn, done, state)
|
|
16
|
+
* - Captures all message payloads (task.dispatched, task.completed)
|
|
17
|
+
* - Sends inbound messages at controlled times
|
|
18
|
+
*
|
|
19
|
+
* Run: LIVE_AGENT_TEST=1 npx vitest run --config e2e/vitest.config.e2e.mjs e2e/tier6-live-agent.test.mjs
|
|
20
|
+
*
|
|
21
|
+
* Run only specific groups:
|
|
22
|
+
* LIVE_AGENT_TEST=1 npx vitest run --config e2e/vitest.config.e2e.mjs e2e/tier6-live-agent.test.mjs -t "lifecycle"
|
|
23
|
+
* LIVE_AGENT_TEST=1 npx vitest run --config e2e/vitest.config.e2e.mjs e2e/tier6-live-agent.test.mjs -t "subagent"
|
|
24
|
+
* LIVE_AGENT_TEST=1 npx vitest run --config e2e/vitest.config.e2e.mjs e2e/tier6-live-agent.test.mjs -t "inbox"
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
28
|
+
import fs from "fs";
|
|
29
|
+
import path from "path";
|
|
30
|
+
import { fileURLToPath } from "url";
|
|
31
|
+
import { runClaude, CLI_AVAILABLE } from "./helpers/cli.mjs";
|
|
32
|
+
import { extractToolCalls, findToolCalls, getResult, getHookOutput } from "./helpers/assertions.mjs";
|
|
33
|
+
import { createWorkspace } from "./helpers/workspace.mjs";
|
|
34
|
+
import { cleanupWorkspace, waitFor } from "./helpers/cleanup.mjs";
|
|
35
|
+
import { MockMapServer } from "./helpers/map-mock-server.mjs";
|
|
36
|
+
|
|
37
|
+
// Directory containing this test file (ESM has no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Interpret an environment-variable value as an on/off flag.
 *
 * Unset, empty, "0" and "false" (case-insensitive, surrounding whitespace
 * ignored) count as disabled; any other value counts as enabled. This keeps
 * `LIVE_AGENT_TEST=1` working while making `LIVE_AGENT_TEST=0` actually
 * disable the (expensive) live tests instead of enabling them.
 *
 * @param {string | undefined | null} value - Raw environment value.
 * @returns {boolean} Whether the flag is switched on.
 */
function isEnvFlagEnabled(value) {
  if (value == null) return false;
  const normalized = String(value).trim().toLowerCase();
  return normalized !== "" && normalized !== "0" && normalized !== "false";
}

// Gate for every suite in this file: live tests make real LLM API calls.
const LIVE = isEnvFlagEnabled(process.env.LIVE_AGENT_TEST);
|
|
39
|
+
|
|
40
|
+
// Check if agent-inbox is available.
// The probe is best-effort: any import failure leaves the flag false so the
// inbox suite is skipped rather than crashing the whole test file.
let agentInboxAvailable = false;
try {
  await import("agent-inbox");
  agentInboxAvailable = true;
} catch (err) {
  // ERR_MODULE_NOT_FOUND is the expected "not installed" case and stays quiet.
  // Any other failure means the package was found but could not be loaded;
  // report it so the skipped inbox tests are not mistaken for a missing
  // optional dependency.
  if (err?.code !== "ERR_MODULE_NOT_FOUND") {
    console.warn(
      `[tier6] agent-inbox failed to load; inbox tests will be skipped: ${err?.message ?? err}`
    );
  }
}
|
|
48
|
+
|
|
49
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
50
|
+
// Group 1: MAP Agent Lifecycle with /swarm
|
|
51
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
// Suite: drives one live `/swarm gsd` session against a mock MAP server and
// then asserts on the tool calls in the transcript and on what the mock
// server captured. Skipped unless live testing is enabled and the CLI exists.
describe.skipIf(!LIVE || !CLI_AVAILABLE)(
  "tier6: MAP agent lifecycle via /swarm",
  { timeout: 600_000 },
  () => {
    let mockServer;
    let workspace;
    let messages;
    let result;

    // Every `map/agents/unregister` call captured by the mock server.
    const unregisterEvents = () =>
      mockServer.callExtensions.filter(
        (e) => e.method === "map/agents/unregister"
      );

    // One shared (expensive) agent run for the whole suite; the individual
    // tests below only inspect its recorded output.
    beforeAll(async () => {
      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
          inbox: { enabled: true },
        },
        files: {
          "README.md": "# Test Project\nA simple test project for live agent testing.\n",
        },
      });

      const run = await runClaude(
        'Run /swarm gsd with goal: Create hello.txt with "Hello World" and goodbye.txt with "Goodbye World". ' +
          'You MUST use the team — spawn agents to do the work, do not create the files yourself.',
        {
          cwd: workspace.dir,
          maxBudgetUsd: 10.0,
          maxTurns: 50,
          timeout: 300_000,
          label: "tier6-lifecycle",
        }
      );
      messages = run.messages;
      result = getResult(messages);

      const toolNames = extractToolCalls(messages).map((tc) => tc.name);
      console.log(`[tier6] lifecycle tool calls: ${toolNames.join(", ")}`);
      console.log(`[tier6] result: ${result?.subtype || "success"}, cost: $${result?.total_cost_usd?.toFixed(2) || "?"}`);

      // Grace period after the run ends before asserting on captured events
      // (presumably lets trailing sidecar traffic reach the mock server).
      await new Promise((r) => setTimeout(r, 2000));
    });

    afterAll(async () => {
      // Stop the mock server even when workspace cleanup throws; otherwise
      // the server would be left running for the rest of the test process.
      try {
        if (workspace) {
          cleanupWorkspace(workspace.dir);
          workspace.cleanup();
        }
      } finally {
        if (mockServer) await mockServer.stop();
      }
    });

    it("invokes /swarm skill", () => {
      const skillCalls = findToolCalls(messages, "Skill");
      expect(skillCalls.length).toBeGreaterThan(0);
      // The skill may be reported bare or namespaced by the plugin name.
      expect(
        skillCalls.some((sc) => sc.skill === "swarm" || sc.skill === "claude-code-swarm:swarm")
      ).toBe(true);
    });

    it("creates a team via TeamCreate", () => {
      const teamCreates = findToolCalls(messages, "TeamCreate");
      expect(teamCreates.length).toBeGreaterThan(0);
    });

    it("spawns at least one agent with team_name", () => {
      const agentCalls = findToolCalls(messages, "Agent");
      if (agentCalls.length === 0) {
        // Logged for diagnosis only; the assertion below still fails the test.
        console.log("[tier6] WARNING: LLM did not spawn any agents (non-deterministic)");
      }
      expect(agentCalls.length).toBeGreaterThan(0);
      expect(agentCalls.some((ac) => ac.team_name)).toBe(true);
    });

    it("sidecar connected to mock MAP server", () => {
      expect(mockServer.getByMethod("map/connect").length).toBeGreaterThan(0);
    });

    it("sidecar registered itself with MAP server", () => {
      expect(mockServer.getByMethod("map/agents/register").length).toBeGreaterThan(0);
    });

    it("MAP server received agent spawn events for team agents", () => {
      // Agent spawning depends on the LLM actually using the Agent tool.
      // If no agents were spawned (non-deterministic), skip gracefully.
      const agentCalls = findToolCalls(messages, "Agent");
      if (agentCalls.length === 0) {
        console.log("[tier6] skipping: no Agent tool calls in this run");
        return;
      }
      expect(mockServer.spawnedAgents.length).toBeGreaterThan(0);
      console.log(
        "[tier6] spawned agents:",
        mockServer.spawnedAgents.map((a) => `${a.name || a.agentId} (${a.role})`).join(", ")
      );
    });

    it("spawned agents have correct metadata", () => {
      // Passes vacuously when nothing was spawned.
      for (const agent of mockServer.spawnedAgents) {
        expect(agent.agentId).toBeTruthy();
        expect(agent.role).toBeTruthy();
        // Only agents flagged as team roles are expected to carry the template.
        if (agent.metadata?.isTeamRole) {
          expect(agent.metadata.template).toBe("gsd");
        }
      }
    });

    it("MAP server received agent unregister events (done)", () => {
      if (mockServer.spawnedAgents.length === 0) {
        console.log("[tier6] skipping: no agents were spawned");
        return;
      }
      const unregisters = unregisterEvents();
      expect(unregisters.length).toBeGreaterThan(0);
      console.log(
        "[tier6] unregistered agents:",
        unregisters.map((u) => u.params?.agentId).join(", ")
      );
    });

    it("spawn and unregister counts are balanced", () => {
      const spawnIds = new Set(mockServer.spawnedAgents.map((a) => a.agentId));
      const doneIds = new Set(unregisterEvents().map((e) => e.params?.agentId));

      // Hard requirement: nothing is unregistered that was never spawned.
      for (const id of doneIds) {
        expect(spawnIds.has(id)).toBe(true);
      }

      // Soft check: agents spawned but never unregistered are only reported.
      const orphans = [...spawnIds].filter((id) => !doneIds.has(id));
      if (orphans.length > 0) {
        console.log("[tier6] agents spawned but not unregistered:", orphans.join(", "));
      }
    });

    it("MAP server received state updates (idle transitions)", () => {
      expect(mockServer.stateUpdates.length).toBeGreaterThan(0);
      console.log(`[tier6] state updates: ${mockServer.stateUpdates.length}`);
    });

    it("MAP server received task lifecycle messages", () => {
      // Task messages only appear when agents are spawned
      if (mockServer.spawnedAgents.length === 0) {
        console.log("[tier6] skipping: no agents were spawned, no task messages expected");
        return;
      }

      const dispatched = mockServer.sentMessages.filter(
        (m) => m.payload?.type === "task.dispatched"
      );
      const completed = mockServer.sentMessages.filter(
        (m) => m.payload?.type === "task.completed"
      );

      expect(dispatched.length).toBeGreaterThan(0);
      for (const d of dispatched) {
        expect(d.payload.targetAgent).toBeTruthy();
        expect(d.payload.taskId).toBeTruthy();
      }
      console.log(
        "[tier6] task.dispatched:",
        dispatched.map((d) => `${d.payload.taskId} → ${d.payload.targetAgent}`).join(", ")
      );

      expect(completed.length).toBeGreaterThan(0);
      console.log(`[tier6] task.completed: ${completed.length}`);

      // Verify balance: every completed task was dispatched
      const dispatchedIds = new Set(dispatched.map((d) => d.payload.taskId));
      for (const c of completed) {
        // Completions without a taskId are not checked.
        if (c.payload.taskId) {
          expect(dispatchedIds.has(c.payload.taskId)).toBe(true);
        }
      }
    });
  }
);
|
|
240
|
+
|
|
241
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
242
|
+
// Group 2: Subagent Lifecycle Events
|
|
243
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
244
|
+
|
|
245
|
+
// Suite: runs one live `/swarm gsd` session (without the inbox config) against
// a mock MAP server and checks the spawn/unregister events it captured.
// Skipped unless live testing is enabled and the CLI exists.
describe.skipIf(!LIVE || !CLI_AVAILABLE)(
  "tier6: subagent MAP lifecycle",
  { timeout: 600_000 },
  () => {
    let mockServer;
    let workspace;
    let messages;

    // Every `map/agents/unregister` call captured by the mock server.
    const unregisterEvents = () =>
      mockServer.callExtensions.filter(
        (e) => e.method === "map/agents/unregister"
      );

    // Spawn records whose role is exactly "subagent".
    const subagentSpawns = () =>
      mockServer.spawnedAgents.filter((a) => a.role === "subagent");

    // One shared (expensive) agent run for the whole suite.
    beforeAll(async () => {
      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
        },
        files: {
          "README.md": "# Subagent Test\n",
        },
      });

      const run = await runClaude(
        'Please run /swarm gsd with goal: Create hello.py that prints "Hello" and goodbye.py that prints "Goodbye"',
        {
          cwd: workspace.dir,
          maxBudgetUsd: 10.0,
          maxTurns: 50,
          timeout: 300_000,
          label: "tier6-subagents",
        }
      );
      messages = run.messages;

      const toolNames = extractToolCalls(messages).map((tc) => tc.name);
      console.log(`[tier6] subagent tool calls: ${toolNames.join(", ")}`);

      // Grace period after the run ends before asserting on captured events
      // (presumably lets trailing sidecar traffic reach the mock server).
      await new Promise((r) => setTimeout(r, 2000));
    });

    afterAll(async () => {
      // Stop the mock server even when workspace cleanup throws; otherwise
      // the server would be left running for the rest of the test process.
      try {
        if (workspace) {
          cleanupWorkspace(workspace.dir);
          workspace.cleanup();
        }
      } finally {
        if (mockServer) await mockServer.stop();
      }
    });

    it("MAP server received subagent spawn events", () => {
      const subagents = subagentSpawns();

      // Spawn events are only required when the LLM actually used the Agent
      // tool in this (non-deterministic) run.
      const agentCalls = findToolCalls(messages, "Agent");
      if (agentCalls.length > 0) {
        expect(mockServer.spawnedAgents.length).toBeGreaterThan(0);
      }

      console.log(
        `[tier6] total spawned: ${mockServer.spawnedAgents.length}, ` +
          `subagent role: ${subagents.length}`
      );
    });

    it("subagent spawn events include metadata", () => {
      // Passes vacuously when no "subagent"-role spawns were captured.
      for (const sa of subagentSpawns()) {
        expect(sa.agentId).toBeTruthy();
        expect(sa.metadata).toBeDefined();
        expect(sa.metadata.isTeamRole).toBe(false);
      }
    });

    it("subagent done events are emitted", () => {
      const unregisters = unregisterEvents();
      // Only meaningful when something was spawned in this run.
      if (mockServer.spawnedAgents.length > 0) {
        expect(unregisters.length).toBeGreaterThan(0);
      }
    });

    it("all event types are chronologically ordered", () => {
      // agentId -> timestamp recorded on the spawn event.
      const spawnTimes = new Map();
      for (const a of mockServer.spawnedAgents) {
        spawnTimes.set(a.agentId, a._timestamp);
      }

      for (const d of unregisterEvents()) {
        const spawnTime = spawnTimes.get(d.params?.agentId);
        // Unregisters with no matching (timestamped) spawn are not checked.
        if (spawnTime) {
          expect(d._timestamp).toBeGreaterThan(spawnTime);
        }
      }
    });
  }
);
|
|
350
|
+
|
|
351
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
352
|
+
// Group 3: Inbound Inbox — MAP → Agent-Inbox → Agent
|
|
353
|
+
//
|
|
354
|
+
// Sends MAP messages DURING the session (immediately after the sidecar
|
|
355
|
+
// registers with our mock server), so they are available when the
|
|
356
|
+
// UserPromptSubmit inject hook fires on the agent's turns.
|
|
357
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
358
|
+
|
|
359
|
+
// Inbound inbox suite: pushes two MAP messages from the mock server while a
// live Claude session is running and checks that some trace of them shows up
// in the session output or hook output. Skipped unless LIVE, CLI_AVAILABLE
// and agentInboxAvailable are all truthy.
describe.skipIf(!LIVE || !CLI_AVAILABLE || !agentInboxAvailable)(
  "tier6: inbound inbox (MAP → agent)",
  { timeout: 600_000 },
  () => {
    let mockServer;
    let workspace;

    // Tear down whatever the test managed to create, in creation-reverse order.
    afterAll(async () => {
      if (workspace) {
        cleanupWorkspace(workspace.dir);
        workspace.cleanup();
      }
      if (mockServer) await mockServer.stop();
    });

    it("agent receives MAP messages injected via inbox during session", async () => {
      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
          inbox: { enabled: true },
        },
        files: {
          "README.md": "# Inbox Test\n",
        },
      });

      // [payload, meta] pairs pushed to every connection, in this order.
      const inboundMessages = [
        [
          { type: "text", text: "IMPORTANT: The secret code is BRAVO-9931. Report this code back." },
          { from: "external-coordinator", to: { scope: "default" } },
        ],
        [
          { type: "text", text: "Status update: All systems operational." },
          { from: "ops-monitor", to: { scope: "default" } },
        ],
      ];

      // Runs once the sidecar has registered with MAP (not merely connected).
      // If registration is never observed, nothing is sent and the final
      // assertion below is what fails.
      const injectInbound = async (registered) => {
        if (!registered) return;
        // Brief delay for agent-inbox to initialize after sidecar registration.
        await new Promise((resolve) => setTimeout(resolve, 1000));
        for (const [payload, meta] of inboundMessages) {
          mockServer.sendToAll(payload, meta);
        }
      };

      // Started before the session so the injection happens while Claude runs.
      const injection = waitFor(
        () => mockServer.getByMethod("map/agents/register").length > 0,
        30_000
      ).then(injectInbound);

      // The prompt front-loads some file work so that several turns (and thus
      // several inject-hook invocations) occur after the messages arrive.
      const prompt = [
        "Please do the following in order:",
        "1. Create a file called warm-up.txt with the text 'warming up'",
        "2. Read the file back to confirm it exists",
        "3. Now check if you have received any external messages or inbox notifications",
        "4. Report any messages you received, including any secret codes",
      ].join("\n");

      const run = await runClaude(prompt, {
        cwd: workspace.dir,
        maxBudgetUsd: 3.0,
        maxTurns: 15,
        timeout: 180_000,
        label: "tier6-inbox-single-session",
      });

      await injection; // make sure the injection task has finished

      // Look for each marker in both the raw session output and the hook output.
      const allText = run.stdout + run.stderr;
      const hookOutput = getHookOutput(run.messages);
      const appears = (needle) => allText.includes(needle) || hookOutput.includes(needle);

      const mentionsCode = appears("BRAVO-9931");
      const mentionsSender = appears("external-coordinator");
      const mentionsOps = appears("ops-monitor");
      const mentionsMAP = appears("[MAP]");

      console.log(`[tier6] inbox — code: ${mentionsCode}, sender: ${mentionsSender}, ops: ${mentionsOps}, MAP header: ${mentionsMAP}`);
      console.log(`[tier6] hook output length: ${hookOutput.length}`);

      // The LLM's wording varies, so any one of the four markers is accepted;
      // the hook output is expected to contain the [MAP] header and sender names.
      const sawAnyMarker = [mentionsCode, mentionsSender, mentionsOps, mentionsMAP].some(Boolean);
      expect(sawAnyMarker).toBe(true);
    });
  }
);
|
|
455
|
+
|
|
456
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
457
|
+
// Group 4: Outbound Messages — Agent → MAP (via hooks)
|
|
458
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
459
|
+
|
|
460
|
+
// Outbound suite: one live Claude session runs `/swarm gsd` against a
// MockMapServer in `beforeAll`; every test below only inspects what the mock
// server recorded. Skipped unless LIVE and CLI_AVAILABLE are both truthy.
describe.skipIf(!LIVE || !CLI_AVAILABLE)(
  "tier6: outbound MAP messages via hooks",
  { timeout: 600_000 },
  () => {
    // Vitest takes hook time limits from `hookTimeout`, not from the suite's
    // `timeout` option above. The setup below can legitimately run for
    // 300 s + 2 s, so it gets an explicit budget instead of relying on config.
    const SETUP_TIMEOUT_MS = 600_000;

    let mockServer;
    let workspace;

    beforeAll(async () => {
      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
          inbox: { enabled: true },
        },
        files: {
          "README.md": "# Outbound Test\n",
        },
      });

      const run = await runClaude(
        'Please run /swarm gsd with goal: Create a file called output.txt with the text "test output"',
        {
          cwd: workspace.dir,
          maxBudgetUsd: 10.0,
          maxTurns: 50,
          timeout: 300_000,
          label: "tier6-outbound",
        }
      );

      // Diagnostic only: which tools the session invoked.
      const toolNames = extractToolCalls(run.messages).map((tc) => tc.name);
      console.log(`[tier6] outbound tool calls: ${toolNames.join(", ")}`);

      // Give trailing hook traffic a moment to reach the mock server.
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }, SETUP_TIMEOUT_MS);

    // Tear down whatever setup managed to create.
    afterAll(async () => {
      if (workspace) {
        cleanupWorkspace(workspace.dir);
        workspace.cleanup();
      }
      if (mockServer) await mockServer.stop();
    });

    // The server must have received both a connect and a register request.
    it("all MAP protocol methods are present", () => {
      const methods = new Set(
        mockServer.receivedMessages.map((m) => m.data?.method).filter(Boolean)
      );
      console.log("[tier6] MAP methods seen:", [...methods].join(", "));

      expect(methods.has("map/connect")).toBe(true);
      expect(methods.has("map/agents/register")).toBe(true);
    });

    // Every recorded spawn must carry a non-empty `scopes` array.
    // Passes vacuously when no agents were spawned.
    it("agent spawns have scopes from the session", () => {
      for (const agent of mockServer.spawnedAgents) {
        expect(agent.scopes).toBeDefined();
        expect(Array.isArray(agent.scopes)).toBe(true);
        expect(agent.scopes.length).toBeGreaterThan(0);
      }
    });

    // Any task.dispatched message that names a target agent must name one
    // the server saw being spawned.
    it("task.dispatched messages reference spawned agent IDs", () => {
      const spawnedIds = new Set(mockServer.spawnedAgents.map((a) => a.agentId));
      // Without any recorded spawn there is nothing to cross-check against.
      if (spawnedIds.size === 0) return;

      const dispatched = mockServer.sentMessages.filter(
        (m) => m.payload?.type === "task.dispatched"
      );
      for (const d of dispatched) {
        if (d.payload.targetAgent) {
          expect(spawnedIds.has(d.payload.targetAgent)).toBe(true);
        }
      }
    });

    // At least one state update overall, and at least one with state "idle".
    it("state updates are emitted during the session", () => {
      expect(mockServer.stateUpdates.length).toBeGreaterThan(0);
      const idleUpdates = mockServer.stateUpdates.filter((u) => u.state === "idle");
      console.log(
        `[tier6] state updates: ${mockServer.stateUpdates.length} total, ${idleUpdates.length} idle`
      );
      expect(idleUpdates.length).toBeGreaterThan(0);
    });

    // Merges every recorded event into one time-sorted list and prints it for
    // debugging; the only assertion is that the list is non-empty.
    it("full event timeline is coherent", () => {
      const timeline = [
        ...mockServer.spawnedAgents.map((a) => ({
          type: "spawn",
          id: a.agentId,
          time: a._timestamp,
        })),
        ...mockServer.stateUpdates.map((u) => ({
          type: "state",
          state: u.state,
          time: u._timestamp,
        })),
        ...mockServer.sentMessages.map((m) => ({
          type: m.payload?.type || "message",
          time: m._timestamp,
        })),
        ...mockServer.callExtensions.map((e) => ({
          type: e.method,
          id: e.params?.agentId,
          time: e._timestamp,
        })),
      ];

      timeline.sort((a, b) => a.time - b.time);

      console.log("[tier6] event timeline:");
      for (const evt of timeline) {
        const label = evt.id ? `${evt.type} (${evt.id})` : evt.type;
        console.log(` ${new Date(evt.time).toISOString()} ${label}`);
      }

      expect(timeline.length).toBeGreaterThan(0);
    });
  }
);
|
|
578
|
+
|
|
579
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
580
|
+
// Group 5: Combined Flow — Inbound + Outbound in Single Session
|
|
581
|
+
//
|
|
582
|
+
// Sends a MAP message during the session, then the agent runs /swarm.
|
|
583
|
+
// Verifies both inbound (agent sees external message) and outbound
|
|
584
|
+
// (MAP server receives lifecycle events) work in a single session.
|
|
585
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
586
|
+
|
|
587
|
+
// Combined suite: a single live session must both see an externally injected
// MAP message (inbound) and cause lifecycle traffic on the mock MAP server
// (outbound). Skipped unless LIVE, CLI_AVAILABLE and agentInboxAvailable are
// all truthy.
describe.skipIf(!LIVE || !CLI_AVAILABLE || !agentInboxAvailable)(
  "tier6: combined inbound + outbound flow",
  { timeout: 600_000 },
  () => {
    let mockServer;
    let workspace;

    // Tear down whatever the test managed to create.
    afterAll(async () => {
      if (workspace) {
        cleanupWorkspace(workspace.dir);
        workspace.cleanup();
      }
      if (mockServer) await mockServer.stop();
    });

    it("agent processes external instructions and plugin emits full lifecycle", async () => {
      const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
          inbox: { enabled: true },
        },
        files: {
          "project.md": "# Combined Flow Test Project\n",
        },
      });

      // Runs once the mock server has at least one connection; if none shows
      // up in time nothing is sent and the inbound assertion below fails.
      const injectInstruction = async (connected) => {
        if (!connected) return;
        await pause(2000);

        const payload = {
          type: "text",
          text: "External instruction: Create a file called combined-test.txt with the content 'Combined flow works'",
        };
        const meta = { from: "external-system", to: { scope: "default" } };
        mockServer.sendToAll(payload, meta);
      };

      // Started before the session so the instruction arrives while Claude runs.
      const injection = waitFor(() => mockServer.connections.length > 0, 30_000).then(
        injectInstruction
      );

      const run = await runClaude(
        "Check for external messages first. Then run /swarm gsd to handle any external requests you received.",
        {
          cwd: workspace.dir,
          maxBudgetUsd: 10.0,
          maxTurns: 50,
          timeout: 300_000,
          label: "tier6-combined",
        }
      );

      await injection;
      await pause(2000); // let trailing hook traffic reach the mock server

      const allText = run.stdout + run.stderr;
      const hookOutput = getHookOutput(run.messages);
      const toolNames = extractToolCalls(run.messages).map((tc) => tc.name);
      console.log(`[tier6] combined tool calls: ${toolNames.join(", ")}`);

      // ── Inbound: the session text or the hook output shows the message ───
      const inboundEvidence = [
        [allText, "external-system"],
        [allText, "combined-test"],
        [hookOutput, "external-system"],
        [hookOutput, "[MAP]"],
      ];
      const sawExternal = inboundEvidence.some(([haystack, needle]) => haystack.includes(needle));
      console.log(`[tier6] combined — saw external: ${sawExternal}`);
      expect(sawExternal).toBe(true);

      // ── Outbound: the MAP server saw at least one connect request ────────
      expect(mockServer.getByMethod("map/connect").length).toBeGreaterThan(0);

      // Spawns are only required when the agent actually invoked the swarm skill.
      const swarmSkillNames = ["swarm", "claude-code-swarm:swarm"];
      const swarmCalled = findToolCalls(run.messages, "Skill").some((call) =>
        swarmSkillNames.includes(call.skill)
      );
      if (swarmCalled) {
        expect(mockServer.spawnedAgents.length).toBeGreaterThan(0);
      }

      expect(mockServer.stateUpdates.length).toBeGreaterThan(0);
    });
  }
);
|
|
680
|
+
|
|
681
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
682
|
+
// Group 6: Sidecar Persistence Across Sessions
|
|
683
|
+
//
|
|
684
|
+
// Runs two sequential sessions in the same workspace and verifies the mock
// MAP server saw a connection after the first session and no fewer after the
// second. Each session gets its own session_id (and therefore its own
// sidecar), so separate connections are expected rather than a reused one.
|
|
687
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
688
|
+
|
|
689
|
+
// Two back-to-back minimal sessions share one workspace and one mock MAP
// server. The test checks that the first session connected, that the connect
// count did not drop after the second, and that state updates were recorded.
// Skipped unless LIVE and CLI_AVAILABLE are both truthy.
describe.skipIf(!LIVE || !CLI_AVAILABLE)(
  "tier6: sidecar persistence across sessions",
  { timeout: 600_000 },
  () => {
    let mockServer;
    let workspace;

    // Tear down whatever the test managed to create.
    afterAll(async () => {
      if (workspace) {
        cleanupWorkspace(workspace.dir);
        workspace.cleanup();
      }
      if (mockServer) await mockServer.stop();
    });

    it("sidecar maintains single MAP connection across multiple sessions", async () => {
      mockServer = new MockMapServer();
      await mockServer.start();

      workspace = createWorkspace({
        config: {
          template: "gsd",
          map: {
            enabled: true,
            server: `ws://localhost:${mockServer.port}`,
            sidecar: "session",
          },
        },
      });

      // One single-turn session in the shared workspace.
      const runSession = (prompt, label) =>
        runClaude(prompt, {
          cwd: workspace.dir,
          maxBudgetUsd: 1.0,
          maxTurns: 1,
          timeout: 60_000,
          label,
        });
      // Fixed pause so late events can reach the mock server before counting.
      const settle = () => new Promise((resolve) => setTimeout(resolve, 2000));
      const countConnects = () => mockServer.getByMethod("map/connect").length;

      // Session 1
      await runSession("Say OK", "tier6-persist-1");
      await settle();
      const connectsAfterSession1 = countConnects();
      console.log(`[tier6] connects after session 1: ${connectsAfterSession1}`);

      // At least one connection must exist by now.
      expect(connectsAfterSession1).toBeGreaterThan(0);

      // Session 2 — same workspace
      await runSession("Say OK again", "tier6-persist-2");
      await settle();
      const connectsAfterSession2 = countConnects();
      console.log(`[tier6] connects after session 2: ${connectsAfterSession2}`);

      // Each session gets its own session_id and therefore its own sidecar;
      // with --no-session-persistence separate connections are expected, so
      // the count may grow but must never shrink.
      expect(connectsAfterSession2).toBeGreaterThanOrEqual(connectsAfterSession1);

      // State updates must have been recorded across the two sessions.
      expect(mockServer.stateUpdates.length).toBeGreaterThan(0);
    });
  }
);
|