opencode-swarm-plugin 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/.hive/issues.jsonl +16 -4
  2. package/.hive/memories.jsonl +274 -1
  3. package/.turbo/turbo-build.log +4 -4
  4. package/.turbo/turbo-test.log +318 -318
  5. package/CHANGELOG.md +113 -0
  6. package/bin/swarm.test.ts +106 -0
  7. package/bin/swarm.ts +413 -179
  8. package/dist/compaction-hook.d.ts +54 -4
  9. package/dist/compaction-hook.d.ts.map +1 -1
  10. package/dist/eval-capture.d.ts +122 -17
  11. package/dist/eval-capture.d.ts.map +1 -1
  12. package/dist/index.d.ts +1 -7
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +1278 -619
  15. package/dist/planning-guardrails.d.ts +121 -0
  16. package/dist/planning-guardrails.d.ts.map +1 -1
  17. package/dist/plugin.d.ts +9 -9
  18. package/dist/plugin.d.ts.map +1 -1
  19. package/dist/plugin.js +1283 -329
  20. package/dist/schemas/task.d.ts +0 -1
  21. package/dist/schemas/task.d.ts.map +1 -1
  22. package/dist/swarm-decompose.d.ts +0 -8
  23. package/dist/swarm-decompose.d.ts.map +1 -1
  24. package/dist/swarm-orchestrate.d.ts.map +1 -1
  25. package/dist/swarm-prompts.d.ts +0 -4
  26. package/dist/swarm-prompts.d.ts.map +1 -1
  27. package/dist/swarm-review.d.ts.map +1 -1
  28. package/dist/swarm.d.ts +0 -6
  29. package/dist/swarm.d.ts.map +1 -1
  30. package/evals/README.md +38 -0
  31. package/evals/coordinator-session.eval.ts +154 -0
  32. package/evals/fixtures/coordinator-sessions.ts +328 -0
  33. package/evals/lib/data-loader.ts +69 -0
  34. package/evals/scorers/coordinator-discipline.evalite-test.ts +536 -0
  35. package/evals/scorers/coordinator-discipline.ts +315 -0
  36. package/evals/scorers/index.ts +12 -0
  37. package/examples/plugin-wrapper-template.ts +303 -4
  38. package/package.json +2 -2
  39. package/src/compaction-hook.test.ts +8 -1
  40. package/src/compaction-hook.ts +31 -21
  41. package/src/eval-capture.test.ts +390 -0
  42. package/src/eval-capture.ts +163 -4
  43. package/src/hive.integration.test.ts +148 -0
  44. package/src/hive.ts +89 -0
  45. package/src/index.ts +68 -1
  46. package/src/planning-guardrails.test.ts +387 -2
  47. package/src/planning-guardrails.ts +289 -0
  48. package/src/plugin.ts +10 -10
  49. package/src/swarm-decompose.test.ts +195 -0
  50. package/src/swarm-decompose.ts +72 -1
  51. package/src/swarm-orchestrate.ts +44 -0
  52. package/src/swarm-prompts.ts +20 -0
  53. package/src/swarm-review.integration.test.ts +24 -29
  54. package/src/swarm-review.ts +41 -0
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Coordinator Discipline Scorers - Evaluate coordinator behavior
3
+ *
4
+ * These scorers measure whether a coordinator follows the protocol:
5
+ * 1. Don't edit files directly (spawn workers)
6
+ * 2. Don't run tests directly (workers do verification)
7
+ * 3. Spawn workers for all subtasks
8
+ * 4. Review worker output before accepting
9
+ * 5. Minimize time to first spawn (don't overthink)
10
+ *
11
+ * Inputs: CoordinatorSession from eval-capture
12
+ */
13
+
14
+ import { createScorer } from "evalite";
15
+ import type { CoordinatorSession } from "../../src/eval-capture.js";
16
+
17
+ /**
18
+ * Violation Count Scorer
19
+ *
20
+ * Counts VIOLATION events in the session.
21
+ * Each violation reduces score by 0.2.
22
+ *
23
+ * Violations tracked:
24
+ * - coordinator_edited_file (should spawn worker instead)
25
+ * - coordinator_ran_tests (workers do verification)
26
+ * - coordinator_reserved_files (only workers reserve)
27
+ * - no_worker_spawned (subtask exists but no worker)
28
+ *
29
+ * Score: 1.0 - (0.2 * violation_count), floored at 0.0
30
+ */
31
+ export const violationCount = createScorer({
32
+ name: "Violation Count",
33
+ description: "Coordinator followed protocol (no direct edits, tests, or reservations)",
34
+ scorer: ({ output }) => {
35
+ try {
36
+ const session = JSON.parse(String(output)) as CoordinatorSession;
37
+
38
+ // Count violations
39
+ const violations = session.events.filter(
40
+ (e) => e.event_type === "VIOLATION"
41
+ );
42
+
43
+ const count = violations.length;
44
+ const score = Math.max(0, 1.0 - count * 0.2);
45
+
46
+ if (count === 0) {
47
+ return {
48
+ score: 1.0,
49
+ message: "Perfect - 0 violations",
50
+ };
51
+ }
52
+
53
+ return {
54
+ score,
55
+ message: `${count} violations detected`,
56
+ };
57
+ } catch (error) {
58
+ return {
59
+ score: 0,
60
+ message: `Failed to parse CoordinatorSession: ${error}`,
61
+ };
62
+ }
63
+ },
64
+ });
65
+
66
+ /**
67
+ * Spawn Efficiency Scorer
68
+ *
69
+ * Measures whether workers were spawned for all subtasks.
70
+ * Coordinators should delegate work, not do it themselves.
71
+ *
72
+ * Score: workers_spawned / subtasks_planned
73
+ */
74
+ export const spawnEfficiency = createScorer({
75
+ name: "Spawn Efficiency",
76
+ description: "Workers spawned for all subtasks (delegation ratio)",
77
+ scorer: ({ output }) => {
78
+ try {
79
+ const session = JSON.parse(String(output)) as CoordinatorSession;
80
+
81
+ // Find decomposition_complete event (has subtask count)
82
+ const decomp = session.events.find(
83
+ (e) =>
84
+ e.event_type === "DECISION" &&
85
+ e.decision_type === "decomposition_complete"
86
+ );
87
+
88
+ if (!decomp) {
89
+ return {
90
+ score: 0,
91
+ message: "No decomposition event found",
92
+ };
93
+ }
94
+
95
+ const subtaskCount = (decomp.payload as { subtask_count?: number })?.subtask_count || 0;
96
+
97
+ if (subtaskCount === 0) {
98
+ return {
99
+ score: 0,
100
+ message: "No subtasks planned",
101
+ };
102
+ }
103
+
104
+ // Count worker_spawned events
105
+ const spawned = session.events.filter(
106
+ (e) =>
107
+ e.event_type === "DECISION" && e.decision_type === "worker_spawned"
108
+ ).length;
109
+
110
+ const score = spawned / subtaskCount;
111
+
112
+ return {
113
+ score,
114
+ message: `${spawned}/${subtaskCount} workers spawned (${(score * 100).toFixed(0)}%)`,
115
+ };
116
+ } catch (error) {
117
+ return {
118
+ score: 0,
119
+ message: `Failed to parse CoordinatorSession: ${error}`,
120
+ };
121
+ }
122
+ },
123
+ });
124
+
125
+ /**
126
+ * Review Thoroughness Scorer
127
+ *
128
+ * Measures whether coordinator reviewed worker output.
129
+ * Should have review_completed events for all finished subtasks.
130
+ *
131
+ * Score: reviews_completed / workers_finished
132
+ */
133
+ export const reviewThoroughness = createScorer({
134
+ name: "Review Thoroughness",
135
+ description: "Coordinator reviewed all worker output",
136
+ scorer: ({ output }) => {
137
+ try {
138
+ const session = JSON.parse(String(output)) as CoordinatorSession;
139
+
140
+ // Count finished workers (subtask_success or subtask_failed)
141
+ const finished = session.events.filter(
142
+ (e) =>
143
+ e.event_type === "OUTCOME" &&
144
+ (e.outcome_type === "subtask_success" ||
145
+ e.outcome_type === "subtask_failed")
146
+ ).length;
147
+
148
+ if (finished === 0) {
149
+ return {
150
+ score: 1.0,
151
+ message: "No finished workers to review",
152
+ };
153
+ }
154
+
155
+ // Count review_completed events
156
+ const reviewed = session.events.filter(
157
+ (e) =>
158
+ e.event_type === "DECISION" && e.decision_type === "review_completed"
159
+ ).length;
160
+
161
+ const score = reviewed / finished;
162
+
163
+ return {
164
+ score,
165
+ message: `${reviewed}/${finished} workers reviewed (${(score * 100).toFixed(0)}%)`,
166
+ };
167
+ } catch (error) {
168
+ return {
169
+ score: 0,
170
+ message: `Failed to parse CoordinatorSession: ${error}`,
171
+ };
172
+ }
173
+ },
174
+ });
175
+
176
/**
 * Time to First Spawn Scorer
 *
 * Measures how fast the coordinator spawned the first worker after
 * decomposition completed. Overthinking and perfectionism delays workers
 * and blocks progress.
 *
 * Normalization:
 * - < 60s: 1.0 (excellent)
 * - 60-300s: linear decay from 1.0 down to 0.0
 * - > 300s: 0.0 (way too slow)
 *
 * Score: normalized to 0-1 (faster is better)
 *
 * Returns 0 when either the decomposition event or a worker_spawned event
 * is missing, and 0 on parse failure. A negative delta (spawn event
 * timestamped before decomposition) falls into the < 60s branch and
 * scores 1.0.
 */
export const timeToFirstSpawn = createScorer({
  name: "Time to First Spawn",
  description: "Coordinator spawned workers quickly (no overthinking)",
  scorer: ({ output }) => {
    try {
      const session = JSON.parse(String(output)) as CoordinatorSession;

      // Find decomposition_complete event
      const decomp = session.events.find(
        (e) =>
          e.event_type === "DECISION" &&
          e.decision_type === "decomposition_complete"
      );

      if (!decomp) {
        return {
          score: 0,
          message: "No decomposition event found",
        };
      }

      // Find first worker_spawned event (events assumed in session order)
      const firstSpawn = session.events.find(
        (e) =>
          e.event_type === "DECISION" && e.decision_type === "worker_spawned"
      );

      if (!firstSpawn) {
        return {
          score: 0,
          message: "No worker spawned",
        };
      }

      // Calculate time delta between decomposition and first spawn
      const decompTime = new Date(decomp.timestamp).getTime();
      const spawnTime = new Date(firstSpawn.timestamp).getTime();
      const deltaMs = spawnTime - decompTime;

      // Normalize: < 60s = 1.0, > 300s = 0.0, linear in between
      const EXCELLENT_MS = 60_000;
      const POOR_MS = 300_000;

      let score: number;
      if (deltaMs < EXCELLENT_MS) {
        score = 1.0;
      } else if (deltaMs > POOR_MS) {
        score = 0.0;
      } else {
        // Linear decay from 1.0 to 0.0 across the 60s-300s window
        score = 1.0 - (deltaMs - EXCELLENT_MS) / (POOR_MS - EXCELLENT_MS);
      }

      const seconds = Math.round(deltaMs / 1000);

      return {
        score,
        message: `First spawn after ${deltaMs}ms (${seconds}s)`,
      };
    } catch (error) {
      return {
        score: 0,
        message: `Failed to parse CoordinatorSession: ${error}`,
      };
    }
  },
});
256
+
257
+ /**
258
+ * Overall Discipline Scorer
259
+ *
260
+ * Weighted composite of all coordinator discipline metrics.
261
+ *
262
+ * Weights:
263
+ * - Violations: 30% (most critical - breaking protocol)
264
+ * - Spawn efficiency: 25% (delegation is key)
265
+ * - Review thoroughness: 25% (quality gate)
266
+ * - Time to first spawn: 20% (bias toward action)
267
+ *
268
+ * Score: 0.0 to 1.0
269
+ */
270
+ export const overallDiscipline = createScorer({
271
+ name: "Overall Coordinator Discipline",
272
+ description: "Composite score for coordinator protocol adherence",
273
+ scorer: ({ output, expected }) => {
274
+ try {
275
+ // Run all scorers
276
+ const scores = {
277
+ violations: violationCount.scorer({ output, expected }),
278
+ spawn: spawnEfficiency.scorer({ output, expected }),
279
+ review: reviewThoroughness.scorer({ output, expected }),
280
+ speed: timeToFirstSpawn.scorer({ output, expected }),
281
+ };
282
+
283
+ // Weighted average
284
+ const weights = {
285
+ violations: 0.3,
286
+ spawn: 0.25,
287
+ review: 0.25,
288
+ speed: 0.2,
289
+ };
290
+
291
+ const totalScore =
292
+ scores.violations.score * weights.violations +
293
+ scores.spawn.score * weights.spawn +
294
+ scores.review.score * weights.review +
295
+ scores.speed.score * weights.speed;
296
+
297
+ const details = [
298
+ `Violations: ${(scores.violations.score * 100).toFixed(0)}%`,
299
+ `Spawn: ${(scores.spawn.score * 100).toFixed(0)}%`,
300
+ `Review: ${(scores.review.score * 100).toFixed(0)}%`,
301
+ `Speed: ${(scores.speed.score * 100).toFixed(0)}%`,
302
+ ].join(", ");
303
+
304
+ return {
305
+ score: totalScore,
306
+ message: `Overall: ${(totalScore * 100).toFixed(0)}% (${details})`,
307
+ };
308
+ } catch (error) {
309
+ return {
310
+ score: 0,
311
+ message: `Failed to compute composite score: ${error}`,
312
+ };
313
+ }
314
+ },
315
+ });
@@ -78,6 +78,18 @@ export {
78
78
  compactionQuality,
79
79
  } from "./compaction-scorers.js";
80
80
 
81
+ // ============================================================================
82
+ // Coordinator discipline scorers
83
+ // ============================================================================
84
+
85
+ export {
86
+ violationCount,
87
+ spawnEfficiency,
88
+ reviewThoroughness,
89
+ timeToFirstSpawn,
90
+ overallDiscipline,
91
+ } from "./coordinator-discipline.js";
92
+
81
93
  /**
82
94
  * Checks that subtasks cover the full task scope
83
95
  *
@@ -14,6 +14,7 @@
14
14
  * - SWARM_PROJECT_DIR: Project directory (critical for database path)
15
15
  */
16
16
  import type { Plugin, PluginInput, Hooks } from "@opencode-ai/plugin";
17
+ import type { ToolPart } from "@opencode-ai/sdk";
17
18
  import { tool } from "@opencode-ai/plugin";
18
19
  import { spawn } from "child_process";
19
20
  import { appendFileSync, mkdirSync, existsSync } from "node:fs";
@@ -68,6 +69,10 @@ function logCompaction(
68
69
  // This is CRITICAL: without it, the CLI uses process.cwd() which may be wrong
69
70
  let projectDirectory: string = process.cwd();
70
71
 
72
+ // Module-level SDK client - set during plugin initialization
73
+ // Used for scanning session messages during compaction
74
+ let sdkClient: any = null;
75
+
71
76
  // =============================================================================
72
77
  // CLI Execution Helper
73
78
  // =============================================================================
@@ -1039,7 +1044,9 @@ async function querySwarmState(sessionID: string): Promise<SwarmStateSnapshot> {
1039
1044
  let cells: any[] = [];
1040
1045
  if (cellsResult.exitCode === 0) {
1041
1046
  try {
1042
- cells = JSON.parse(cellsResult.stdout);
1047
+ const parsed = JSON.parse(cellsResult.stdout);
1048
+ // Handle wrapped response: { success: true, data: [...] }
1049
+ cells = Array.isArray(parsed) ? parsed : (parsed?.data ?? []);
1043
1050
  } catch (parseErr) {
1044
1051
  logCompaction("error", "query_swarm_state_parse_failed", {
1045
1052
  session_id: sessionID,
@@ -1175,7 +1182,7 @@ async function generateCompactionPrompt(
1175
1182
  snapshot: SwarmStateSnapshot,
1176
1183
  ): Promise<string | null> {
1177
1184
  const startTime = Date.now();
1178
- const liteModel = process.env.OPENCODE_LITE_MODEL || "claude-3-5-haiku-20241022";
1185
+ const liteModel = process.env.OPENCODE_LITE_MODEL || "__SWARM_LITE_MODEL__";
1179
1186
 
1180
1187
  logCompaction("debug", "generate_compaction_prompt_start", {
1181
1188
  session_id: snapshot.sessionID,
@@ -1324,6 +1331,226 @@ Keep the prompt concise but actionable. Use actual data from the snapshot, not p
1324
1331
  }
1325
1332
  }
1326
1333
 
1334
+ /**
1335
+ * Session message scan result
1336
+ */
1337
+ interface SessionScanResult {
1338
+ messageCount: number;
1339
+ toolCalls: Array<{
1340
+ toolName: string;
1341
+ args: Record<string, unknown>;
1342
+ output?: string;
1343
+ }>;
1344
+ swarmDetected: boolean;
1345
+ reasons: string[];
1346
+ }
1347
+
1348
+ /**
1349
+ * Scan session messages for swarm tool calls
1350
+ *
1351
+ * Uses SDK client to fetch messages and look for swarm activity.
1352
+ * This can detect swarm work even if no cells exist yet.
1353
+ */
1354
+ async function scanSessionMessages(sessionID: string): Promise<SessionScanResult> {
1355
+ const startTime = Date.now();
1356
+ const result: SessionScanResult = {
1357
+ messageCount: 0,
1358
+ toolCalls: [],
1359
+ swarmDetected: false,
1360
+ reasons: [],
1361
+ };
1362
+
1363
+ logCompaction("debug", "session_scan_start", {
1364
+ session_id: sessionID,
1365
+ has_sdk_client: !!sdkClient,
1366
+ });
1367
+
1368
+ if (!sdkClient) {
1369
+ logCompaction("warn", "session_scan_no_sdk_client", {
1370
+ session_id: sessionID,
1371
+ });
1372
+ return result;
1373
+ }
1374
+
1375
+ try {
1376
+ // Fetch session messages
1377
+ const messagesStart = Date.now();
1378
+ const rawResponse = await sdkClient.session.messages({ path: { id: sessionID } });
1379
+ const messagesDuration = Date.now() - messagesStart;
1380
+
1381
+ // Log the RAW response to understand its shape
1382
+ logCompaction("debug", "session_scan_raw_response", {
1383
+ session_id: sessionID,
1384
+ response_type: typeof rawResponse,
1385
+ is_array: Array.isArray(rawResponse),
1386
+ is_null: rawResponse === null,
1387
+ is_undefined: rawResponse === undefined,
1388
+ keys: rawResponse && typeof rawResponse === 'object' ? Object.keys(rawResponse) : [],
1389
+ raw_preview: JSON.stringify(rawResponse)?.slice(0, 500),
1390
+ });
1391
+
1392
+ // The response might be wrapped - check common patterns
1393
+ const messages = Array.isArray(rawResponse)
1394
+ ? rawResponse
1395
+ : rawResponse?.data
1396
+ ? rawResponse.data
1397
+ : rawResponse?.messages
1398
+ ? rawResponse.messages
1399
+ : rawResponse?.items
1400
+ ? rawResponse.items
1401
+ : [];
1402
+
1403
+ result.messageCount = messages?.length ?? 0;
1404
+
1405
+ logCompaction("debug", "session_scan_messages_fetched", {
1406
+ session_id: sessionID,
1407
+ duration_ms: messagesDuration,
1408
+ message_count: result.messageCount,
1409
+ extraction_method: Array.isArray(rawResponse) ? 'direct_array' : rawResponse?.data ? 'data_field' : rawResponse?.messages ? 'messages_field' : rawResponse?.items ? 'items_field' : 'fallback_empty',
1410
+ });
1411
+
1412
+ if (!Array.isArray(messages) || messages.length === 0) {
1413
+ logCompaction("debug", "session_scan_no_messages", {
1414
+ session_id: sessionID,
1415
+ });
1416
+ return result;
1417
+ }
1418
+
1419
+ // Swarm-related tool patterns
1420
+ const swarmTools = [
1421
+ // High confidence - active swarm coordination
1422
+ "hive_create_epic",
1423
+ "swarm_decompose",
1424
+ "swarm_spawn_subtask",
1425
+ "swarm_complete",
1426
+ "swarmmail_init",
1427
+ "swarmmail_reserve",
1428
+ // Medium confidence - swarm activity
1429
+ "hive_start",
1430
+ "hive_close",
1431
+ "swarm_status",
1432
+ "swarm_progress",
1433
+ "swarmmail_send",
1434
+ // Low confidence - possible swarm
1435
+ "hive_create",
1436
+ "hive_query",
1437
+ ];
1438
+
1439
+ const highConfidenceTools = new Set([
1440
+ "hive_create_epic",
1441
+ "swarm_decompose",
1442
+ "swarm_spawn_subtask",
1443
+ "swarmmail_init",
1444
+ "swarmmail_reserve",
1445
+ ]);
1446
+
1447
+ // Scan messages for tool calls
1448
+ let swarmToolCount = 0;
1449
+ let highConfidenceCount = 0;
1450
+
1451
+ // Debug: collect part types to understand message structure
1452
+ const partTypeCounts: Record<string, number> = {};
1453
+ let messagesWithParts = 0;
1454
+ let messagesWithoutParts = 0;
1455
+ let samplePartTypes: string[] = [];
1456
+
1457
+ for (const message of messages) {
1458
+ if (!message.parts || !Array.isArray(message.parts)) {
1459
+ messagesWithoutParts++;
1460
+ continue;
1461
+ }
1462
+ messagesWithParts++;
1463
+
1464
+ for (const part of message.parts) {
1465
+ const partType = part.type || "unknown";
1466
+ partTypeCounts[partType] = (partTypeCounts[partType] || 0) + 1;
1467
+
1468
+ // Collect first 10 unique part types for debugging
1469
+ if (samplePartTypes.length < 10 && !samplePartTypes.includes(partType)) {
1470
+ samplePartTypes.push(partType);
1471
+ }
1472
+
1473
+ // Check if this is a tool call part
1474
+ // OpenCode SDK: ToolPart has type="tool", tool=<string name>, state={...}
1475
+ if (part.type === "tool") {
1476
+ const toolPart = part as ToolPart;
1477
+ const toolName = toolPart.tool; // tool name is a string directly
1478
+
1479
+ if (toolName && swarmTools.includes(toolName)) {
1480
+ swarmToolCount++;
1481
+
1482
+ if (highConfidenceTools.has(toolName)) {
1483
+ highConfidenceCount++;
1484
+ }
1485
+
1486
+ // Extract args/output from state if available
1487
+ const state = toolPart.state;
1488
+ const args = state && "input" in state ? state.input : {};
1489
+ const output = state && "output" in state ? state.output : undefined;
1490
+
1491
+ result.toolCalls.push({
1492
+ toolName,
1493
+ args,
1494
+ output,
1495
+ });
1496
+
1497
+ logCompaction("debug", "session_scan_tool_found", {
1498
+ session_id: sessionID,
1499
+ tool_name: toolName,
1500
+ is_high_confidence: highConfidenceTools.has(toolName),
1501
+ });
1502
+ }
1503
+ }
1504
+ }
1505
+ }
1506
+
1507
+ // Determine if swarm detected based on tool calls
1508
+ if (highConfidenceCount > 0) {
1509
+ result.swarmDetected = true;
1510
+ result.reasons.push(`${highConfidenceCount} high-confidence swarm tools (${Array.from(new Set(result.toolCalls.filter(tc => highConfidenceTools.has(tc.toolName)).map(tc => tc.toolName))).join(", ")})`);
1511
+ }
1512
+
1513
+ if (swarmToolCount > 0 && !result.swarmDetected) {
1514
+ result.swarmDetected = true;
1515
+ result.reasons.push(`${swarmToolCount} swarm-related tools used`);
1516
+ }
1517
+
1518
+ const totalDuration = Date.now() - startTime;
1519
+
1520
+ // Debug: log part type distribution to understand message structure
1521
+ logCompaction("debug", "session_scan_part_types", {
1522
+ session_id: sessionID,
1523
+ messages_with_parts: messagesWithParts,
1524
+ messages_without_parts: messagesWithoutParts,
1525
+ part_type_counts: partTypeCounts,
1526
+ sample_part_types: samplePartTypes,
1527
+ });
1528
+
1529
+ logCompaction("info", "session_scan_complete", {
1530
+ session_id: sessionID,
1531
+ duration_ms: totalDuration,
1532
+ message_count: result.messageCount,
1533
+ tool_call_count: result.toolCalls.length,
1534
+ swarm_tool_count: swarmToolCount,
1535
+ high_confidence_count: highConfidenceCount,
1536
+ swarm_detected: result.swarmDetected,
1537
+ reasons: result.reasons,
1538
+ unique_tools: Array.from(new Set(result.toolCalls.map(tc => tc.toolName))),
1539
+ });
1540
+
1541
+ return result;
1542
+ } catch (err) {
1543
+ const totalDuration = Date.now() - startTime;
1544
+ logCompaction("error", "session_scan_exception", {
1545
+ session_id: sessionID,
1546
+ error: err instanceof Error ? err.message : String(err),
1547
+ stack: err instanceof Error ? err.stack : undefined,
1548
+ duration_ms: totalDuration,
1549
+ });
1550
+ return result;
1551
+ }
1552
+ }
1553
+
1327
1554
  /**
1328
1555
  * Check for swarm sign - evidence a swarm passed through
1329
1556
  *
@@ -1647,13 +1874,18 @@ type ExtendedHooks = Hooks & {
1647
1874
  ) => Promise<void>;
1648
1875
  };
1649
1876
 
1650
- export const SwarmPlugin: Plugin = async (
1877
+ // NOTE: Only default export - named exports cause double registration!
1878
+ // OpenCode's plugin loader calls ALL exports as functions.
1879
+ const SwarmPlugin: Plugin = async (
1651
1880
  input: PluginInput,
1652
1881
  ): Promise<ExtendedHooks> => {
1653
1882
  // CRITICAL: Set project directory from OpenCode input
1654
1883
  // Without this, CLI uses wrong database path
1655
1884
  projectDirectory = input.directory;
1656
1885
 
1886
+ // Store SDK client for session message scanning during compaction
1887
+ sdkClient = input.client;
1888
+
1657
1889
  return {
1658
1890
  tool: {
1659
1891
  // Beads
@@ -1751,7 +1983,23 @@ export const SwarmPlugin: Plugin = async (
1751
1983
  });
1752
1984
 
1753
1985
  // =======================================================================
1754
- // STEP 1: Detect swarm state from hive
1986
+ // STEP 1: Scan session messages for swarm tool calls
1987
+ // =======================================================================
1988
+ const sessionScanStart = Date.now();
1989
+ const sessionScan = await scanSessionMessages(input.sessionID);
1990
+ const sessionScanDuration = Date.now() - sessionScanStart;
1991
+
1992
+ logCompaction("info", "session_scan_results", {
1993
+ session_id: input.sessionID,
1994
+ duration_ms: sessionScanDuration,
1995
+ message_count: sessionScan.messageCount,
1996
+ tool_call_count: sessionScan.toolCalls.length,
1997
+ swarm_detected_from_messages: sessionScan.swarmDetected,
1998
+ reasons: sessionScan.reasons,
1999
+ });
2000
+
2001
+ // =======================================================================
2002
+ // STEP 2: Detect swarm state from hive cells
1755
2003
  // =======================================================================
1756
2004
  const detectionStart = Date.now();
1757
2005
  const detection = await detectSwarm();
@@ -1766,6 +2014,57 @@ export const SwarmPlugin: Plugin = async (
1766
2014
  reason_count: detection.reasons.length,
1767
2015
  });
1768
2016
 
2017
+ // =======================================================================
2018
+ // STEP 3: Merge session scan with hive detection for final confidence
2019
+ // =======================================================================
2020
+ // If session messages show high-confidence swarm tools, boost confidence
2021
+ if (sessionScan.swarmDetected && sessionScan.reasons.some(r => r.includes("high-confidence"))) {
2022
+ if (detection.confidence === "none" || detection.confidence === "low") {
2023
+ detection.confidence = "high";
2024
+ detection.detected = true;
2025
+ detection.reasons.push(...sessionScan.reasons);
2026
+
2027
+ logCompaction("info", "confidence_boost_from_session_scan", {
2028
+ session_id: input.sessionID,
2029
+ original_confidence: detection.confidence,
2030
+ boosted_to: "high",
2031
+ session_reasons: sessionScan.reasons,
2032
+ });
2033
+ }
2034
+ } else if (sessionScan.swarmDetected) {
2035
+ // Medium boost for any swarm tools found
2036
+ if (detection.confidence === "none") {
2037
+ detection.confidence = "medium";
2038
+ detection.detected = true;
2039
+ detection.reasons.push(...sessionScan.reasons);
2040
+
2041
+ logCompaction("info", "confidence_boost_from_session_scan", {
2042
+ session_id: input.sessionID,
2043
+ original_confidence: "none",
2044
+ boosted_to: "medium",
2045
+ session_reasons: sessionScan.reasons,
2046
+ });
2047
+ } else if (detection.confidence === "low") {
2048
+ detection.confidence = "medium";
2049
+ detection.reasons.push(...sessionScan.reasons);
2050
+
2051
+ logCompaction("info", "confidence_boost_from_session_scan", {
2052
+ session_id: input.sessionID,
2053
+ original_confidence: "low",
2054
+ boosted_to: "medium",
2055
+ session_reasons: sessionScan.reasons,
2056
+ });
2057
+ }
2058
+ }
2059
+
2060
+ logCompaction("info", "final_swarm_detection", {
2061
+ session_id: input.sessionID,
2062
+ confidence: detection.confidence,
2063
+ detected: detection.detected,
2064
+ combined_reasons: detection.reasons,
2065
+ message_scan_contributed: sessionScan.swarmDetected,
2066
+ });
2067
+
1769
2068
  if (detection.confidence === "high" || detection.confidence === "medium") {
1770
2069
  // Definite or probable swarm - try LLM-powered compaction
1771
2070
  logCompaction("info", "swarm_detected_attempting_llm", {