opencode-swarm-plugin 0.40.0 → 0.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
  2. package/.hive/analysis/session-data-quality-audit.md +320 -0
  3. package/.hive/eval-results.json +481 -24
  4. package/.hive/issues.jsonl +67 -16
  5. package/.hive/memories.jsonl +159 -1
  6. package/.opencode/eval-history.jsonl +315 -0
  7. package/.turbo/turbo-build.log +5 -5
  8. package/CHANGELOG.md +165 -0
  9. package/README.md +2 -0
  10. package/SCORER-ANALYSIS.md +598 -0
  11. package/bin/eval-gate.test.ts +158 -0
  12. package/bin/eval-gate.ts +74 -0
  13. package/bin/swarm.serve.test.ts +46 -0
  14. package/bin/swarm.test.ts +661 -732
  15. package/bin/swarm.ts +335 -0
  16. package/dist/compaction-hook.d.ts +7 -5
  17. package/dist/compaction-hook.d.ts.map +1 -1
  18. package/dist/compaction-prompt-scoring.d.ts +1 -0
  19. package/dist/compaction-prompt-scoring.d.ts.map +1 -1
  20. package/dist/eval-runner.d.ts +134 -0
  21. package/dist/eval-runner.d.ts.map +1 -0
  22. package/dist/hive.d.ts.map +1 -1
  23. package/dist/index.d.ts +29 -0
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +99741 -58858
  26. package/dist/memory-tools.d.ts +70 -2
  27. package/dist/memory-tools.d.ts.map +1 -1
  28. package/dist/memory.d.ts +37 -0
  29. package/dist/memory.d.ts.map +1 -1
  30. package/dist/observability-tools.d.ts +64 -0
  31. package/dist/observability-tools.d.ts.map +1 -1
  32. package/dist/plugin.js +99356 -58318
  33. package/dist/swarm-orchestrate.d.ts.map +1 -1
  34. package/dist/swarm-prompts.d.ts +32 -1
  35. package/dist/swarm-prompts.d.ts.map +1 -1
  36. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
  37. package/evals/ARCHITECTURE.md +1189 -0
  38. package/evals/example.eval.ts +3 -4
  39. package/evals/fixtures/compaction-prompt-cases.ts +6 -0
  40. package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
  41. package/evals/scorers/coordinator-discipline.ts +0 -323
  42. package/evals/swarm-decomposition.eval.ts +4 -2
  43. package/package.json +4 -3
  44. package/src/compaction-prompt-scorers.test.ts +185 -9
  45. package/src/compaction-prompt-scoring.ts +7 -5
  46. package/src/eval-runner.test.ts +128 -1
  47. package/src/eval-runner.ts +46 -0
  48. package/src/hive.ts +43 -42
  49. package/src/memory-tools.test.ts +84 -0
  50. package/src/memory-tools.ts +68 -3
  51. package/src/memory.test.ts +2 -112
  52. package/src/memory.ts +88 -49
  53. package/src/observability-tools.test.ts +13 -0
  54. package/src/observability-tools.ts +277 -0
  55. package/src/swarm-orchestrate.test.ts +162 -0
  56. package/src/swarm-orchestrate.ts +7 -5
  57. package/src/swarm-prompts.test.ts +168 -4
  58. package/src/swarm-prompts.ts +228 -7
  59. package/.env +0 -2
  60. package/.turbo/turbo-test.log +0 -481
  61. package/.turbo/turbo-typecheck.log +0 -1
package/src/compaction-prompt-scorers.test.ts CHANGED
@@ -3,6 +3,15 @@
  *
  * TDD approach - tests written FIRST to define scorer behavior
  * Tests the PURE scoring functions (not evalite wrappers)
+ *
+ * **Case-Sensitivity Verification**:
+ * All tool name regexes MUST be case-insensitive (/i flag) because:
+ * - LLMs generate inconsistent casing (Edit vs edit, Read vs read)
+ * - Fixtures contain mixed case examples
+ * - Scoring must be robust to case variations
+ *
+ * Fixed in commit adding /i flags to Edit, Write, bash patterns.
+ * Tests added to prevent regression.
  */

 import { describe, expect, test } from "bun:test";
@@ -15,6 +24,109 @@ import {
   scorePostCompactionDiscipline,
 } from "./compaction-prompt-scoring.js";

+describe("Case-Insensitive Tool Detection (Regression Prevention)", () => {
+  test("all scorers handle mixed-case tool names correctly", () => {
+    // Real-world example with mixed casing from LLM output
+    const prompt: CompactionPrompt = {
+      content: `┌─────────────────────────────────────────┐
+│ YOU ARE THE COORDINATOR │
+└─────────────────────────────────────────┘
+
+You are coordinating epic mjkw81rkq4c.
+
+## IMMEDIATE ACTIONS
+
+1. swarm_status(epic_id='mjkw81rkq4c', project_key='/path')
+2. swarmmail_inbox()
+
+## FORBIDDEN TOOLS
+
+NEVER use these tools - delegate to workers:
+- edit (file modifications)
+- write (file creation)
+- BASH (shell commands for file mods)
+- swarmmail_reserve (only workers)
+- git commit (workers handle)
+
+ALWAYS spawn workers for code changes.`,
+    };
+
+    // Epic ID detection should work
+    const epicResult = scoreEpicIdSpecificity(prompt);
+    expect(epicResult.score).toBe(1.0);
+
+    // Actionability should detect swarm_status
+    const actionResult = scoreActionability(prompt);
+    expect(actionResult.score).toBe(1.0);
+
+    // Coordinator identity should detect ASCII + NEVER/ALWAYS
+    const identityResult = scoreCoordinatorIdentity(prompt);
+    expect(identityResult.score).toBe(1.0);
+
+    // Forbidden tools should detect all 5 despite mixed case
+    const forbiddenResult = scoreForbiddenToolsPresent(prompt);
+    expect(forbiddenResult.score).toBe(1.0);
+    expect(forbiddenResult.message).toContain("All 5");
+
+    // Post-compaction discipline should detect swarm_status as first tool
+    const disciplineResult = scorePostCompactionDiscipline(prompt);
+    expect(disciplineResult.score).toBe(1.0);
+  });
+
+  test("forbidden tools scorer detects lowercase tool names", () => {
+    // Previously failed before /i flags were added
+    const prompt: CompactionPrompt = {
+      content: `Don't use: edit, write, bash, swarmmail_reserve, git commit`,
+    };
+
+    const result = scoreForbiddenToolsPresent(prompt);
+
+    // Should detect all 5 tools regardless of case
+    expect(result.score).toBe(1.0);
+    expect(result.message).toContain("All 5");
+  });
+
+  test("forbidden tools scorer detects UPPERCASE tool names", () => {
+    const prompt: CompactionPrompt = {
+      content: `Forbidden: EDIT, WRITE, BASH, swarmmail_reserve, git commit`,
+    };
+
+    const result = scoreForbiddenToolsPresent(prompt);
+
+    expect(result.score).toBe(1.0);
+    expect(result.message).toContain("All 5");
+  });
+
+  test("post-compaction discipline detects mixed-case first tools", () => {
+    const testCases = [
+      { tool: "EDIT", shouldPass: false },
+      { tool: "edit", shouldPass: false },
+      { tool: "Edit", shouldPass: false },
+      { tool: "WRITE", shouldPass: false },
+      { tool: "write", shouldPass: false },
+      { tool: "READ", shouldPass: false },
+      { tool: "read", shouldPass: false },
+      { tool: "swarm_status", shouldPass: true },
+      { tool: "SWARM_STATUS", shouldPass: true },
+      { tool: "swarmmail_inbox", shouldPass: true },
+    ];
+
+    for (const { tool, shouldPass } of testCases) {
+      const prompt: CompactionPrompt = {
+        content: `1. ${tool}()`,
+      };
+
+      const result = scorePostCompactionDiscipline(prompt);
+
+      if (shouldPass) {
+        expect(result.score).toBe(1.0);
+      } else {
+        expect(result.score).toBe(0.0);
+      }
+    }
+  });
+});
+
 describe("epicIdSpecificity scorer", () => {
   test("scores 1.0 for real epic IDs", () => {
     const prompt: CompactionPrompt = {
@@ -173,16 +285,17 @@ describe("forbiddenToolsPresent scorer", () => {
 - Edit (use swarm_spawn_subtask)
 - Write (use swarm_spawn_subtask)
 - swarmmail_reserve (only workers reserve)
-- bash with git commit (workers commit)`,
+- git commit (workers commit)
+- bash (for file modifications)`,
     };

     const result = scoreForbiddenToolsPresent(prompt);

     expect(result.score).toBe(1.0);
-    expect(result.message).toContain("All 4 forbidden tools");
+    expect(result.message).toContain("All 5 forbidden tools");
   });

-  test("scores 0.75 when 3 out of 4 tools listed", () => {
+  test("scores 0.6 when 3 out of 5 tools listed", () => {
     const prompt: CompactionPrompt = {
       content: `🚫 FORBIDDEN TOOLS:
 - Edit
@@ -192,19 +305,19 @@ describe("forbiddenToolsPresent scorer", () => {

     const result = scoreForbiddenToolsPresent(prompt);

-    expect(result.score).toBe(0.75);
-    expect(result.message).toContain("3/4");
+    expect(result.score).toBe(0.6);
+    expect(result.message).toContain("3/5");
   });

-  test("scores 0.5 when 2 out of 4 tools listed", () => {
+  test("scores 0.4 when 2 out of 5 tools listed", () => {
     const prompt: CompactionPrompt = {
       content: `Don't use Edit or Write directly.`,
     };

     const result = scoreForbiddenToolsPresent(prompt);

-    expect(result.score).toBe(0.5);
-    expect(result.message).toContain("2/4");
+    expect(result.score).toBe(0.4);
+    expect(result.message).toContain("2/5");
   });

   test("scores 0.0 when no forbidden tools listed", () => {
@@ -215,7 +328,34 @@ describe("forbiddenToolsPresent scorer", () => {
     const result = scoreForbiddenToolsPresent(prompt);

     expect(result.score).toBe(0.0);
-    expect(result.message).toContain("0/4");
+    expect(result.message).toContain("0/5");
+  });
+
+  test("scores 1.0 with lowercase forbidden tools (case-insensitive)", () => {
+    const prompt: CompactionPrompt = {
+      content: `🚫 FORBIDDEN TOOLS - NEVER call these:
+- edit (use swarm_spawn_subtask)
+- write (use swarm_spawn_subtask)
+- swarmmail_reserve (only workers reserve)
+- git commit (workers commit)
+- bash (for file modifications)`,
+    };
+
+    const result = scoreForbiddenToolsPresent(prompt);
+
+    expect(result.score).toBe(1.0);
+    expect(result.message).toContain("All 5 forbidden tools");
+  });
+
+  test("scores correctly with mixed case forbidden tools", () => {
+    const prompt: CompactionPrompt = {
+      content: `Avoid: edit, Write, BASH`,
+    };
+
+    const result = scoreForbiddenToolsPresent(prompt);
+
+    expect(result.score).toBe(0.6);
+    expect(result.message).toContain("3/5");
   });
 });

@@ -296,4 +436,40 @@ describe("postCompactionDiscipline scorer", () => {
     expect(result.score).toBe(0.0);
     expect(result.message).toContain("No tool");
   });
+
+  test("scores 0.0 when first tool is lowercase 'read' (case-insensitive)", () => {
+    const prompt: CompactionPrompt = {
+      content: `1. read(file='src/index.ts')
+2. swarm_status()`,
+    };
+
+    const result = scorePostCompactionDiscipline(prompt);
+
+    expect(result.score).toBe(0.0);
+    expect(result.message).toContain("read");
+  });
+
+  test("scores 0.0 when first tool is lowercase 'edit'", () => {
+    const prompt: CompactionPrompt = {
+      content: `1. edit(file='src/auth.ts', ...)
+2. swarm_status()`,
+    };
+
+    const result = scorePostCompactionDiscipline(prompt);
+
+    expect(result.score).toBe(0.0);
+    expect(result.message).toContain("edit");
+  });
+
+  test("scores 0.0 when first tool is lowercase 'write'", () => {
+    const prompt: CompactionPrompt = {
+      content: `1. write(file='README.md', content='...')
+2. swarm_status()`,
+    };
+
+    const result = scorePostCompactionDiscipline(prompt);
+
+    expect(result.score).toBe(0.0);
+    expect(result.message).toContain("write");
+  });
 });
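The regression tests above hinge on the detail called out in the new file header: every tool-name pattern must carry the /i flag. A minimal illustration of the difference, not taken from the package itself:

// Illustrative only: why the /i flag matters for LLM-generated prompt text.
const caseSensitive = /\bEdit\b/;
const caseInsensitive = /\bEdit\b/i;

caseSensitive.test("never call edit directly");   // false - misses the lowercase mention
caseInsensitive.test("never call edit directly"); // true  - matches edit, Edit, EDIT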
package/src/compaction-prompt-scoring.ts CHANGED
@@ -203,6 +203,7 @@ export function scoreCoordinatorIdentity(
  * 2. Write
  * 3. swarmmail_reserve (only workers reserve)
  * 4. git commit (workers commit)
+ * 5. bash (for file modifications)
  *
  * @returns ratio of forbidden tools mentioned (0.0 to 1.0)
  */
@@ -211,10 +212,11 @@ export function scoreForbiddenToolsPresent(
 ): ScorerResult {
   // Check for forbidden tool mentions
   const forbiddenTools = [
-    /\bEdit\b/,
-    /\bWrite\b/,
+    /\bEdit\b/i,
+    /\bWrite\b/i,
     /swarmmail_reserve/,
     /git commit/,
+    /\bbash\b/i,
   ];

   const foundTools = forbiddenTools.filter((pattern) =>
@@ -226,20 +228,20 @@ export function scoreForbiddenToolsPresent(
   if (score === 1.0) {
     return {
       score: 1.0,
-      message: "All 4 forbidden tools listed",
+      message: "All 5 forbidden tools listed",
     };
   }

   if (score === 0) {
     return {
       score: 0.0,
-      message: "No forbidden tools listed (0/4)",
+      message: "No forbidden tools listed (0/5)",
     };
   }

   return {
     score,
-    message: `${foundTools.length}/4 forbidden tools listed`,
+    message: `${foundTools.length}/5 forbidden tools listed`,
   };
 }

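With bash added, the scorer now tracks five forbidden-tool patterns, which is why the test expectations move from quarters (0.75, 0.5) to fifths (0.6, 0.4). A minimal sketch of the updated shape, assuming the score is the found/total ratio (the division itself is not shown in the hunk, but it is implied by the "@returns ratio" docstring and the 0.6/0.4 expectations):

// Sketch, not the package source: ratio-based forbidden-tools scoring with /i flags.
const forbiddenToolPatterns = [/\bEdit\b/i, /\bWrite\b/i, /swarmmail_reserve/, /git commit/, /\bbash\b/i];

function sketchForbiddenToolsScore(content: string): { score: number; message: string } {
  const found = forbiddenToolPatterns.filter((pattern) => pattern.test(content));
  const score = found.length / forbiddenToolPatterns.length;
  if (score === 1) return { score: 1.0, message: "All 5 forbidden tools listed" };
  if (score === 0) return { score: 0.0, message: "No forbidden tools listed (0/5)" };
  return { score, message: `${found.length}/5 forbidden tools listed` };
}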
package/src/eval-runner.test.ts CHANGED
@@ -4,9 +4,11 @@
  * TDD: These tests MUST fail initially, then pass after implementation.
  */

-import { describe, test, expect, beforeAll } from "bun:test";
+import { describe, test, expect, beforeAll, afterEach } from "bun:test";
 import { runEvals } from "./eval-runner";
 import path from "node:path";
+import fs from "node:fs";
+import { getEvalHistoryPath } from "./eval-history";

 // Use project root for all tests
 const PROJECT_ROOT = path.resolve(import.meta.dir, "..");
@@ -93,4 +95,129 @@ describe("runEvals", () => {
     expect(result.totalSuites).toBe(0);
     expect(result.suites).toEqual([]);
   }, 10000);
+
+  test("records eval run to history after execution", async () => {
+    // Clean up any existing history before test
+    const historyPath = getEvalHistoryPath(PROJECT_ROOT);
+    const historyBackup = historyPath + ".backup";
+
+    // Backup existing history
+    if (fs.existsSync(historyPath)) {
+      fs.copyFileSync(historyPath, historyBackup);
+    }
+
+    try {
+      // Remove history file to get clean state
+      if (fs.existsSync(historyPath)) {
+        fs.unlinkSync(historyPath);
+      }
+
+      // Run evals
+      const result = await runEvals({
+        cwd: PROJECT_ROOT,
+        suiteFilter: "example",
+      });
+
+      // Should have succeeded
+      expect(result.success).toBe(true);
+      expect(result.suites.length).toBeGreaterThan(0);
+
+      // History file should have been created
+      expect(fs.existsSync(historyPath)).toBe(true);
+
+      // Read history file
+      const historyContent = fs.readFileSync(historyPath, "utf-8");
+      const lines = historyContent.trim().split("\n");
+
+      // Should have one line per suite
+      expect(lines.length).toBe(result.suites.length);
+
+      // Parse first line and verify structure
+      const firstRecord = JSON.parse(lines[0]);
+
+      // Verify structure has all required fields
+      expect(typeof firstRecord.timestamp).toBe("string");
+      expect(typeof firstRecord.eval_name).toBe("string");
+      expect(typeof firstRecord.score).toBe("number");
+      expect(typeof firstRecord.run_count).toBe("number");
+
+      // Verify eval_name matches suite name
+      expect(firstRecord.eval_name).toBe(result.suites[0].name);
+
+      // Verify score matches suite averageScore
+      expect(firstRecord.score).toBe(result.suites[0].averageScore);
+
+      // First run should have run_count = 1
+      expect(firstRecord.run_count).toBe(1);
+    } finally {
+      // Restore backup
+      if (fs.existsSync(historyBackup)) {
+        fs.copyFileSync(historyBackup, historyPath);
+        fs.unlinkSync(historyBackup);
+      }
+    }
+  }, 30000);
+
+  test("checks gates for each suite after recording", async () => {
+    const result = await runEvals({
+      cwd: PROJECT_ROOT,
+      suiteFilter: "example",
+    });
+
+    expect(result.success).toBe(true);
+    expect(result.gateResults).toBeDefined();
+    expect(Array.isArray(result.gateResults)).toBe(true);
+
+    // Should have gate result for each suite
+    expect(result.gateResults?.length).toBe(result.suites.length);
+
+    // Each gate result should have required fields
+    if (result.gateResults && result.gateResults.length > 0) {
+      const gateResult = result.gateResults[0];
+      expect(gateResult).toHaveProperty("suite");
+      expect(gateResult).toHaveProperty("passed");
+      expect(gateResult).toHaveProperty("phase");
+      expect(gateResult).toHaveProperty("message");
+      expect(gateResult).toHaveProperty("currentScore");
+    }
+  }, 30000);
+
+  test("calls learnFromEvalFailure when gate fails", async () => {
+    // This test requires manually creating a history with regression
+    // For now, we just verify the code path exists
+    // In practice, this would be tested with mocked checkGate returning failed=true
+
+    const result = await runEvals({
+      cwd: PROJECT_ROOT,
+      suiteFilter: "example",
+    });

+    // Gate results should be present even if no failures
+    expect(result.gateResults).toBeDefined();
+  }, 30000);
+
+  test("does NOT call learnFromEvalFailure when gate passes", async () => {
+    // Similar to above - verifies the happy path
+    // Real test would mock checkGate and verify learnFromEvalFailure NOT called
+
+    const result = await runEvals({
+      cwd: PROJECT_ROOT,
+      suiteFilter: "example",
+    });
+
+    // Should succeed with gate results
+    expect(result.success).toBe(true);
+    expect(result.gateResults).toBeDefined();
+  }, 30000);
+
+  test("includes gateResults in return value", async () => {
+    const result = await runEvals({
+      cwd: PROJECT_ROOT,
+      suiteFilter: "example",
+    });
+
+    // gateResults should be array (even if empty)
+    expect(result).toHaveProperty("gateResults");
+    expect(Array.isArray(result.gateResults)).toBe(true);
+  }, 30000);
 });
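The new history assertions read one JSON object per line from the file returned by getEvalHistoryPath. Inferred from those assertions (the interface name itself is illustrative, not from the package):

// Shape of a single eval-history JSONL line, as the test asserts it.
interface EvalHistoryRecord {
  timestamp: string;  // ISO timestamp, e.g. new Date().toISOString()
  eval_name: string;  // matches result.suites[i].name
  score: number;      // matches result.suites[i].averageScore
  run_count: number;  // 1 on the first recorded run for a suite
}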
package/src/eval-runner.ts CHANGED
@@ -13,6 +13,10 @@ import { createInMemoryStorage } from "evalite/in-memory-storage";
 import type { Evalite } from "evalite/types";
 import fs from "node:fs/promises";
 import path from "node:path";
+import { recordEvalRun, getScoreHistory } from "./eval-history.js";
+import { checkGate } from "./eval-gates.js";
+import { learnFromEvalFailure } from "./eval-learning.js";
+import { getMemoryAdapter } from "./memory-tools.js";

 /**
  * Options for running evals programmatically
@@ -97,6 +101,17 @@ export interface RunEvalsResult {

   /** Error message if run failed */
   error?: string;
+
+  /** Gate check results per suite */
+  gateResults?: Array<{
+    suite: string;
+    passed: boolean;
+    phase: string;
+    message: string;
+    baseline?: number;
+    currentScore: number;
+    regressionPercent?: number;
+  }>;
 }

 /**
@@ -246,6 +261,36 @@ export async function runEvals(
       })),
     }));

+    // Record eval runs to history
+    for (const suite of suites) {
+      const history = getScoreHistory(projectRoot, suite.name);
+      recordEvalRun(projectRoot, {
+        timestamp: new Date().toISOString(),
+        eval_name: suite.name,
+        score: suite.averageScore,
+        run_count: history.length + 1,
+      });
+    }
+
+    // Check gates for each suite
+    const gateResults = [];
+    for (const suite of suites) {
+      const history = getScoreHistory(projectRoot, suite.name);
+      const gate = checkGate(projectRoot, suite.name, suite.averageScore);
+      gateResults.push({ suite: suite.name, ...gate });
+
+      // If gate failed, trigger learning
+      if (!gate.passed) {
+        try {
+          const memoryAdapter = await getMemoryAdapter();
+          await learnFromEvalFailure(suite.name, suite.averageScore, history, memoryAdapter);
+        } catch (e) {
+          // Learning is best-effort, don't fail the eval run
+          console.warn(`Failed to store learning for ${suite.name}:`, e);
+        }
+      }
+    }
+
     // Calculate overall metrics
     const totalEvals = suites.reduce((sum, s) => sum + s.evalCount, 0);
     const averageScore =
@@ -263,6 +308,7 @@
       totalEvals,
       averageScore,
       suites,
+      gateResults,
     };
   } catch (error) {
     // Return error result
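Callers now get gate outcomes back alongside suite scores. A hedged usage sketch (the suiteFilter value and the logging are illustrative; the option and field names come from the diffs above):

// Sketch: run a filtered eval suite and surface any failed gates.
import { runEvals } from "./eval-runner.js";

const result = await runEvals({ cwd: process.cwd(), suiteFilter: "example" });
for (const gate of result.gateResults ?? []) {
  if (!gate.passed) {
    console.warn(`Gate failed for ${gate.suite} (${gate.phase}): ${gate.message}, score ${gate.currentScore}`);
  }
}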
package/src/hive.ts CHANGED
@@ -741,42 +741,44 @@ export const hive_create_epic = tool({
   };

   // Emit DecompositionGeneratedEvent for learning system
-  if (args.project_key) {
-    try {
-      const event = createEvent("decomposition_generated", {
-        project_key: args.project_key,
-        epic_id: epic.id,
-        task: args.task || validated.epic_title,
-        context: validated.epic_description,
-        strategy: args.strategy || "feature-based",
-        epic_title: validated.epic_title,
-        subtasks: validated.subtasks.map((st) => ({
-          title: st.title,
-          files: st.files || [],
-          priority: st.priority,
-        })),
-        recovery_context: args.recovery_context,
-      });
-      await appendEvent(event, args.project_key);
-    } catch (error) {
-      // Non-fatal - log and continue
-      console.warn(
-        "[hive_create_epic] Failed to emit DecompositionGeneratedEvent:",
-        error,
-      );
-    }
+  // Always emit using projectKey (from getHiveWorkingDirectory), not args.project_key
+  // This fixes the bug where events weren't emitted when callers didn't pass project_key
+  const effectiveProjectKey = args.project_key || projectKey;
+  try {
+    const event = createEvent("decomposition_generated", {
+      project_key: effectiveProjectKey,
+      epic_id: epic.id,
+      task: args.task || validated.epic_title,
+      context: validated.epic_description,
+      strategy: args.strategy || "feature-based",
+      epic_title: validated.epic_title,
+      subtasks: validated.subtasks.map((st) => ({
+        title: st.title,
+        files: st.files || [],
+        priority: st.priority,
+      })),
+      recovery_context: args.recovery_context,
+    });
+    await appendEvent(event, effectiveProjectKey);
+  } catch (error) {
+    // Non-fatal - log and continue
+    console.warn(
+      "[hive_create_epic] Failed to emit DecompositionGeneratedEvent:",
+      error,
+    );
+  }

-  // Capture decomposition_complete event for eval scoring
-  try {
-    const { captureCoordinatorEvent } = await import("./eval-capture.js");
-
-    // Build files_per_subtask map (indexed by subtask index)
-    const filesPerSubtask: Record<number, string[]> = {};
-    validated.subtasks.forEach((subtask, index) => {
-      if (subtask.files && subtask.files.length > 0) {
-        filesPerSubtask[index] = subtask.files;
-      }
-    });
+  // Capture decomposition_complete event for eval scoring
+  try {
+    const { captureCoordinatorEvent } = await import("./eval-capture.js");
+
+    // Build files_per_subtask map (indexed by subtask index)
+    const filesPerSubtask: Record<number, string[]> = {};
+    validated.subtasks.forEach((subtask, index) => {
+      if (subtask.files && subtask.files.length > 0) {
+        filesPerSubtask[index] = subtask.files;
+      }
+    });

     captureCoordinatorEvent({
       session_id: ctx.sessionID || "unknown",
@@ -792,13 +794,12 @@ export const hive_create_epic = tool({
         task: args.task,
       },
     });
-  } catch (error) {
-    // Non-fatal - log and continue
-    console.warn(
-      "[hive_create_epic] Failed to capture decomposition_complete event:",
-      error,
-    );
-  }
+  } catch (error) {
+    // Non-fatal - log and continue
+    console.warn(
+      "[hive_create_epic] Failed to capture decomposition_complete event:",
+      error,
+    );
   }

   // Sync cells to JSONL so spawned workers can see them immediately
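The coordinator fix comes down to a fallback: previously the whole emit block was skipped whenever args.project_key was absent; now the key resolved from the working directory is used instead. In miniature (illustrative helper, not package code):

// Illustrative: prefer the caller-supplied project key, fall back to the resolved one,
// so the DecompositionGeneratedEvent is emitted either way.
function resolveProjectKey(argsProjectKey: string | undefined, projectKey: string): string {
  return argsProjectKey || projectKey;
}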