opencode-swarm-plugin 0.38.0 → 0.39.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +11 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +130 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +475 -0
- package/bin/swarm.ts +383 -0
- package/dist/compaction-hook.d.ts +1 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +81 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +370 -13
- package/dist/plugin.js +203 -13
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +589 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
- package/evals/scorers/coordinator-discipline.ts +13 -13
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +81 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +422 -0
- package/src/eval-capture.ts +94 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/index.ts +61 -1
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/src/eval-capture.test.ts
CHANGED
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
type CoordinatorSession,
|
|
12
12
|
CoordinatorSessionSchema,
|
|
13
13
|
captureCoordinatorEvent,
|
|
14
|
+
captureCompactionEvent,
|
|
14
15
|
saveSession,
|
|
15
16
|
} from "./eval-capture.js";
|
|
16
17
|
|
|
@@ -336,6 +337,127 @@ describe("captureCoordinatorEvent", () => {
|
|
|
336
337
|
});
|
|
337
338
|
});
|
|
338
339
|
|
|
340
|
+
describe("COMPACTION events", () => {
|
|
341
|
+
test("validates detection_complete event", () => {
|
|
342
|
+
const event: CoordinatorEvent = {
|
|
343
|
+
session_id: "test-session",
|
|
344
|
+
epic_id: "bd-123",
|
|
345
|
+
timestamp: new Date().toISOString(),
|
|
346
|
+
event_type: "COMPACTION",
|
|
347
|
+
compaction_type: "detection_complete",
|
|
348
|
+
payload: {
|
|
349
|
+
confidence: "high",
|
|
350
|
+
context_type: "full",
|
|
351
|
+
epic_id: "bd-456",
|
|
352
|
+
},
|
|
353
|
+
};
|
|
354
|
+
|
|
355
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
test("validates prompt_generated event", () => {
|
|
359
|
+
const event: CoordinatorEvent = {
|
|
360
|
+
session_id: "test-session",
|
|
361
|
+
epic_id: "bd-123",
|
|
362
|
+
timestamp: new Date().toISOString(),
|
|
363
|
+
event_type: "COMPACTION",
|
|
364
|
+
compaction_type: "prompt_generated",
|
|
365
|
+
payload: {
|
|
366
|
+
prompt_length: 5000,
|
|
367
|
+
full_prompt: "You are a coordinator...", // Full prompt content captured
|
|
368
|
+
context_type: "full",
|
|
369
|
+
},
|
|
370
|
+
};
|
|
371
|
+
|
|
372
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
test("validates context_injected event", () => {
|
|
376
|
+
const event: CoordinatorEvent = {
|
|
377
|
+
session_id: "test-session",
|
|
378
|
+
epic_id: "bd-123",
|
|
379
|
+
timestamp: new Date().toISOString(),
|
|
380
|
+
event_type: "COMPACTION",
|
|
381
|
+
compaction_type: "context_injected",
|
|
382
|
+
payload: {
|
|
383
|
+
context_type: "fallback",
|
|
384
|
+
injected_sections: ["swarm_status", "mandatory_instructions"],
|
|
385
|
+
},
|
|
386
|
+
};
|
|
387
|
+
|
|
388
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
test("validates resumption_started event", () => {
|
|
392
|
+
const event: CoordinatorEvent = {
|
|
393
|
+
session_id: "test-session",
|
|
394
|
+
epic_id: "bd-123",
|
|
395
|
+
timestamp: new Date().toISOString(),
|
|
396
|
+
event_type: "COMPACTION",
|
|
397
|
+
compaction_type: "resumption_started",
|
|
398
|
+
payload: {
|
|
399
|
+
epic_id: "bd-456",
|
|
400
|
+
agent_role: "coordinator",
|
|
401
|
+
context_loaded: true,
|
|
402
|
+
},
|
|
403
|
+
};
|
|
404
|
+
|
|
405
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
406
|
+
});
|
|
407
|
+
|
|
408
|
+
test("validates tool_call_tracked event", () => {
|
|
409
|
+
const event: CoordinatorEvent = {
|
|
410
|
+
session_id: "test-session",
|
|
411
|
+
epic_id: "bd-123",
|
|
412
|
+
timestamp: new Date().toISOString(),
|
|
413
|
+
event_type: "COMPACTION",
|
|
414
|
+
compaction_type: "tool_call_tracked",
|
|
415
|
+
payload: {
|
|
416
|
+
tool_name: "hive_create_epic",
|
|
417
|
+
extracted_data: {
|
|
418
|
+
epic_id: "bd-789",
|
|
419
|
+
epic_title: "Add auth",
|
|
420
|
+
},
|
|
421
|
+
},
|
|
422
|
+
};
|
|
423
|
+
|
|
424
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
425
|
+
});
|
|
426
|
+
|
|
427
|
+
test("rejects invalid compaction_type", () => {
|
|
428
|
+
const event = {
|
|
429
|
+
session_id: "test-session",
|
|
430
|
+
epic_id: "bd-123",
|
|
431
|
+
timestamp: new Date().toISOString(),
|
|
432
|
+
event_type: "COMPACTION",
|
|
433
|
+
compaction_type: "invalid_type",
|
|
434
|
+
payload: {},
|
|
435
|
+
};
|
|
436
|
+
|
|
437
|
+
expect(() => CoordinatorEventSchema.parse(event)).toThrow();
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
test("captures full prompt content without truncation", () => {
|
|
441
|
+
const longPrompt = "A".repeat(10000); // 10k chars
|
|
442
|
+
const event: CoordinatorEvent = {
|
|
443
|
+
session_id: "test-session",
|
|
444
|
+
epic_id: "bd-123",
|
|
445
|
+
timestamp: new Date().toISOString(),
|
|
446
|
+
event_type: "COMPACTION",
|
|
447
|
+
compaction_type: "prompt_generated",
|
|
448
|
+
payload: {
|
|
449
|
+
prompt_length: longPrompt.length,
|
|
450
|
+
full_prompt: longPrompt,
|
|
451
|
+
context_type: "full",
|
|
452
|
+
},
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
|
|
456
|
+
expect(event.payload.full_prompt).toBe(longPrompt);
|
|
457
|
+
expect(event.payload.full_prompt.length).toBe(10000);
|
|
458
|
+
});
|
|
459
|
+
});
|
|
460
|
+
|
|
339
461
|
describe("saveSession", () => {
|
|
340
462
|
let sessionDir: string;
|
|
341
463
|
let sessionId: string;
|
|
@@ -388,3 +510,303 @@ describe("saveSession", () => {
|
|
|
388
510
|
expect(session).toBeNull();
|
|
389
511
|
});
|
|
390
512
|
});
|
|
513
|
+
|
|
514
|
+
describe("session_id propagation from ctx.sessionID", () => {
|
|
515
|
+
let sessionDir: string;
|
|
516
|
+
let sessionId: string;
|
|
517
|
+
|
|
518
|
+
beforeEach(() => {
|
|
519
|
+
sessionDir = path.join(os.homedir(), ".config", "swarm-tools", "sessions");
|
|
520
|
+
sessionId = `test-ctx-${Date.now()}`;
|
|
521
|
+
});
|
|
522
|
+
|
|
523
|
+
afterEach(() => {
|
|
524
|
+
// Clean up test session file
|
|
525
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
526
|
+
if (fs.existsSync(sessionPath)) {
|
|
527
|
+
fs.unlinkSync(sessionPath);
|
|
528
|
+
}
|
|
529
|
+
});
|
|
530
|
+
|
|
531
|
+
test("session_id should come from ctx.sessionID, not process.env", () => {
|
|
532
|
+
// GIVEN: process.env.OPENCODE_SESSION_ID is unset (mimics the real OpenCode environment)
|
|
533
|
+
const oldEnv = process.env.OPENCODE_SESSION_ID;
|
|
534
|
+
delete process.env.OPENCODE_SESSION_ID;
|
|
535
|
+
|
|
536
|
+
try {
|
|
537
|
+
// WHEN: captureCoordinatorEvent is called with session_id from ctx.sessionID
|
|
538
|
+
const event: CoordinatorEvent = {
|
|
539
|
+
session_id: sessionId, // This should come from ctx.sessionID in call sites
|
|
540
|
+
epic_id: "bd-123",
|
|
541
|
+
timestamp: new Date().toISOString(),
|
|
542
|
+
event_type: "DECISION",
|
|
543
|
+
decision_type: "strategy_selected",
|
|
544
|
+
payload: { strategy: "file-based" },
|
|
545
|
+
};
|
|
546
|
+
|
|
547
|
+
captureCoordinatorEvent(event);
|
|
548
|
+
|
|
549
|
+
// THEN: Event should be captured with correct session_id
|
|
550
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
551
|
+
expect(fs.existsSync(sessionPath)).toBe(true);
|
|
552
|
+
|
|
553
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
554
|
+
const parsed = JSON.parse(content.trim());
|
|
555
|
+
expect(parsed.session_id).toBe(sessionId);
|
|
556
|
+
expect(parsed.session_id).not.toBe("unknown");
|
|
557
|
+
} finally {
|
|
558
|
+
// Restore env
|
|
559
|
+
if (oldEnv !== undefined) {
|
|
560
|
+
process.env.OPENCODE_SESSION_ID = oldEnv;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
test("demonstrates call sites must pass ctx.sessionID not process.env", () => {
|
|
566
|
+
// GIVEN: This simulates what happens in real call sites
|
|
567
|
+
const oldEnv = process.env.OPENCODE_SESSION_ID;
|
|
568
|
+
delete process.env.OPENCODE_SESSION_ID; // Empty in real OpenCode environment
|
|
569
|
+
|
|
570
|
+
try {
|
|
571
|
+
// WHEN: Call site uses process.env (the OLD, BAD pattern that was replaced by ctx.sessionID)
|
|
572
|
+
const badSessionId = process.env.OPENCODE_SESSION_ID || "unknown";
|
|
573
|
+
const badEvent: CoordinatorEvent = {
|
|
574
|
+
session_id: badSessionId, // This evaluates to "unknown"
|
|
575
|
+
epic_id: "bd-123",
|
|
576
|
+
timestamp: new Date().toISOString(),
|
|
577
|
+
event_type: "DECISION",
|
|
578
|
+
decision_type: "strategy_selected",
|
|
579
|
+
payload: { strategy: "file-based" },
|
|
580
|
+
};
|
|
581
|
+
|
|
582
|
+
captureCoordinatorEvent(badEvent);
|
|
583
|
+
|
|
584
|
+
// THEN: Event goes to unknown.jsonl (BAD!)
|
|
585
|
+
const unknownPath = path.join(sessionDir, "unknown.jsonl");
|
|
586
|
+
expect(fs.existsSync(unknownPath)).toBe(true);
|
|
587
|
+
|
|
588
|
+
// WHEN: Call site uses ctx.sessionID (CORRECT PATTERN)
|
|
589
|
+
const goodEvent: CoordinatorEvent = {
|
|
590
|
+
session_id: sessionId, // From ctx.sessionID
|
|
591
|
+
epic_id: "bd-123",
|
|
592
|
+
timestamp: new Date().toISOString(),
|
|
593
|
+
event_type: "DECISION",
|
|
594
|
+
decision_type: "strategy_selected",
|
|
595
|
+
payload: { strategy: "file-based" },
|
|
596
|
+
};
|
|
597
|
+
|
|
598
|
+
captureCoordinatorEvent(goodEvent);
|
|
599
|
+
|
|
600
|
+
// THEN: Event goes to correct session file
|
|
601
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
602
|
+
expect(fs.existsSync(sessionPath)).toBe(true);
|
|
603
|
+
} finally {
|
|
604
|
+
if (oldEnv !== undefined) {
|
|
605
|
+
process.env.OPENCODE_SESSION_ID = oldEnv;
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
test("verifies all call sites now use ctx.sessionID", () => {
|
|
611
|
+
// This test documents that we've fixed all call sites to use ctx.sessionID
|
|
612
|
+
// instead of process.env.OPENCODE_SESSION_ID
|
|
613
|
+
|
|
614
|
+
// The fix was applied to:
|
|
615
|
+
// 1. src/swarm-orchestrate.ts:1743, 1852 - swarm_complete uses _ctx.sessionID
|
|
616
|
+
// 2. src/swarm-review.ts:515, 565 - swarm_review_feedback uses _ctx.sessionID
|
|
617
|
+
// 3. src/swarm-decompose.ts:780 - swarm_delegate_planning uses _ctx.sessionID
|
|
618
|
+
// 4. src/swarm-prompts.ts:1407 - swarm_spawn_subtask uses _ctx.sessionID
|
|
619
|
+
// 5. src/index.ts:216 - detectCoordinatorViolation uses input.sessionID
|
|
620
|
+
|
|
621
|
+
// With ctx.sessionID, events go to proper session files
|
|
622
|
+
const oldEnv = process.env.OPENCODE_SESSION_ID;
|
|
623
|
+
delete process.env.OPENCODE_SESSION_ID;
|
|
624
|
+
|
|
625
|
+
try {
|
|
626
|
+
// Simulate tool execution with ctx.sessionID
|
|
627
|
+
const mockCtx = { sessionID: sessionId };
|
|
628
|
+
|
|
629
|
+
const event: CoordinatorEvent = {
|
|
630
|
+
session_id: mockCtx.sessionID || "unknown",
|
|
631
|
+
epic_id: "bd-456",
|
|
632
|
+
timestamp: new Date().toISOString(),
|
|
633
|
+
event_type: "OUTCOME",
|
|
634
|
+
outcome_type: "subtask_success",
|
|
635
|
+
payload: { bead_id: "bd-456.1" },
|
|
636
|
+
};
|
|
637
|
+
|
|
638
|
+
captureCoordinatorEvent(event);
|
|
639
|
+
|
|
640
|
+
// Verify event captured with correct session_id
|
|
641
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
642
|
+
expect(fs.existsSync(sessionPath)).toBe(true);
|
|
643
|
+
|
|
644
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
645
|
+
const parsed = JSON.parse(content.trim());
|
|
646
|
+
expect(parsed.session_id).toBe(sessionId);
|
|
647
|
+
} finally {
|
|
648
|
+
if (oldEnv !== undefined) {
|
|
649
|
+
process.env.OPENCODE_SESSION_ID = oldEnv;
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
});
|
|
653
|
+
});
|
|
654
|
+
|
|
655
|
+
describe("captureCompactionEvent", () => {
|
|
656
|
+
let sessionDir: string;
|
|
657
|
+
let sessionId: string;
|
|
658
|
+
|
|
659
|
+
beforeEach(() => {
|
|
660
|
+
sessionDir = path.join(os.homedir(), ".config", "swarm-tools", "sessions");
|
|
661
|
+
sessionId = `test-compaction-${Date.now()}`;
|
|
662
|
+
});
|
|
663
|
+
|
|
664
|
+
afterEach(() => {
|
|
665
|
+
// Clean up test session file
|
|
666
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
667
|
+
if (fs.existsSync(sessionPath)) {
|
|
668
|
+
fs.unlinkSync(sessionPath);
|
|
669
|
+
}
|
|
670
|
+
});
|
|
671
|
+
|
|
672
|
+
test("writes detection_complete event to session file", () => {
|
|
673
|
+
captureCompactionEvent({
|
|
674
|
+
session_id: sessionId,
|
|
675
|
+
epic_id: "bd-123",
|
|
676
|
+
compaction_type: "detection_complete",
|
|
677
|
+
payload: {
|
|
678
|
+
confidence: "high",
|
|
679
|
+
context_type: "full",
|
|
680
|
+
epic_id: "bd-456",
|
|
681
|
+
},
|
|
682
|
+
});
|
|
683
|
+
|
|
684
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
685
|
+
expect(fs.existsSync(sessionPath)).toBe(true);
|
|
686
|
+
|
|
687
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
688
|
+
const lines = content.trim().split("\n");
|
|
689
|
+
expect(lines).toHaveLength(1);
|
|
690
|
+
|
|
691
|
+
const parsed = JSON.parse(lines[0]);
|
|
692
|
+
expect(parsed.event_type).toBe("COMPACTION");
|
|
693
|
+
expect(parsed.compaction_type).toBe("detection_complete");
|
|
694
|
+
expect(parsed.payload.confidence).toBe("high");
|
|
695
|
+
});
|
|
696
|
+
|
|
697
|
+
test("writes prompt_generated event with full prompt content", () => {
|
|
698
|
+
const fullPrompt = "You are a coordinator agent. ".repeat(200); // ~6k chars
|
|
699
|
+
|
|
700
|
+
captureCompactionEvent({
|
|
701
|
+
session_id: sessionId,
|
|
702
|
+
epic_id: "bd-123",
|
|
703
|
+
compaction_type: "prompt_generated",
|
|
704
|
+
payload: {
|
|
705
|
+
prompt_length: fullPrompt.length,
|
|
706
|
+
full_prompt: fullPrompt,
|
|
707
|
+
context_type: "full",
|
|
708
|
+
},
|
|
709
|
+
});
|
|
710
|
+
|
|
711
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
712
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
713
|
+
const parsed = JSON.parse(content.trim());
|
|
714
|
+
|
|
715
|
+
expect(parsed.payload.full_prompt).toBe(fullPrompt);
|
|
716
|
+
expect(parsed.payload.full_prompt.length).toBe(fullPrompt.length);
|
|
717
|
+
});
|
|
718
|
+
|
|
719
|
+
test("appends multiple compaction events to same session", () => {
|
|
720
|
+
captureCompactionEvent({
|
|
721
|
+
session_id: sessionId,
|
|
722
|
+
epic_id: "bd-123",
|
|
723
|
+
compaction_type: "detection_complete",
|
|
724
|
+
payload: { confidence: "high" },
|
|
725
|
+
});
|
|
726
|
+
|
|
727
|
+
captureCompactionEvent({
|
|
728
|
+
session_id: sessionId,
|
|
729
|
+
epic_id: "bd-123",
|
|
730
|
+
compaction_type: "prompt_generated",
|
|
731
|
+
payload: { prompt_length: 1000, full_prompt: "test" },
|
|
732
|
+
});
|
|
733
|
+
|
|
734
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
735
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
736
|
+
const lines = content.trim().split("\n");
|
|
737
|
+
expect(lines).toHaveLength(2);
|
|
738
|
+
|
|
739
|
+
const event1 = JSON.parse(lines[0]);
|
|
740
|
+
const event2 = JSON.parse(lines[1]);
|
|
741
|
+
|
|
742
|
+
expect(event1.compaction_type).toBe("detection_complete");
|
|
743
|
+
expect(event2.compaction_type).toBe("prompt_generated");
|
|
744
|
+
});
|
|
745
|
+
|
|
746
|
+
test("full compaction lifecycle tracking", () => {
|
|
747
|
+
// Simulate full compaction hook lifecycle
|
|
748
|
+
const lifecycleEvents = [
|
|
749
|
+
{
|
|
750
|
+
compaction_type: "detection_complete" as const,
|
|
751
|
+
payload: {
|
|
752
|
+
confidence: "high",
|
|
753
|
+
context_type: "full",
|
|
754
|
+
epic_id: "bd-789",
|
|
755
|
+
},
|
|
756
|
+
},
|
|
757
|
+
{
|
|
758
|
+
compaction_type: "prompt_generated" as const,
|
|
759
|
+
payload: {
|
|
760
|
+
prompt_length: 3500,
|
|
761
|
+
full_prompt: "You are a coordinator agent...",
|
|
762
|
+
context_type: "full",
|
|
763
|
+
},
|
|
764
|
+
},
|
|
765
|
+
{
|
|
766
|
+
compaction_type: "context_injected" as const,
|
|
767
|
+
payload: {
|
|
768
|
+
context_type: "full",
|
|
769
|
+
injected_sections: ["swarm_status", "mandatory_instructions"],
|
|
770
|
+
},
|
|
771
|
+
},
|
|
772
|
+
{
|
|
773
|
+
compaction_type: "resumption_started" as const,
|
|
774
|
+
payload: {
|
|
775
|
+
epic_id: "bd-789",
|
|
776
|
+
agent_role: "coordinator",
|
|
777
|
+
context_loaded: true,
|
|
778
|
+
},
|
|
779
|
+
},
|
|
780
|
+
{
|
|
781
|
+
compaction_type: "tool_call_tracked" as const,
|
|
782
|
+
payload: {
|
|
783
|
+
tool_name: "hive_create_epic",
|
|
784
|
+
extracted_data: { epic_id: "bd-789" },
|
|
785
|
+
},
|
|
786
|
+
},
|
|
787
|
+
];
|
|
788
|
+
|
|
789
|
+
// Capture all lifecycle events
|
|
790
|
+
for (const event of lifecycleEvents) {
|
|
791
|
+
captureCompactionEvent({
|
|
792
|
+
session_id: sessionId,
|
|
793
|
+
epic_id: "bd-123",
|
|
794
|
+
...event,
|
|
795
|
+
});
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
// Verify all events captured
|
|
799
|
+
const sessionPath = path.join(sessionDir, `${sessionId}.jsonl`);
|
|
800
|
+
const content = fs.readFileSync(sessionPath, "utf-8");
|
|
801
|
+
const lines = content.trim().split("\n");
|
|
802
|
+
expect(lines).toHaveLength(5);
|
|
803
|
+
|
|
804
|
+
// Verify lifecycle order
|
|
805
|
+
const capturedEvents = lines.map((line) => JSON.parse(line));
|
|
806
|
+
expect(capturedEvents[0].compaction_type).toBe("detection_complete");
|
|
807
|
+
expect(capturedEvents[1].compaction_type).toBe("prompt_generated");
|
|
808
|
+
expect(capturedEvents[2].compaction_type).toBe("context_injected");
|
|
809
|
+
expect(capturedEvents[3].compaction_type).toBe("resumption_started");
|
|
810
|
+
expect(capturedEvents[4].compaction_type).toBe("tool_call_tracked");
|
|
811
|
+
});
|
|
812
|
+
});
|
package/src/eval-capture.ts
CHANGED
|
@@ -9,9 +9,15 @@
|
|
|
9
9
|
* 2. swarm_complete captures: outcome signals per subtask
|
|
10
10
|
* 3. swarm_record_outcome captures: learning signals
|
|
11
11
|
* 4. Human feedback (optional): accept/reject/modify
|
|
12
|
-
* 5. Coordinator events: decisions, violations, outcomes
|
|
12
|
+
* 5. Coordinator events: decisions, violations, outcomes, compaction
|
|
13
13
|
* 6. Session capture: full coordinator session to ~/.config/swarm-tools/sessions/
|
|
14
14
|
*
|
|
15
|
+
* Event types:
|
|
16
|
+
* - DECISION: strategy_selected, worker_spawned, review_completed, decomposition_complete
|
|
17
|
+
* - VIOLATION: coordinator_edited_file, coordinator_ran_tests, coordinator_reserved_files, no_worker_spawned
|
|
18
|
+
* - OUTCOME: subtask_success, subtask_retry, subtask_failed, epic_complete
|
|
19
|
+
* - COMPACTION: detection_complete, prompt_generated, context_injected, resumption_started, tool_call_tracked
|
|
20
|
+
*
|
|
15
21
|
* @module eval-capture
|
|
16
22
|
*/
|
|
17
23
|
import * as fs from "node:fs";
|
|
@@ -123,7 +129,7 @@ export type PartialEvalRecord = Partial<EvalRecord> & {
|
|
|
123
129
|
};
|
|
124
130
|
|
|
125
131
|
/**
|
|
126
|
-
* Coordinator Event - captures coordinator decisions, violations, and
|
|
132
|
+
* Coordinator Event - captures coordinator decisions, violations, outcomes, and compaction
|
|
127
133
|
*/
|
|
128
134
|
export const CoordinatorEventSchema = z.discriminatedUnion("event_type", [
|
|
129
135
|
// DECISION events
|
|
@@ -168,6 +174,21 @@ export const CoordinatorEventSchema = z.discriminatedUnion("event_type", [
|
|
|
168
174
|
]),
|
|
169
175
|
payload: z.any(),
|
|
170
176
|
}),
|
|
177
|
+
// COMPACTION events
|
|
178
|
+
z.object({
|
|
179
|
+
session_id: z.string(),
|
|
180
|
+
epic_id: z.string(),
|
|
181
|
+
timestamp: z.string(),
|
|
182
|
+
event_type: z.literal("COMPACTION"),
|
|
183
|
+
compaction_type: z.enum([
|
|
184
|
+
"detection_complete",
|
|
185
|
+
"prompt_generated",
|
|
186
|
+
"context_injected",
|
|
187
|
+
"resumption_started",
|
|
188
|
+
"tool_call_tracked",
|
|
189
|
+
]),
|
|
190
|
+
payload: z.any(),
|
|
191
|
+
}),
|
|
171
192
|
]);
|
|
172
193
|
export type CoordinatorEvent = z.infer<typeof CoordinatorEventSchema>;
|
|
173
194
|
|
|
@@ -595,6 +616,77 @@ export function captureCoordinatorEvent(event: CoordinatorEvent): void {
|
|
|
595
616
|
fs.appendFileSync(sessionPath, line, "utf-8");
|
|
596
617
|
}
|
|
597
618
|
|
|
619
|
+
/**
|
|
620
|
+
* Capture a compaction event to the session file
|
|
621
|
+
*
|
|
622
|
+
* Helper for capturing COMPACTION events with automatic timestamp generation.
|
|
623
|
+
* Tracks compaction hook lifecycle: detection → prompt generation → context injection → resumption → first post-compaction tool call.
|
|
624
|
+
*
|
|
625
|
+
* **Part of eval-driven development pipeline:** Compaction events are used by `compaction-prompt.eval.ts`
|
|
626
|
+
* to score prompt quality (ID specificity, actionability, coordinator identity).
|
|
627
|
+
*
|
|
628
|
+
* **Lifecycle stages:**
|
|
629
|
+
* - `detection_complete` - Compaction detected (confidence level, context type)
|
|
630
|
+
* - `prompt_generated` - Continuation prompt created (FULL content stored for eval)
|
|
631
|
+
* - `context_injected` - Prompt injected into OpenCode context
|
|
632
|
+
* - `resumption_started` - Coordinator resumed from checkpoint
|
|
633
|
+
* - `tool_call_tracked` - First tool called post-compaction (measures discipline)
|
|
634
|
+
*
|
|
635
|
+
* @param params - Compaction event parameters
|
|
636
|
+
* @param params.session_id - Coordinator session ID
|
|
637
|
+
* @param params.epic_id - Epic ID being coordinated
|
|
638
|
+
* @param params.compaction_type - Stage of compaction lifecycle
|
|
639
|
+
* @param params.payload - Event-specific data (full prompt content, detection results, etc.)
|
|
640
|
+
*
|
|
641
|
+
* @example
|
|
642
|
+
* // Capture detection complete
|
|
643
|
+
* captureCompactionEvent({
|
|
644
|
+
* session_id: "session-123",
|
|
645
|
+
* epic_id: "bd-456",
|
|
646
|
+
* compaction_type: "detection_complete",
|
|
647
|
+
* payload: {
|
|
648
|
+
* confidence: "high",
|
|
649
|
+
* context_type: "full",
|
|
650
|
+
* epic_id: "bd-456",
|
|
651
|
+
* },
|
|
652
|
+
* });
|
|
653
|
+
*
|
|
654
|
+
* @example
|
|
655
|
+
* // Capture prompt generated (with full content for eval)
|
|
656
|
+
* captureCompactionEvent({
|
|
657
|
+
* session_id: "session-123",
|
|
658
|
+
* epic_id: "bd-456",
|
|
659
|
+
* compaction_type: "prompt_generated",
|
|
660
|
+
* payload: {
|
|
661
|
+
* prompt_length: 5000,
|
|
662
|
+
* full_prompt: "You are a coordinator...", // Full prompt, not truncated - used for quality scoring
|
|
663
|
+
* context_type: "full",
|
|
664
|
+
* },
|
|
665
|
+
* });
|
|
666
|
+
*/
|
|
667
|
+
export function captureCompactionEvent(params: {
|
|
668
|
+
session_id: string;
|
|
669
|
+
epic_id: string;
|
|
670
|
+
compaction_type:
|
|
671
|
+
| "detection_complete"
|
|
672
|
+
| "prompt_generated"
|
|
673
|
+
| "context_injected"
|
|
674
|
+
| "resumption_started"
|
|
675
|
+
| "tool_call_tracked";
|
|
676
|
+
payload: any;
|
|
677
|
+
}): void {
|
|
678
|
+
const event: CoordinatorEvent = {
|
|
679
|
+
session_id: params.session_id,
|
|
680
|
+
epic_id: params.epic_id,
|
|
681
|
+
timestamp: new Date().toISOString(),
|
|
682
|
+
event_type: "COMPACTION",
|
|
683
|
+
compaction_type: params.compaction_type,
|
|
684
|
+
payload: params.payload,
|
|
685
|
+
};
|
|
686
|
+
|
|
687
|
+
captureCoordinatorEvent(event);
|
|
688
|
+
}
|
|
689
|
+
|
|
598
690
|
/**
|
|
599
691
|
* Read all events from a session file
|
|
600
692
|
*/
|