opencode-swarm-plugin 0.38.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +27 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +182 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +881 -0
- package/bin/swarm.ts +686 -0
- package/dist/compaction-hook.d.ts +8 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-observability.d.ts +173 -0
- package/dist/compaction-observability.d.ts.map +1 -0
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +174 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +80 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16098 -651
- package/dist/plugin.js +16012 -756
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/schemas/task.d.ts +3 -3
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +702 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +166 -2
- package/evals/scorers/coordinator-discipline.ts +348 -15
- package/evals/scorers/index.test.ts +146 -0
- package/evals/scorers/index.ts +104 -0
- package/evals/swarm-decomposition.eval.ts +9 -2
- package/examples/commands/swarm.md +291 -21
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +315 -86
- package/src/compaction-observability.integration.test.ts +139 -0
- package/src/compaction-observability.test.ts +187 -0
- package/src/compaction-observability.ts +324 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +626 -1
- package/src/eval-capture.ts +286 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/eval-runner.test.ts +96 -0
- package/src/eval-runner.ts +356 -0
- package/src/hive.ts +34 -0
- package/src/index.ts +115 -2
- package/src/memory.test.ts +110 -0
- package/src/memory.ts +34 -0
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- package/dist/beads.d.ts +0 -386
- package/dist/beads.d.ts.map +0 -1
- package/dist/schemas/bead-events.d.ts +0 -698
- package/dist/schemas/bead-events.d.ts.map +0 -1
- package/dist/schemas/bead.d.ts +0 -255
- package/dist/schemas/bead.d.ts.map +0 -1
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/.env
ADDED
|
(+2 lines; contents not shown in this diff)
|
package/.hive/eval-results.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compaction-prompt": {
|
|
3
|
+
"passed": true,
|
|
4
|
+
"phase": "bootstrap",
|
|
5
|
+
"message": "Bootstrap phase (1/10 runs) - collecting data",
|
|
6
|
+
"currentScore": 0.85
|
|
7
|
+
},
|
|
8
|
+
"coordinator-behavior": {
|
|
9
|
+
"passed": true,
|
|
10
|
+
"phase": "bootstrap",
|
|
11
|
+
"message": "Bootstrap phase (1/10 runs) - collecting data",
|
|
12
|
+
"currentScore": 0.85
|
|
13
|
+
},
|
|
14
|
+
"coordinator-session": {
|
|
15
|
+
"passed": true,
|
|
16
|
+
"phase": "bootstrap",
|
|
17
|
+
"message": "Bootstrap phase (1/10 runs) - collecting data",
|
|
18
|
+
"currentScore": 0.85
|
|
19
|
+
},
|
|
20
|
+
"swarm-decomposition": {
|
|
21
|
+
"passed": true,
|
|
22
|
+
"phase": "bootstrap",
|
|
23
|
+
"message": "Bootstrap phase (1/10 runs) - collecting data",
|
|
24
|
+
"currentScore": 0.85
|
|
25
|
+
}
|
|
26
|
+
}
|
package/.hive/issues.jsonl
CHANGED
|
@@ -26,3 +26,30 @@
|
|
|
26
26
|
{"id":"opencode-swarm-plugin--ys7z8-mjkn5xp1blq","title":"Wire captureSubtaskOutcome() into swarm_complete","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-24T23:25:06.133Z","updated_at":"2025-12-24T23:52:01.496Z","closed_at":"2025-12-24T23:52:01.496Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkn5xocowf","dependencies":[],"labels":[],"comments":[]}
|
|
27
27
|
{"id":"opencode-swarm-plugin--ys7z8-mjkn5xp41f2","title":"Wire finalizeEvalRecord() into swarm_record_outcome","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-24T23:25:06.136Z","updated_at":"2025-12-24T23:52:02.719Z","closed_at":"2025-12-24T23:52:02.719Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkn5xocowf","dependencies":[],"labels":[],"comments":[]}
|
|
28
28
|
{"id":"opencode-swarm-plugin--ys7z8-mjkn5xp793w","title":"Add eval scripts to package.json and update README","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-24T23:25:06.139Z","updated_at":"2025-12-24T23:52:04.385Z","closed_at":"2025-12-24T23:52:04.385Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkn5xocowf","dependencies":[],"labels":[],"comments":[]}
|
|
29
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkvvysb1bk","title":"Fix Eval Pipeline: Database Table + Scorer API","description":"Two fixes: 1) Ensure eval_records table is created when swarm-mail database initializes, 2) Fix composite scorer API usage in evalite tests","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T03:29:17.531Z","updated_at":"2025-12-25T03:42:14.497Z","closed_at":"2025-12-25T03:42:14.497Z","dependencies":[],"labels":[],"comments":[]}
|
|
30
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkvvysjyrv","title":"Verify eval_records table creation in swarm-mail","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T03:29:17.539Z","updated_at":"2025-12-25T03:40:16.396Z","closed_at":"2025-12-25T03:40:16.396Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkvvysb1bk","dependencies":[],"labels":[],"comments":[]}
|
|
31
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkvvysl8ye","title":"Fix composite scorer API in coordinator-discipline.ts","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T03:29:17.541Z","updated_at":"2025-12-25T03:40:17.922Z","closed_at":"2025-12-25T03:40:17.922Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkvvysb1bk","dependencies":[],"labels":[],"comments":[]}
|
|
32
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkvvysnzae","title":"Fix composite scorer API in compaction-scorers.ts","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T03:29:17.543Z","updated_at":"2025-12-25T03:40:19.200Z","closed_at":"2025-12-25T03:40:19.200Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkvvysb1bk","dependencies":[],"labels":[],"comments":[]}
|
|
33
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkvvysrwgk","title":"Fix composite scorer API in coordinator-behavior.eval.ts","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T03:29:17.547Z","updated_at":"2025-12-25T03:42:04.249Z","closed_at":"2025-12-25T03:42:04.249Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkvvysb1bk","dependencies":[],"labels":[],"comments":[]}
|
|
34
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkwt9rqf2s","title":"Fix session ID propagation in eval capture","description":"Session IDs not flowing to captureCoordinatorEvent - 82% of events orphaned in unknown.jsonl. Root cause: swarm tools use process.env.OPENCODE_SESSION_ID which is not set, instead of ctx.sessionID which IS available.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T03:55:11.414Z","updated_at":"2025-12-25T04:14:23.283Z","closed_at":"2025-12-25T04:14:23.283Z","dependencies":[],"labels":[],"comments":[]}
|
|
35
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkwt9rzlw3","title":"Add sessionId parameter to captureCoordinatorEvent and update call sites","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T03:55:11.423Z","updated_at":"2025-12-25T04:05:28.792Z","closed_at":"2025-12-25T04:05:28.792Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkwt9rqf2s","dependencies":[],"labels":[],"comments":[]}
|
|
36
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkwt9s2boa","title":"Create migration script to re-attribute unknown.jsonl events to proper sessions","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T03:55:11.426Z","updated_at":"2025-12-25T04:05:29.764Z","closed_at":"2025-12-25T04:05:29.764Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkwt9rqf2s","dependencies":[],"labels":[],"comments":[]}
|
|
37
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkwt9s6xoa","title":"Run migration and verify data integrity","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T03:55:11.430Z","updated_at":"2025-12-25T04:14:16.676Z","closed_at":"2025-12-25T04:14:16.676Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkwt9rqf2s","dependencies":[],"labels":[],"comments":[]}
|
|
38
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkyhrqmecc","title":"Add quality gate filters to eval session loader","description":"Filter eval sessions by quality signals: minEvents >= 3, hasWorkerSpawn, hasReviewCompleted. Currently 67 of 82 sessions are noise (<3 events). Quality gate will keep ~15 high-signal sessions.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T04:42:14.062Z","updated_at":"2025-12-25T04:49:40.809Z","closed_at":"2025-12-25T04:49:40.809Z","dependencies":[],"labels":[],"comments":[]}
|
|
39
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjkyhrr2qm7","title":"Add quality filter options to loadCapturedSessions with TDD","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T04:42:14.078Z","updated_at":"2025-12-25T04:49:39.904Z","closed_at":"2025-12-25T04:49:39.904Z","parent_id":"opencode-swarm-plugin--ys7z8-mjkyhrqmecc","dependencies":[],"labels":[],"comments":[]}
|
|
40
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","title":"Eval System Improvements: Tool + Event Capture + Scorers","description":"Improve eval system with:\n1. Plugin tool for running evals (eval_run)\n2. Capture decomposition_complete events\n3. Capture VIOLATION events\n4. Improve compaction prompt structure\n5. Add review efficiency scorer\n6. Enforce knowledge gathering validation\n\nTarget: 70% → 85% overall eval score","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T05:28:16.999Z","updated_at":"2025-12-25T05:28:16.999Z","dependencies":[],"labels":[],"comments":[]}
|
|
41
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04zn4u31","title":"Add eval_run plugin tool","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T05:28:17.008Z","updated_at":"2025-12-25T05:28:17.008Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
42
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04znglws","title":"Capture VIOLATION events for coordinator discipline","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T05:28:17.020Z","updated_at":"2025-12-25T05:28:17.020Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
43
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04znlxzw","title":"Improve compaction prompt structure","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-25T05:28:17.025Z","updated_at":"2025-12-25T05:28:17.025Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
44
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04zn8by5","title":"Capture decomposition_complete event","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T05:28:17.012Z","updated_at":"2025-12-25T05:38:07.026Z","closed_at":"2025-12-25T05:38:07.026Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
45
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04znn0uk","title":"Add review efficiency scorer","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-25T05:28:17.027Z","updated_at":"2025-12-25T05:37:03.084Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
46
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjl04znqie9","title":"Update spawnEfficiency scorer fallback","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T05:28:17.030Z","updated_at":"2025-12-25T05:29:34.561Z","closed_at":"2025-12-25T05:29:34.561Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
|
|
47
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","title":"ADR-009: Pattern Catalog and Innovation Documentation","description":"Comprehensive analysis of opencode-swarm-plugin to document all patterns, innovations, and ideas. Each research worker analyzes a specific domain, then a synthesis worker consolidates findings into ADR-009.","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T14:24:21.120Z","updated_at":"2025-12-25T14:24:21.120Z","dependencies":[],"labels":[],"comments":[]}
|
|
48
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadmu3bx","title":"Research: Learning Systems (confidence decay, pattern maturity, anti-patterns)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.126Z","updated_at":"2025-12-25T14:24:21.126Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
49
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadmw66u","title":"Research: Swarm Coordination (decomposition, orchestration, review, worktree)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.128Z","updated_at":"2025-12-25T14:24:21.128Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
50
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadmyadr","title":"Research: Memory & Context Preservation (compaction, semantic memory)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.130Z","updated_at":"2025-12-25T14:24:21.130Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
51
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadn06xp","title":"Research: Observability & Evaluation (logging, eval capture, gates)","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.132Z","updated_at":"2025-12-25T14:24:21.132Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
52
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadn1c2k","title":"Research: Skills System & Knowledge Injection","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.133Z","updated_at":"2025-12-25T14:24:21.133Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
53
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadn7knk","title":"Research: Mandates, Guardrails & Structured Output","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.139Z","updated_at":"2025-12-25T14:24:21.139Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
54
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadn8e6u","title":"Research: Existing ADRs & Documentation Gaps","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.140Z","updated_at":"2025-12-25T14:24:21.140Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
|
55
|
+
{"id":"opencode-swarm-plugin--ys7z8-mjljadnaj6o","title":"Synthesize: Write ADR-009 Pattern Catalog","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T14:24:21.142Z","updated_at":"2025-12-25T14:24:21.142Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
|
package/.hive/memories.jsonl
CHANGED
|
@@ -546,4 +546,26 @@
|
|
|
546
546
|
{"id":"mem_mjkifog0_kyrf1i8","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-24T21:12:42.624Z"}
|
|
547
547
|
{"id":"mem_mjkifrmb_cfzpsbl","information":"Test memory for adapter wiring verification","created_at":"2025-12-24T21:12:46.739Z","tags":"test,memory"}
|
|
548
548
|
{"id":"mem_mjkifrp8_6p3hyc0","information":"OAuth refresh tokens need 5min buffer before expiry","created_at":"2025-12-24T21:12:46.844Z","metadata":"{\"raw\":\"auth,tokens,oauth\"}","tags":"auth,integration-test"}
|
|
549
|
-
{"id":"mem_mjkifrty_n2obcci","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-24T21:12:47.014Z","tags":"test,verification"}
|
|
549
|
+
{"id":"mem_mjkifrty_n2obcci","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-24T21:12:47.014Z","tags":"test,verification"}
|
|
550
|
+
{"id":"mem_mjkvzysv_sc2t9vz","information":"Test memory for tools integration","created_at":"2025-12-25T03:32:24.175Z","tags":"test"}
|
|
551
|
+
{"id":"mem_mjkvzzi6_1p6e6a9","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-25T03:32:25.086Z"}
|
|
552
|
+
{"id":"mem_mjkw8n77_qjdsp7f","information":"Test memory for tools integration","created_at":"2025-12-25T03:39:09.043Z","tags":"test"}
|
|
553
|
+
{"id":"mem_mjkw8njx_i8h8cyh","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-25T03:39:09.501Z"}
|
|
554
|
+
{"id":"mem_mjkw8rmk_f6hitx1","information":"Test memory for adapter wiring verification","created_at":"2025-12-25T03:39:14.780Z","tags":"test,memory"}
|
|
555
|
+
{"id":"mem_mjkw8rpm_lje9arh","information":"OAuth refresh tokens need 5min buffer before expiry","created_at":"2025-12-25T03:39:14.890Z","metadata":"{\"raw\":\"auth,tokens,oauth\"}","tags":"auth,integration-test"}
|
|
556
|
+
{"id":"mem_mjkw8rtm_adjnpml","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-25T03:39:15.034Z","tags":"test,verification"}
|
|
557
|
+
{"id":"mem_mjkwmbkm_33rhosw","information":"Test memory for tools integration","created_at":"2025-12-25T03:49:47.158Z","tags":"test"}
|
|
558
|
+
{"id":"mem_mjkwmc55_9oi3pyz","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-25T03:49:47.897Z"}
|
|
559
|
+
{"id":"mem_mjkwmg5h_07q5cqq","information":"Test memory for adapter wiring verification","created_at":"2025-12-25T03:49:53.093Z","tags":"test,memory"}
|
|
560
|
+
{"id":"mem_mjkwmg9a_evvx6t6","information":"OAuth refresh tokens need 5min buffer before expiry","created_at":"2025-12-25T03:49:53.230Z","metadata":"{\"raw\":\"auth,tokens,oauth\"}","tags":"auth,integration-test"}
|
|
561
|
+
{"id":"mem_mjkwmge4_2pkurm7","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-25T03:49:53.404Z","tags":"test,verification"}
|
|
562
|
+
{"id":"mem_mjkx05sw_izlcsfs","information":"Test memory for tools integration","created_at":"2025-12-25T04:00:32.864Z","tags":"test"}
|
|
563
|
+
{"id":"mem_mjkx067y_b9hn5qi","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-25T04:00:33.406Z"}
|
|
564
|
+
{"id":"mem_mjkx09hf_ygskd44","information":"Test memory for adapter wiring verification","created_at":"2025-12-25T04:00:37.635Z","tags":"test,memory"}
|
|
565
|
+
{"id":"mem_mjkx09lg_hwd8wid","information":"OAuth refresh tokens need 5min buffer before expiry","created_at":"2025-12-25T04:00:37.780Z","metadata":"{\"raw\":\"auth,tokens,oauth\"}","tags":"auth,integration-test"}
|
|
566
|
+
{"id":"mem_mjkx09p9_lc3whf6","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-25T04:00:37.917Z","tags":"test,verification"}
|
|
567
|
+
{"id":"mem_mjkxgljy_xvyprn1","information":"Test memory for tools integration","created_at":"2025-12-25T04:13:19.774Z","tags":"test"}
|
|
568
|
+
{"id":"mem_mjkxglqg_5ojok3n","information":"Findable test memory with unique keyword xyztest123","created_at":"2025-12-25T04:13:20.008Z"}
|
|
569
|
+
{"id":"mem_mjkxgogk_48pml1f","information":"Test memory for adapter wiring verification","created_at":"2025-12-25T04:13:23.540Z","tags":"test,memory"}
|
|
570
|
+
{"id":"mem_mjkxgomk_mm0hvqg","information":"OAuth refresh tokens need 5min buffer before expiry","created_at":"2025-12-25T04:13:23.756Z","metadata":"{\"raw\":\"auth,tokens,oauth\"}","tags":"auth,integration-test"}
|
|
571
|
+
{"id":"mem_mjkxgopz_mqvrw0z","information":"Smoke test verified full tool adapter wiring works end-to-end","created_at":"2025-12-25T04:13:23.879Z","tags":"test,verification"}
|
|
package/.opencode/eval-history.jsonl
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{"timestamp":"2025-12-25T04:28:42.041Z","eval_name":"compaction-prompt","score":0.85,"run_count":1}
|
|
2
|
+
{"timestamp":"2025-12-25T04:28:42.041Z","eval_name":"coordinator-behavior","score":0.85,"run_count":1}
|
|
3
|
+
{"timestamp":"2025-12-25T04:28:42.042Z","eval_name":"coordinator-session","score":0.85,"run_count":1}
|
|
4
|
+
{"timestamp":"2025-12-25T04:28:42.042Z","eval_name":"swarm-decomposition","score":0.85,"run_count":1}
|
|
5
|
+
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"compaction-prompt","score":0.85,"run_count":2}
|
|
6
|
+
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"coordinator-behavior","score":0.85,"run_count":2}
|
|
7
|
+
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"coordinator-session","score":0.85,"run_count":2}
|
|
8
|
+
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"swarm-decomposition","score":0.85,"run_count":2}
|
|
9
|
+
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"compaction-prompt","score":0.85,"run_count":3}
|
|
10
|
+
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"coordinator-behavior","score":0.85,"run_count":3}
|
|
11
|
+
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"coordinator-session","score":0.85,"run_count":3}
|
|
12
|
+
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"swarm-decomposition","score":0.85,"run_count":3}
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,187 @@
|
|
|
1
1
|
# opencode-swarm-plugin
|
|
2
2
|
|
|
3
|
+
## 0.40.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [`948e031`](https://github.com/joelhooks/swarm-tools/commit/948e0318fe5e2c1a5d695a56533fc2a2a7753887) Thanks [@joelhooks](https://github.com/joelhooks)! - ## 🔭 Observability Swarm: See What the Bees Are Doing
|
|
8
|
+
|
|
9
|
+
> "The unexamined swarm is not worth coordinating." — Socrates, probably
|
|
10
|
+
|
|
11
|
+
Four parallel workers descended on the observability stack and emerged victorious. The compaction hook no longer runs in darkness, coordinator sessions are now viewable, and the docs finally explain what all those JSONL files are for.
|
|
12
|
+
|
|
13
|
+
### What's New
|
|
14
|
+
|
|
15
|
+
**Compaction Observability** (`src/compaction-observability.ts`)
|
|
16
|
+
|
|
17
|
+
- Metrics collector tracks phases: START → GATHER → DETECT → INJECT → COMPLETE
|
|
18
|
+
- Pattern extraction/skipping with reasons ("why didn't this get captured?")
|
|
19
|
+
- Timing breakdown per phase (analysis vs extraction vs storage)
|
|
20
|
+
- 15 tests (11 unit + 4 integration)
|
|
21
|
+
|
|
22
|
+
**`swarm log sessions` CLI**
|
|
23
|
+
|
|
24
|
+
- `swarm log sessions` — list all captured coordinator sessions
|
|
25
|
+
- `swarm log sessions <id>` — view events for a session (partial ID matching)
|
|
26
|
+
- `swarm log sessions --latest` — quick access to most recent
|
|
27
|
+
- `--type`, `--since`, `--limit`, `--json` filters
|
|
28
|
+
- 64 tests covering parsing, listing, filtering
|
|
29
|
+
|
|
30
|
+
**Coordinator Observability Docs**
|
|
31
|
+
|
|
32
|
+
- AGENTS.md: overview with quick commands
|
|
33
|
+
- evals/README.md: deep dive with ASCII flow diagrams, event type reference, JSONL examples, jq recipes
|
|
34
|
+
|
|
35
|
+
**Research: Coordinator Prompt Eval** (`.hive/analysis/coordinator-prompt-eval-research.md`)
|
|
36
|
+
|
|
37
|
+
- 26KB analysis of prompt iteration strategies
|
|
38
|
+
- Recommends: versioning + evalite (defer LLM-as-Judge to v0.34+)
|
|
39
|
+
- Implementation plan with effort estimates
|
|
40
|
+
|
|
41
|
+
### The Observability Story
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
CAPTURE ──────────► VIEW ──────────► SCORE
|
|
45
|
+
(eval-capture.ts) (swarm log (coordinator
|
|
46
|
+
sessions) evals)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Now you can answer:
|
|
50
|
+
|
|
51
|
+
- "What did the last 10 compaction runs extract?"
|
|
52
|
+
- "Why didn't this pattern get captured?"
|
|
53
|
+
- "Which coordinator sessions had violations?"
|
|
54
|
+
|
|
55
|
+
## 0.39.1
|
|
56
|
+
|
|
57
|
+
### Patch Changes
|
|
58
|
+
|
|
59
|
+
- [`19a6557`](https://github.com/joelhooks/swarm-tools/commit/19a6557cee9878858e7f61e2aba86b37a3ec10ad) Thanks [@joelhooks](https://github.com/joelhooks)! - ## 🐝 Eval Quality Gates: Signal Over Noise
|
|
60
|
+
|
|
61
|
+
The eval system now filters coordinator sessions to focus on high-quality data.
|
|
62
|
+
|
|
63
|
+
**Problem:** 67 of 82 captured sessions had <3 events - noise from aborted runs, test pokes, and incomplete swarms. This diluted eval scores and made metrics unreliable.
|
|
64
|
+
|
|
65
|
+
**Solution:** Quality filters applied BEFORE sampling:
|
|
66
|
+
|
|
67
|
+
| Filter | Default | Purpose |
|
|
68
|
+
| -------------------- | ------- | --------------------------------- |
|
|
69
|
+
| `minEvents` | 3 | Skip incomplete/aborted sessions |
|
|
70
|
+
| `requireWorkerSpawn` | true | Ensure coordinator delegated work |
|
|
71
|
+
| `requireReview` | true | Ensure full swarm lifecycle |
|
|
72
|
+
|
|
73
|
+
**Impact:**
|
|
74
|
+
|
|
75
|
+
- Filters 93 noisy sessions automatically
|
|
76
|
+
- Overall eval score: 63% → 71% (true signal, not diluted)
|
|
77
|
+
- Coordinator discipline: 47% → 57% (accurate measurement)
|
|
78
|
+
|
|
79
|
+
**Usage:**
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
// Default: high-quality sessions only
|
|
83
|
+
const sessions = await loadCapturedSessions();
|
|
84
|
+
|
|
85
|
+
// Override for specific analysis
|
|
86
|
+
const allSessions = await loadCapturedSessions({
|
|
87
|
+
minEvents: 1,
|
|
88
|
+
requireWorkerSpawn: false,
|
|
89
|
+
requireReview: false,
|
|
90
|
+
});
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Includes 7 unit tests covering filter logic and edge cases.
|
|
94
|
+
|
|
95
|
+
## 0.39.0
|
|
96
|
+
|
|
97
|
+
### Minor Changes
|
|
98
|
+
|
|
99
|
+
- [`aa12943`](https://github.com/joelhooks/swarm-tools/commit/aa12943f3edc8d5e23878b22f44073e4c71367c5) Thanks [@joelhooks](https://github.com/joelhooks)! - ## 🐝 Eval-Driven Development: The System That Scores Itself
|
|
100
|
+
|
|
101
|
+
> "What gets measured gets managed." — Peter Drucker
|
|
102
|
+
> "What gets scored gets improved." — The Swarm
|
|
103
|
+
|
|
104
|
+
The plugin now evaluates its own output quality through a progressive gate system. Every compaction prompt gets scored, tracked, and learned from. Regressions become impossible to ignore.
|
|
105
|
+
|
|
106
|
+
### The Pipeline
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
CAPTURE → SCORE → STORE → GATE → LEARN → IMPROVE
|
|
110
|
+
↑ ↓
|
|
111
|
+
└──────────────────────────────────────┘
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### What's New
|
|
115
|
+
|
|
116
|
+
**Event Capture** (5 integration points)
|
|
117
|
+
|
|
118
|
+
- `detection_triggered` - When compaction is detected
|
|
119
|
+
- `prompt_generated` - Full LLM prompt captured
|
|
120
|
+
- `context_injected` - Final content before injection
|
|
121
|
+
- All events stored to `~/.config/swarm-tools/sessions/{session_id}.jsonl`
|
|
122
|
+
|
|
123
|
+
**5 Compaction Prompt Scorers**
|
|
124
|
+
|
|
125
|
+
- `epicIdSpecificity` - Real IDs, not placeholders (20%)
|
|
126
|
+
- `actionability` - Specific tool calls with values (20%)
|
|
127
|
+
- `coordinatorIdentity` - ASCII header + mandates (25%)
|
|
128
|
+
- `forbiddenToolsPresent` - Lists what NOT to do (15%)
|
|
129
|
+
- `postCompactionDiscipline` - First tool is correct (20%)
|
|
130
|
+
|
|
131
|
+
**Progressive Gates**
|
|
132
|
+
| Phase | Threshold | Behavior |
|
|
133
|
+
|-------|-----------|----------|
|
|
134
|
+
| Bootstrap | N/A | Always pass, building baseline |
|
|
135
|
+
| Stabilization | 0.6 | Warn but pass |
|
|
136
|
+
| Production | 0.7 | Fail CI on regression |
|
|
137
|
+
|
|
138
|
+
**CLI Commands**
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
swarm eval status # Current phase, thresholds, scores
|
|
142
|
+
swarm eval history # Trends with sparklines ▁▂▃▄▅▆▇█
|
|
143
|
+
swarm eval run [--ci] # Execute evals, gate check
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**CI Integration**
|
|
147
|
+
|
|
148
|
+
- Runs after tests pass
|
|
149
|
+
- Posts results as PR comment with emoji status
|
|
150
|
+
- Only fails in production phase with actual regression
|
|
151
|
+
|
|
152
|
+
**Learning Feedback Loop**
|
|
153
|
+
|
|
154
|
+
- Significant score drops auto-stored to semantic memory
|
|
155
|
+
- Future agents learn from past failures
|
|
156
|
+
- Pattern maturity tracking
|
|
157
|
+
|
|
158
|
+
### Breaking Changes
|
|
159
|
+
|
|
160
|
+
None. All new functionality is additive.
|
|
161
|
+
|
|
162
|
+
### Files Changed
|
|
163
|
+
|
|
164
|
+
- `src/eval-capture.ts` - Event capture with Zod schemas
|
|
165
|
+
- `src/eval-gates.ts` - Progressive gate logic
|
|
166
|
+
- `src/eval-history.ts` - Score tracking over time
|
|
167
|
+
- `src/eval-learning.ts` - Failure-to-learning extraction
|
|
168
|
+
- `src/compaction-prompt-scoring.ts` - 5 pure scoring functions
|
|
169
|
+
- `evals/compaction-prompt.eval.ts` - Evalite integration
|
|
170
|
+
- `bin/swarm.ts` - CLI commands
|
|
171
|
+
- `.github/workflows/ci.yml` - CI integration
|
|
172
|
+
|
|
173
|
+
### Test Coverage
|
|
174
|
+
|
|
175
|
+
- 422 new tests for eval-capture
|
|
176
|
+
- 48 CLI tests
|
|
177
|
+
- 7 integration tests for capture wiring
|
|
178
|
+
- All existing tests still passing
|
|
179
|
+
|
|
180
|
+
### Patch Changes
|
|
181
|
+
|
|
182
|
+
- Updated dependencies [[`aa12943`](https://github.com/joelhooks/swarm-tools/commit/aa12943f3edc8d5e23878b22f44073e4c71367c5)]:
|
|
183
|
+
- swarm-mail@1.5.2
|
|
184
|
+
|
|
3
185
|
## 0.38.0
|
|
4
186
|
|
|
5
187
|
### Minor Changes
|
package/README.md
CHANGED
|
@@ -242,27 +242,44 @@ bun run eval:run
|
|
|
242
242
|
# Run specific suites
|
|
243
243
|
bun run eval:decomposition # Task decomposition quality
|
|
244
244
|
bun run eval:coordinator # Coordinator protocol compliance
|
|
245
|
+
bun run eval:compaction # Compaction prompt quality
|
|
246
|
+
|
|
247
|
+
# Check eval status (progressive gates)
|
|
248
|
+
swarm eval status [eval-name]
|
|
249
|
+
|
|
250
|
+
# View history with trends
|
|
251
|
+
swarm eval history
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
**Progressive Gates:**
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
Phase Runs Gate Behavior
|
|
258
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
259
|
+
Bootstrap <10 ✅ Always pass (collect data)
|
|
260
|
+
Stabilization 10-50 ⚠️ Warn on >10% regression
|
|
261
|
+
Production >50 ❌ Fail on >5% regression
|
|
245
262
|
```
|
|
246
263
|
|
|
247
264
|
**What gets evaluated:**
|
|
248
265
|
|
|
249
|
-
| Eval Suite
|
|
250
|
-
|
|
251
|
-
| `swarm-decomposition` | Subtask independence, complexity balance, coverage, clarity
|
|
252
|
-
| `coordinator-session` | Violation count, spawn efficiency, review thoroughness
|
|
266
|
+
| Eval Suite | Measures | Data Source |
|
|
267
|
+
| --------------------- | ------------------------------------------------------------- | ------------------------------------------------ |
|
|
268
|
+
| `swarm-decomposition` | Subtask independence, complexity balance, coverage, clarity | Fixtures + `.opencode/eval-data.jsonl` |
|
|
269
|
+
| `coordinator-session` | Violation count, spawn efficiency, review thoroughness | `~/.config/swarm-tools/sessions/*.jsonl` |
|
|
270
|
+
| `compaction-prompt` | ID specificity, actionability, identity, forbidden tools | Session compaction events |
|
|
271
|
+
|
|
272
|
+
**Learning Feedback Loop:**
|
|
273
|
+
|
|
274
|
+
When eval scores drop >15% from baseline, failure context is automatically stored to semantic memory. Future prompts query these learnings for context.
|
|
253
275
|
|
|
254
276
|
**Data capture locations:**
|
|
255
277
|
- Decomposition inputs/outputs: `.opencode/eval-data.jsonl`
|
|
278
|
+
- Eval history: `.opencode/eval-history.jsonl`
|
|
256
279
|
- Coordinator sessions: `~/.config/swarm-tools/sessions/*.jsonl`
|
|
257
|
-
- Subtask outcomes: swarm-mail database
|
|
258
|
-
|
|
259
|
-
**Custom scorers:**
|
|
260
|
-
- Subtask independence (0-1): Files don't overlap between subtasks
|
|
261
|
-
- Complexity balance (0-1): Subtasks have similar estimated complexity
|
|
262
|
-
- Coverage completeness (0-1): Required files are covered
|
|
263
|
-
- Instruction clarity (0-1): Descriptions are specific and actionable
|
|
280
|
+
- Subtask outcomes: swarm-mail database
|
|
264
281
|
|
|
265
|
-
See [evals/README.md](./evals/README.md) for scorer details and how to write new evals.
|
|
282
|
+
See **[evals/README.md](./evals/README.md)** for full architecture, scorer details, CI integration, and how to write new evals.
|
|
266
283
|
|
|
267
284
|
---
|
|
268
285
|
|