opencode-swarm-plugin 0.38.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +27 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +182 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +881 -0
- package/bin/swarm.ts +686 -0
- package/dist/compaction-hook.d.ts +8 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-observability.d.ts +173 -0
- package/dist/compaction-observability.d.ts.map +1 -0
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +174 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +80 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16098 -651
- package/dist/plugin.js +16012 -756
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/schemas/task.d.ts +3 -3
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +702 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +166 -2
- package/evals/scorers/coordinator-discipline.ts +348 -15
- package/evals/scorers/index.test.ts +146 -0
- package/evals/scorers/index.ts +104 -0
- package/evals/swarm-decomposition.eval.ts +9 -2
- package/examples/commands/swarm.md +291 -21
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +315 -86
- package/src/compaction-observability.integration.test.ts +139 -0
- package/src/compaction-observability.test.ts +187 -0
- package/src/compaction-observability.ts +324 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +626 -1
- package/src/eval-capture.ts +286 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/eval-runner.test.ts +96 -0
- package/src/eval-runner.ts +356 -0
- package/src/hive.ts +34 -0
- package/src/index.ts +115 -2
- package/src/memory.test.ts +110 -0
- package/src/memory.ts +34 -0
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- package/dist/beads.d.ts +0 -386
- package/dist/beads.d.ts.map +0 -1
- package/dist/schemas/bead-events.d.ts +0 -698
- package/dist/schemas/bead-events.d.ts.map +0 -1
- package/dist/schemas/bead.d.ts +0 -255
- package/dist/schemas/bead.d.ts.map +0 -1
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
|
@@ -37,8 +37,15 @@
|
|
|
37
37
|
*
|
|
38
38
|
* This is NOT about preserving state for a human - it's about the swarm continuing
|
|
39
39
|
* autonomously after context compression.
|
|
40
|
+
*
|
|
41
|
+
* Structure optimized for eval scores:
|
|
42
|
+
* 1. ASCII header (visual anchor, coordinatorIdentity scorer)
|
|
43
|
+
* 2. Immediate actions (actionable tool calls, postCompactionDiscipline scorer)
|
|
44
|
+
* 3. Forbidden tools (explicit list, forbiddenToolsPresent scorer)
|
|
45
|
+
* 4. Role & mandates (strong language, coordinatorIdentity scorer)
|
|
46
|
+
* 5. Reference sections (supporting material)
|
|
40
47
|
*/
|
|
41
|
-
export declare const SWARM_COMPACTION_CONTEXT = "
|
|
48
|
+
export declare const SWARM_COMPACTION_CONTEXT = "\n\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 \u2502\n\u2502 \uD83D\uDC1D YOU ARE THE COORDINATOR \uD83D\uDC1D \u2502\n\u2502 \u2502\n\u2502 NOT A WORKER. NOT AN IMPLEMENTER. \u2502\n\u2502 YOU ORCHESTRATE. \u2502\n\u2502 \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n\nContext was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**\n\nYour role is ORCHESTRATION, not implementation. The resume steps above (if present) tell you exactly what to do first.\n\n---\n\n## \uD83D\uDEAB FORBIDDEN TOOLS (NEVER Use These Directly)\n\nCoordinators do NOT do implementation work. These tools are **FORBIDDEN**:\n\n### File Modification (ALWAYS spawn workers instead)\n- `Edit` - SPAWN A WORKER\n- `Write` - SPAWN A WORKER\n- `bash` (for file modifications) - SPAWN A WORKER\n- `swarmmail_reserve` - Workers reserve their own files\n- `git commit` - Workers commit their own changes\n\n### External Data Fetching (SPAWN A RESEARCHER instead)\n\n**Repository fetching:**\n- `repo-crawl_file`, `repo-crawl_readme`, `repo-crawl_search`, `repo-crawl_structure`, `repo-crawl_tree`\n- `repo-autopsy_*` (all repo-autopsy tools)\n\n**Web/documentation fetching:**\n- `webfetch`, `fetch_fetch`\n- `context7_resolve-library-id`, `context7_get-library-docs`\n\n**Knowledge base:**\n- `pdf-brain_search`, `pdf-brain_read`\n\n**Instead:** Use `swarm_spawn_researcher` with a clear research task. The researcher will fetch, summarize, and return findings.\n\n---\n\n## \uD83D\uDCBC YOUR ROLE (Non-Negotiable)\n\nYou are the **COORDINATOR**. Your job is ORCHESTRATION, not implementation.\n\n### What Coordinators Do:\n- \u2705 Spawn workers for implementation tasks\n- \u2705 Monitor worker progress via `swarm_status` and `swarmmail_inbox`\n- \u2705 Review completed work with `swarm_review`\n- \u2705 Unblock dependencies and resolve conflicts\n- \u2705 Close the loop when epics complete\n\n### What Coordinators NEVER Do:\n- \u274C **NEVER** edit or write files directly\n- \u274C **NEVER** run tests with `bash`\n- \u274C **NEVER** \"just do it myself to save time\"\n- \u274C **NEVER** reserve files (workers reserve)\n- \u274C **NEVER** fetch external data directly (spawn researchers)\n\n**If you catch yourself about to edit a file, STOP. Use `swarm_spawn_subtask` instead.**\n\n### Strong Mandates:\n- **ALWAYS** spawn workers for implementation tasks\n- **ALWAYS** check status and inbox before decisions\n- **ALWAYS** review worker output before accepting\n- **NON-NEGOTIABLE:** You orchestrate. You do NOT implement.\n\n---\n\n## \uD83D\uDCDD SUMMARY FORMAT (Preserve This State)\n\nWhen compaction occurs, extract and preserve this structure:\n\n```\n## \uD83D\uDC1D Swarm State\n\n**Epic:** CELL_ID - TITLE\n**Project:** PROJECT_PATH\n**Progress:** X/Y subtasks complete\n\n**Active:**\n- CELL_ID: TITLE [in_progress] \u2192 AGENT working on FILES\n\n**Blocked:**\n- CELL_ID: TITLE - BLOCKED: REASON\n\n**Completed:**\n- CELL_ID: TITLE \u2713\n\n**Ready to Spawn:**\n- CELL_ID: TITLE (files: FILES)\n```\n\n### What to Extract:\n1. **Epic & Subtasks** - IDs, titles, status, file assignments\n2. **What's Running** - Active agents and their current work\n3. **What's Blocked** - Blockers and what's needed to unblock\n4. **What's Done** - Completed work and follow-ups\n5. **What's Next** - Pending subtasks ready to spawn\n\n---\n\n## \uD83D\uDCCB REFERENCE: Full Coordinator Workflow\n\nYou are ALWAYS swarming. Use this workflow for any new work:\n\n### Phase 1.5: Research (For Complex Tasks)\n\nIf the task requires unfamiliar technologies, spawn a researcher FIRST:\n\n```\nswarm_spawn_researcher(\n research_id=\"research-TOPIC\",\n epic_id=\"mjkw...\", # your epic ID\n tech_stack=[\"TECHNOLOGY\"],\n project_path=\"PROJECT_PATH\"\n)\n// Then spawn with Task(subagent_type=\"swarm/researcher\", prompt=\"...\")\n```\n\n### Phase 2: Knowledge Gathering\n\n```\nsemantic-memory_find(query=\"TASK_KEYWORDS\", limit=5) # Past learnings\ncass_search(query=\"TASK_DESCRIPTION\", limit=5) # Similar past tasks\nskills_list() # Available skills\n```\n\n### Phase 3: Decompose\n\n```\nswarm_select_strategy(task=\"TASK\")\nswarm_plan_prompt(task=\"TASK\", context=\"KNOWLEDGE\")\nswarm_validate_decomposition(response=\"CELLTREE_JSON\")\n```\n\n### Phase 4: Create Cells\n\n`hive_create_epic(epic_title=\"TASK\", subtasks=[...])`\n\n### Phase 5: File Reservations\n\n> **\u26A0\uFE0F Coordinator NEVER reserves files.** Workers reserve their own files with `swarmmail_reserve`.\n\n### Phase 6: Spawn Workers\n\n```\nswarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)\nTask(subagent_type=\"swarm/worker\", prompt=\"GENERATED_PROMPT\")\n```\n\n### Phase 7: Review Loop (MANDATORY)\n\n**AFTER EVERY Task() RETURNS:**\n\n1. `swarmmail_inbox()` - Check for messages\n2. `swarm_review(project_key, epic_id, task_id, files_touched)` - Generate review\n3. Evaluate against epic goals\n4. `swarm_review_feedback(project_key, task_id, worker_id, status, issues)`\n\n**If needs_changes:**\n```\nswarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)\n// Spawn NEW worker with Task() using retry prompt\n// Max 3 attempts before marking task blocked\n```\n\n### Phase 8: Complete\n\n`hive_sync()` - Sync all cells to git\n\n---\n\n## \uD83D\uDCCA REFERENCE: Decomposition Strategies\n\n| Strategy | Best For | Keywords |\n| -------------- | ------------------------ | -------------------------------------- |\n| file-based | Refactoring, migrations | refactor, migrate, rename, update all |\n| feature-based | New features | add, implement, build, create, feature |\n| risk-based | Bug fixes, security | fix, bug, security, critical, urgent |\n\n---\n\n**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**\n";
|
|
42
49
|
/**
|
|
43
50
|
* Fallback detection prompt - tells the compactor what to look for
|
|
44
51
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compaction-hook.d.ts","sourceRoot":"","sources":["../src/compaction-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;
|
|
1
|
+
{"version":3,"file":"compaction-hook.d.ts","sourceRoot":"","sources":["../src/compaction-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAwCH;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,wBAAwB,mwNA2LpC,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,wBAAwB,0nCAiCpC,CAAC;AA2FF;;;;;;;;GAQG;AACH,MAAM,MAAM,cAAc,GAAG,OAAO,CAAC;AAErC;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,GAAG,CACX,MAAM,EACN;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CACrE,CAAC;IACF,UAAU,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,OAAO,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC;CACjE;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,cAAc,EACtB,SAAS,EAAE,MAAM,EACjB,KAAK,GAAE,MAAY,GAClB,OAAO,CAAC,iBAAiB,CAAC,CAgJ5B;AAwVD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,IAExD,OAAO;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,EAC5B,QAAQ;IAAE,OAAO,EAAE,MAAM,EAAE,CAAA;CAAE,KAC5B,OAAO,CAAC,IAAI,CAAC,CAqLjB"}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compaction Hook Observability
|
|
3
|
+
*
|
|
4
|
+
* Structured logging, metrics, and queryable history for the pre-compaction hook.
|
|
5
|
+
*
|
|
6
|
+
* **Philosophy:** Make the invisible visible. When patterns aren't extracted,
|
|
7
|
+
* when detection fails, when timing explodes - we need to know WHY.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* const metrics = createMetricsCollector({ session_id: "abc123" });
|
|
12
|
+
*
|
|
13
|
+
* recordPhaseStart(metrics, CompactionPhase.DETECT);
|
|
14
|
+
* // ... detection logic ...
|
|
15
|
+
* recordPhaseComplete(metrics, CompactionPhase.DETECT, { confidence: "high" });
|
|
16
|
+
*
|
|
17
|
+
* recordPatternExtracted(metrics, "epic_state", "Found epic bd-123");
|
|
18
|
+
*
|
|
19
|
+
* const summary = getMetricsSummary(metrics);
|
|
20
|
+
* console.log(`Detected: ${summary.detected}, Confidence: ${summary.confidence}`);
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
/**
|
|
24
|
+
* Compaction phases - aligned with existing log structure
|
|
25
|
+
*
|
|
26
|
+
* From compaction-hook.ts:
|
|
27
|
+
* - START: session_id, trigger
|
|
28
|
+
* - GATHER: source (swarm-mail|hive), duration_ms, stats/counts
|
|
29
|
+
* - DETECT: confidence, detected, reason_count, reasons
|
|
30
|
+
* - INJECT: confidence, context_length, context_type (full|fallback|none)
|
|
31
|
+
* - COMPLETE: duration_ms, success, detected, confidence, context_injected
|
|
32
|
+
*/
|
|
33
|
+
export declare enum CompactionPhase {
|
|
34
|
+
START = "START",
|
|
35
|
+
GATHER_SWARM_MAIL = "GATHER_SWARM_MAIL",
|
|
36
|
+
GATHER_HIVE = "GATHER_HIVE",
|
|
37
|
+
DETECT = "DETECT",
|
|
38
|
+
INJECT = "INJECT",
|
|
39
|
+
COMPLETE = "COMPLETE"
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Phase timing and outcome
|
|
43
|
+
*/
|
|
44
|
+
interface PhaseMetrics {
|
|
45
|
+
duration_ms: number;
|
|
46
|
+
success: boolean;
|
|
47
|
+
error?: string;
|
|
48
|
+
/** Additional phase-specific data */
|
|
49
|
+
metadata?: Record<string, unknown>;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Pattern extraction record
|
|
53
|
+
*/
|
|
54
|
+
interface PatternRecord {
|
|
55
|
+
pattern_type: string;
|
|
56
|
+
reason: string;
|
|
57
|
+
/** Debug details (only captured if debug mode enabled) */
|
|
58
|
+
details?: Record<string, unknown>;
|
|
59
|
+
timestamp: number;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Compaction metrics collector
|
|
63
|
+
*
|
|
64
|
+
* Mutable state object that accumulates metrics during a compaction run.
|
|
65
|
+
*/
|
|
66
|
+
export interface CompactionMetrics {
|
|
67
|
+
/** Session metadata */
|
|
68
|
+
session_id?: string;
|
|
69
|
+
has_sdk_client?: boolean;
|
|
70
|
+
debug?: boolean;
|
|
71
|
+
/** Phase timings */
|
|
72
|
+
phases: Map<CompactionPhase, {
|
|
73
|
+
start_time: number;
|
|
74
|
+
end_time?: number;
|
|
75
|
+
metadata?: Record<string, unknown>;
|
|
76
|
+
error?: string;
|
|
77
|
+
}>;
|
|
78
|
+
/** Pattern extraction tracking */
|
|
79
|
+
extracted: PatternRecord[];
|
|
80
|
+
skipped: PatternRecord[];
|
|
81
|
+
/** Final detection result */
|
|
82
|
+
confidence?: "high" | "medium" | "low" | "none";
|
|
83
|
+
detected?: boolean;
|
|
84
|
+
/** Overall timing */
|
|
85
|
+
start_time: number;
|
|
86
|
+
end_time?: number;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Metrics summary (read-only snapshot)
|
|
90
|
+
*/
|
|
91
|
+
export interface CompactionMetricsSummary {
|
|
92
|
+
session_id?: string;
|
|
93
|
+
has_sdk_client?: boolean;
|
|
94
|
+
/** Phase breakdown */
|
|
95
|
+
phases: Record<string, PhaseMetrics>;
|
|
96
|
+
/** Pattern extraction stats */
|
|
97
|
+
patterns_extracted: number;
|
|
98
|
+
patterns_skipped: number;
|
|
99
|
+
extraction_success_rate: number;
|
|
100
|
+
extracted_patterns: string[];
|
|
101
|
+
skipped_patterns: string[];
|
|
102
|
+
/** Detection outcome */
|
|
103
|
+
confidence?: "high" | "medium" | "low" | "none";
|
|
104
|
+
detected?: boolean;
|
|
105
|
+
/** Timing */
|
|
106
|
+
total_duration_ms: number;
|
|
107
|
+
/** Debug info (only if debug mode enabled) */
|
|
108
|
+
debug_info?: Array<{
|
|
109
|
+
phase: string;
|
|
110
|
+
pattern: string;
|
|
111
|
+
details: Record<string, unknown>;
|
|
112
|
+
}>;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Create a metrics collector
|
|
116
|
+
*
|
|
117
|
+
* @param metadata - Session metadata to capture
|
|
118
|
+
* @returns Mutable metrics collector
|
|
119
|
+
*/
|
|
120
|
+
export declare function createMetricsCollector(metadata?: {
|
|
121
|
+
session_id?: string;
|
|
122
|
+
has_sdk_client?: boolean;
|
|
123
|
+
debug?: boolean;
|
|
124
|
+
}): CompactionMetrics;
|
|
125
|
+
/**
|
|
126
|
+
* Record phase start
|
|
127
|
+
*
|
|
128
|
+
* @param metrics - Metrics collector
|
|
129
|
+
* @param phase - Phase being started
|
|
130
|
+
*/
|
|
131
|
+
export declare function recordPhaseStart(metrics: CompactionMetrics, phase: CompactionPhase): void;
|
|
132
|
+
/**
|
|
133
|
+
* Record phase completion
|
|
134
|
+
*
|
|
135
|
+
* @param metrics - Metrics collector
|
|
136
|
+
* @param phase - Phase being completed
|
|
137
|
+
* @param result - Phase outcome
|
|
138
|
+
*/
|
|
139
|
+
export declare function recordPhaseComplete(metrics: CompactionMetrics, phase: CompactionPhase, result?: {
|
|
140
|
+
success?: boolean;
|
|
141
|
+
error?: string;
|
|
142
|
+
confidence?: "high" | "medium" | "low" | "none";
|
|
143
|
+
detected?: boolean;
|
|
144
|
+
[key: string]: unknown;
|
|
145
|
+
}): void;
|
|
146
|
+
/**
|
|
147
|
+
* Record an extracted pattern
|
|
148
|
+
*
|
|
149
|
+
* @param metrics - Metrics collector
|
|
150
|
+
* @param pattern_type - Type of pattern extracted (e.g., "epic_state", "agent_name")
|
|
151
|
+
* @param reason - Human-readable reason for extraction
|
|
152
|
+
* @param details - Debug details (only captured if debug mode enabled)
|
|
153
|
+
*/
|
|
154
|
+
export declare function recordPatternExtracted(metrics: CompactionMetrics, pattern_type: string, reason: string, details?: Record<string, unknown>): void;
|
|
155
|
+
/**
|
|
156
|
+
* Record a skipped pattern
|
|
157
|
+
*
|
|
158
|
+
* @param metrics - Metrics collector
|
|
159
|
+
* @param pattern_type - Type of pattern that was skipped
|
|
160
|
+
* @param reason - Human-readable reason for skipping
|
|
161
|
+
*/
|
|
162
|
+
export declare function recordPatternSkipped(metrics: CompactionMetrics, pattern_type: string, reason: string): void;
|
|
163
|
+
/**
|
|
164
|
+
* Get metrics summary (read-only snapshot)
|
|
165
|
+
*
|
|
166
|
+
* Computes derived metrics like success rates and total duration.
|
|
167
|
+
*
|
|
168
|
+
* @param metrics - Metrics collector
|
|
169
|
+
* @returns Immutable summary
|
|
170
|
+
*/
|
|
171
|
+
export declare function getMetricsSummary(metrics: CompactionMetrics): CompactionMetricsSummary;
|
|
172
|
+
export {};
|
|
173
|
+
//# sourceMappingURL=compaction-observability.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compaction-observability.d.ts","sourceRoot":"","sources":["../src/compaction-observability.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;;;;;;;;GASG;AACH,oBAAY,eAAe;IACzB,KAAK,UAAU;IACf,iBAAiB,sBAAsB;IACvC,WAAW,gBAAgB;IAC3B,MAAM,WAAW;IACjB,MAAM,WAAW;IACjB,QAAQ,aAAa;CACtB;AAED;;GAEG;AACH,UAAU,YAAY;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;GAIG;AACH,MAAM,WAAW,iBAAiB;IAChC,uBAAuB;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,KAAK,CAAC,EAAE,OAAO,CAAC;IAEhB,oBAAoB;IACpB,MAAM,EAAE,GAAG,CAAC,eAAe,EAAE;QAC3B,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC,CAAC;IAEH,kCAAkC;IAClC,SAAS,EAAE,aAAa,EAAE,CAAC;IAC3B,OAAO,EAAE,aAAa,EAAE,CAAC;IAEzB,6BAA6B;IAC7B,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;IAChD,QAAQ,CAAC,EAAE,OAAO,CAAC;IAEnB,qBAAqB;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAErC,+BAA+B;IAC/B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;IAChC,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAE3B,wBAAwB;IACxB,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;IAChD,QAAQ,CAAC,EAAE,OAAO,CAAC;IAEnB,aAAa;IACb,iBAAiB,EAAE,MAAM,CAAC;IAE1B,8CAA8C;IAC9C,UAAU,CAAC,EAAE,KAAK,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAClC,CAAC,CAAC;CACJ;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,CAAC,EAAE;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB,GAAG,iBAAiB,CAUpB;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,iBAAiB,EAC1B,KAAK,EAAE,eAAe,GACrB,IAAI,CAIN;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,iBAAiB,EAC1B,KAAK,EAAE,eAAe,EACtB,MAAM,CAAC,EAAE;IACP,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;IAChD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB,GACA,IAAI,CAmCN;AAED;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,iBAAiB,EAC1B,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAChC,IAAI,CAYN;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,iBAAiB,EAC1B,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,MAAM,GACb,IAAI,CAMN;AAED;;;;;;;GAOG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,iBAAiB,GAAG,wBAAwB,CAkDtF"}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compaction Prompt Quality Scoring - Pure Functions
|
|
3
|
+
*
|
|
4
|
+
* Evaluates the quality of continuation prompts generated after context compaction.
|
|
5
|
+
* **Problem**: Post-compaction coordinators often "wake up" confused, forget their role,
|
|
6
|
+
* and start editing files instead of checking worker status.
|
|
7
|
+
*
|
|
8
|
+
* **Solution**: Score prompts on 5 dimensions that predict coordinator success:
|
|
9
|
+
*
|
|
10
|
+
* 1. **Epic ID Specificity (0.20)**: Real IDs (`mjkw...`) not placeholders (`<epic-id>`, `bd-xxx`)
|
|
11
|
+
* - Placeholders = coordinator can't check actual swarm status
|
|
12
|
+
*
|
|
13
|
+
* 2. **Actionability (0.20)**: Tool calls with real values (e.g., `swarm_status(epic_id='mjkw81rkq4c')`)
|
|
14
|
+
* - Generic instructions like "check status" don't work
|
|
15
|
+
*
|
|
16
|
+
* 3. **Coordinator Identity (0.25)**: ASCII header + strong mandates (NEVER/ALWAYS)
|
|
17
|
+
* - Visual + semantic cues reinforce role post-compaction
|
|
18
|
+
*
|
|
19
|
+
* 4. **Forbidden Tools Listed (0.15)**: Explicitly lists Edit, Write, swarmmail_reserve, git commit
|
|
20
|
+
* - Naming forbidden tools reduces violations
|
|
21
|
+
*
|
|
22
|
+
* 5. **Post-Compaction Discipline (0.20)**: First suggested tool is swarm_status or inbox (not Edit)
|
|
23
|
+
* - First tool sets the pattern - "check status" vs "dive into code"
|
|
24
|
+
*
|
|
25
|
+
* **Pure functions**: These can be tested without evalite. The evalite wrappers are in
|
|
26
|
+
* `evals/scorers/compaction-prompt-scorers.ts`.
|
|
27
|
+
*
|
|
28
|
+
* **Data source**: Captured from `captureCompactionEvent()` with `compaction_type: "prompt_generated"`.
|
|
29
|
+
* The payload includes the FULL prompt content (not truncated) for scoring.
|
|
30
|
+
*
|
|
31
|
+
* **Integration**: `compaction-prompt.eval.ts` uses these scorers to track prompt quality over time.
|
|
32
|
+
* Progressive gates enforce quality: bootstrap → stabilization → production.
|
|
33
|
+
*
|
|
34
|
+
* @module compaction-prompt-scoring
|
|
35
|
+
*/
|
|
36
|
+
/**
|
|
37
|
+
* Compaction prompt structure (from LLM generation)
|
|
38
|
+
*/
|
|
39
|
+
export interface CompactionPrompt {
|
|
40
|
+
content: string;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Scorer result type
|
|
44
|
+
*/
|
|
45
|
+
export interface ScorerResult {
|
|
46
|
+
score: number;
|
|
47
|
+
message: string;
|
|
48
|
+
}
|
|
49
|
+
/** Matches real epic/cell IDs (mjkw prefix + 7+ base36 chars) */
|
|
50
|
+
export declare const REAL_EPIC_ID: RegExp;
|
|
51
|
+
/** Matches common placeholder patterns */
|
|
52
|
+
export declare const PLACEHOLDERS: RegExp[];
|
|
53
|
+
/** Matches ASCII box-drawing characters (for headers) */
|
|
54
|
+
export declare const ASCII_BOX: RegExp;
|
|
55
|
+
/** Matches strong mandate language */
|
|
56
|
+
export declare const STRONG_LANGUAGE: RegExp[];
|
|
57
|
+
/**
|
|
58
|
+
* Score epic ID specificity
|
|
59
|
+
*
|
|
60
|
+
* Validates that epic IDs are REAL, not placeholders.
|
|
61
|
+
* Placeholders like <epic-id>, bd-xxx, <path> indicate
|
|
62
|
+
* the prompt generator failed to inject actual values.
|
|
63
|
+
*
|
|
64
|
+
* @returns 1.0 if real IDs, 0.0 if placeholders found
|
|
65
|
+
*/
|
|
66
|
+
export declare function scoreEpicIdSpecificity(prompt: CompactionPrompt): ScorerResult;
|
|
67
|
+
/**
|
|
68
|
+
* Score actionability of tool calls
|
|
69
|
+
*
|
|
70
|
+
* Validates that the prompt includes SPECIFIC actionable tool calls.
|
|
71
|
+
* Generic instructions like "check status" are useless.
|
|
72
|
+
* Good: swarm_status(epic_id='mjkw81rkq4c', project_key='/path')
|
|
73
|
+
* Bad: "Check the status of workers"
|
|
74
|
+
*
|
|
75
|
+
* @returns 1.0 if actionable tool calls with real values, 0.0 otherwise
|
|
76
|
+
*/
|
|
77
|
+
export declare function scoreActionability(prompt: CompactionPrompt): ScorerResult;
|
|
78
|
+
/**
|
|
79
|
+
* Score coordinator identity reinforcement
|
|
80
|
+
*
|
|
81
|
+
* Validates that the prompt has STRONG coordinator identity reinforcement.
|
|
82
|
+
* Post-compaction coordinators lose their identity without visual+semantic cues.
|
|
83
|
+
*
|
|
84
|
+
* Checks:
|
|
85
|
+
* 1. ASCII box header (visual anchor)
|
|
86
|
+
* 2. Strong language (NEVER/ALWAYS, not "should"/"consider")
|
|
87
|
+
*
|
|
88
|
+
* @returns 1.0 for ASCII header + strong mandates, 0.5 for header only, 0.0 otherwise
|
|
89
|
+
*/
|
|
90
|
+
export declare function scoreCoordinatorIdentity(prompt: CompactionPrompt): ScorerResult;
|
|
91
|
+
/**
|
|
92
|
+
* Score forbidden tools listing
|
|
93
|
+
*
|
|
94
|
+
* Validates that the prompt LISTS forbidden tools by name.
|
|
95
|
+
* Coordinators must know exactly which tools to avoid.
|
|
96
|
+
*
|
|
97
|
+
* Required forbidden tools:
|
|
98
|
+
* 1. Edit
|
|
99
|
+
* 2. Write
|
|
100
|
+
* 3. swarmmail_reserve (only workers reserve)
|
|
101
|
+
* 4. git commit (workers commit)
|
|
102
|
+
*
|
|
103
|
+
* @returns ratio of forbidden tools mentioned (0.0 to 1.0)
|
|
104
|
+
*/
|
|
105
|
+
export declare function scoreForbiddenToolsPresent(prompt: CompactionPrompt): ScorerResult;
|
|
106
|
+
/**
|
|
107
|
+
* Score post-compaction discipline (first tool correctness)
|
|
108
|
+
*
|
|
109
|
+
* Validates that the FIRST suggested tool is correct.
|
|
110
|
+
* Coordinators should check status FIRST, not edit files.
|
|
111
|
+
*
|
|
112
|
+
* Good first tools:
|
|
113
|
+
* - swarm_status
|
|
114
|
+
* - swarmmail_inbox
|
|
115
|
+
*
|
|
116
|
+
* Bad first tools:
|
|
117
|
+
* - Edit
|
|
118
|
+
* - Write
|
|
119
|
+
* - Read (should check status first)
|
|
120
|
+
*
|
|
121
|
+
* @returns 1.0 if first tool is swarm_status or inbox, 0.0 otherwise
|
|
122
|
+
*/
|
|
123
|
+
export declare function scorePostCompactionDiscipline(prompt: CompactionPrompt): ScorerResult;
|
|
124
|
+
//# sourceMappingURL=compaction-prompt-scoring.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compaction-prompt-scoring.d.ts","sourceRoot":"","sources":["../src/compaction-prompt-scoring.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CAChB;AAID,iEAAiE;AACjE,eAAO,MAAM,YAAY,QAAqB,CAAC;AAE/C,0CAA0C;AAC1C,eAAO,MAAM,YAAY,UAKxB,CAAC;AAEF,yDAAyD;AACzD,eAAO,MAAM,SAAS,QAAiB,CAAC;AAExC,sCAAsC;AACtC,eAAO,MAAM,eAAe,UAAoD,CAAC;AAIjF;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,gBAAgB,GAAG,YAAY,CAuB7E;AAED;;;;;;;;;GASG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,gBAAgB,GAAG,YAAY,CA+BzE;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,wBAAwB,CACvC,MAAM,EAAE,gBAAgB,GACtB,YAAY,CA6Bd;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,0BAA0B,CACzC,MAAM,EAAE,gBAAgB,GACtB,YAAY,CAiCd;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,6BAA6B,CAC5C,MAAM,EAAE,gBAAgB,GACtB,YAAY,CAiCd"}
|
package/dist/eval-capture.d.ts
CHANGED
|
@@ -70,7 +70,7 @@ export type PartialEvalRecord = Partial<EvalRecord> & {
|
|
|
70
70
|
task: string;
|
|
71
71
|
};
|
|
72
72
|
/**
|
|
73
|
-
* Coordinator Event - captures coordinator decisions, violations, and
|
|
73
|
+
* Coordinator Event - captures coordinator decisions, violations, outcomes, and compaction
|
|
74
74
|
*/
|
|
75
75
|
export declare const CoordinatorEventSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
76
76
|
session_id: z.ZodString;
|
|
@@ -82,6 +82,12 @@ export declare const CoordinatorEventSchema: z.ZodDiscriminatedUnion<[z.ZodObjec
|
|
|
82
82
|
worker_spawned: "worker_spawned";
|
|
83
83
|
review_completed: "review_completed";
|
|
84
84
|
decomposition_complete: "decomposition_complete";
|
|
85
|
+
researcher_spawned: "researcher_spawned";
|
|
86
|
+
skill_loaded: "skill_loaded";
|
|
87
|
+
inbox_checked: "inbox_checked";
|
|
88
|
+
blocker_resolved: "blocker_resolved";
|
|
89
|
+
scope_change_approved: "scope_change_approved";
|
|
90
|
+
scope_change_rejected: "scope_change_rejected";
|
|
85
91
|
}>;
|
|
86
92
|
payload: z.ZodAny;
|
|
87
93
|
}, z.core.$strip>, z.ZodObject<{
|
|
@@ -106,6 +112,20 @@ export declare const CoordinatorEventSchema: z.ZodDiscriminatedUnion<[z.ZodObjec
|
|
|
106
112
|
subtask_retry: "subtask_retry";
|
|
107
113
|
subtask_failed: "subtask_failed";
|
|
108
114
|
epic_complete: "epic_complete";
|
|
115
|
+
blocker_detected: "blocker_detected";
|
|
116
|
+
}>;
|
|
117
|
+
payload: z.ZodAny;
|
|
118
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
119
|
+
session_id: z.ZodString;
|
|
120
|
+
epic_id: z.ZodString;
|
|
121
|
+
timestamp: z.ZodString;
|
|
122
|
+
event_type: z.ZodLiteral<"COMPACTION">;
|
|
123
|
+
compaction_type: z.ZodEnum<{
|
|
124
|
+
detection_complete: "detection_complete";
|
|
125
|
+
prompt_generated: "prompt_generated";
|
|
126
|
+
context_injected: "context_injected";
|
|
127
|
+
resumption_started: "resumption_started";
|
|
128
|
+
tool_call_tracked: "tool_call_tracked";
|
|
109
129
|
}>;
|
|
110
130
|
payload: z.ZodAny;
|
|
111
131
|
}, z.core.$strip>], "event_type">;
|
|
@@ -128,6 +148,12 @@ export declare const CoordinatorSessionSchema: z.ZodObject<{
|
|
|
128
148
|
worker_spawned: "worker_spawned";
|
|
129
149
|
review_completed: "review_completed";
|
|
130
150
|
decomposition_complete: "decomposition_complete";
|
|
151
|
+
researcher_spawned: "researcher_spawned";
|
|
152
|
+
skill_loaded: "skill_loaded";
|
|
153
|
+
inbox_checked: "inbox_checked";
|
|
154
|
+
blocker_resolved: "blocker_resolved";
|
|
155
|
+
scope_change_approved: "scope_change_approved";
|
|
156
|
+
scope_change_rejected: "scope_change_rejected";
|
|
131
157
|
}>;
|
|
132
158
|
payload: z.ZodAny;
|
|
133
159
|
}, z.core.$strip>, z.ZodObject<{
|
|
@@ -152,6 +178,20 @@ export declare const CoordinatorSessionSchema: z.ZodObject<{
|
|
|
152
178
|
subtask_retry: "subtask_retry";
|
|
153
179
|
subtask_failed: "subtask_failed";
|
|
154
180
|
epic_complete: "epic_complete";
|
|
181
|
+
blocker_detected: "blocker_detected";
|
|
182
|
+
}>;
|
|
183
|
+
payload: z.ZodAny;
|
|
184
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
185
|
+
session_id: z.ZodString;
|
|
186
|
+
epic_id: z.ZodString;
|
|
187
|
+
timestamp: z.ZodString;
|
|
188
|
+
event_type: z.ZodLiteral<"COMPACTION">;
|
|
189
|
+
compaction_type: z.ZodEnum<{
|
|
190
|
+
detection_complete: "detection_complete";
|
|
191
|
+
prompt_generated: "prompt_generated";
|
|
192
|
+
context_injected: "context_injected";
|
|
193
|
+
resumption_started: "resumption_started";
|
|
194
|
+
tool_call_tracked: "tool_call_tracked";
|
|
155
195
|
}>;
|
|
156
196
|
payload: z.ZodAny;
|
|
157
197
|
}, z.core.$strip>], "event_type">>;
|
|
@@ -294,6 +334,139 @@ export declare function ensureSessionDir(): void;
|
|
|
294
334
|
* Appends the event as a JSONL line to ~/.config/swarm-tools/sessions/{session_id}.jsonl
|
|
295
335
|
*/
|
|
296
336
|
export declare function captureCoordinatorEvent(event: CoordinatorEvent): void;
|
|
337
|
+
/**
|
|
338
|
+
* Capture a compaction event to the session file
|
|
339
|
+
*
|
|
340
|
+
* Helper for capturing COMPACTION events with automatic timestamp generation.
|
|
341
|
+
* Tracks compaction hook lifecycle: detection → prompt generation → context injection → resumption.
|
|
342
|
+
*
|
|
343
|
+
* **Part of eval-driven development pipeline:** Compaction events are used by `compaction-prompt.eval.ts`
|
|
344
|
+
* to score prompt quality (ID specificity, actionability, coordinator identity).
|
|
345
|
+
*
|
|
346
|
+
* **Lifecycle stages:**
|
|
347
|
+
* - `detection_complete` - Compaction detected (confidence level, context type)
|
|
348
|
+
* - `prompt_generated` - Continuation prompt created (FULL content stored for eval)
|
|
349
|
+
* - `context_injected` - Prompt injected into OpenCode context
|
|
350
|
+
* - `resumption_started` - Coordinator resumed from checkpoint
|
|
351
|
+
* - `tool_call_tracked` - First tool called post-compaction (measures discipline)
|
|
352
|
+
*
|
|
353
|
+
* @param params - Compaction event parameters
|
|
354
|
+
* @param params.session_id - Coordinator session ID
|
|
355
|
+
* @param params.epic_id - Epic ID being coordinated
|
|
356
|
+
* @param params.compaction_type - Stage of compaction lifecycle
|
|
357
|
+
* @param params.payload - Event-specific data (full prompt content, detection results, etc.)
|
|
358
|
+
*
|
|
359
|
+
* @example
|
|
360
|
+
* // Capture detection complete
|
|
361
|
+
* captureCompactionEvent({
|
|
362
|
+
* session_id: "session-123",
|
|
363
|
+
* epic_id: "bd-456",
|
|
364
|
+
* compaction_type: "detection_complete",
|
|
365
|
+
* payload: {
|
|
366
|
+
* confidence: "high",
|
|
367
|
+
* context_type: "full",
|
|
368
|
+
* epic_id: "bd-456",
|
|
369
|
+
* },
|
|
370
|
+
* });
|
|
371
|
+
*
|
|
372
|
+
* @example
|
|
373
|
+
* // Capture prompt generated (with full content for eval)
|
|
374
|
+
* captureCompactionEvent({
|
|
375
|
+
* session_id: "session-123",
|
|
376
|
+
* epic_id: "bd-456",
|
|
377
|
+
* compaction_type: "prompt_generated",
|
|
378
|
+
* payload: {
|
|
379
|
+
* prompt_length: 5000,
|
|
380
|
+
* full_prompt: "You are a coordinator...", // Full prompt, not truncated - used for quality scoring
|
|
381
|
+
* context_type: "full",
|
|
382
|
+
* },
|
|
383
|
+
* });
|
|
384
|
+
*/
|
|
385
|
+
export declare function captureCompactionEvent(params: {
|
|
386
|
+
session_id: string;
|
|
387
|
+
epic_id: string;
|
|
388
|
+
compaction_type: "detection_complete" | "prompt_generated" | "context_injected" | "resumption_started" | "tool_call_tracked";
|
|
389
|
+
payload: any;
|
|
390
|
+
}): void;
|
|
391
|
+
/**
|
|
392
|
+
* Capture a researcher spawned event
|
|
393
|
+
*
|
|
394
|
+
* Called when coordinator spawns a swarm-researcher to handle unfamiliar technology
|
|
395
|
+
* or gather documentation before decomposition.
|
|
396
|
+
*/
|
|
397
|
+
export declare function captureResearcherSpawned(params: {
|
|
398
|
+
session_id: string;
|
|
399
|
+
epic_id: string;
|
|
400
|
+
researcher_id: string;
|
|
401
|
+
research_topic: string;
|
|
402
|
+
tools_used?: string[];
|
|
403
|
+
}): void;
|
|
404
|
+
/**
|
|
405
|
+
* Capture a skill loaded event
|
|
406
|
+
*
|
|
407
|
+
* Called when coordinator loads domain knowledge via skills_use().
|
|
408
|
+
*/
|
|
409
|
+
export declare function captureSkillLoaded(params: {
|
|
410
|
+
session_id: string;
|
|
411
|
+
epic_id: string;
|
|
412
|
+
skill_name: string;
|
|
413
|
+
context?: string;
|
|
414
|
+
}): void;
|
|
415
|
+
/**
|
|
416
|
+
* Capture an inbox checked event
|
|
417
|
+
*
|
|
418
|
+
* Called when coordinator checks swarmmail inbox for worker messages.
|
|
419
|
+
* Tracks monitoring frequency and responsiveness.
|
|
420
|
+
*/
|
|
421
|
+
export declare function captureInboxChecked(params: {
|
|
422
|
+
session_id: string;
|
|
423
|
+
epic_id: string;
|
|
424
|
+
message_count: number;
|
|
425
|
+
urgent_count: number;
|
|
426
|
+
}): void;
|
|
427
|
+
/**
|
|
428
|
+
* Capture a blocker resolved event
|
|
429
|
+
*
|
|
430
|
+
* Called when coordinator successfully unblocks a worker.
|
|
431
|
+
*/
|
|
432
|
+
export declare function captureBlockerResolved(params: {
|
|
433
|
+
session_id: string;
|
|
434
|
+
epic_id: string;
|
|
435
|
+
worker_id: string;
|
|
436
|
+
subtask_id: string;
|
|
437
|
+
blocker_type: string;
|
|
438
|
+
resolution: string;
|
|
439
|
+
}): void;
|
|
440
|
+
/**
|
|
441
|
+
* Capture a scope change decision event
|
|
442
|
+
*
|
|
443
|
+
* Called when coordinator approves or rejects a worker's scope expansion request.
|
|
444
|
+
*/
|
|
445
|
+
export declare function captureScopeChangeDecision(params: {
|
|
446
|
+
session_id: string;
|
|
447
|
+
epic_id: string;
|
|
448
|
+
worker_id: string;
|
|
449
|
+
subtask_id: string;
|
|
450
|
+
approved: boolean;
|
|
451
|
+
original_scope?: string;
|
|
452
|
+
new_scope?: string;
|
|
453
|
+
requested_scope?: string;
|
|
454
|
+
rejection_reason?: string;
|
|
455
|
+
estimated_time_add?: number;
|
|
456
|
+
}): void;
|
|
457
|
+
/**
|
|
458
|
+
* Capture a blocker detected event
|
|
459
|
+
*
|
|
460
|
+
* Called when a worker reports being blocked (OUTCOME event, not DECISION).
|
|
461
|
+
*/
|
|
462
|
+
export declare function captureBlockerDetected(params: {
|
|
463
|
+
session_id: string;
|
|
464
|
+
epic_id: string;
|
|
465
|
+
worker_id: string;
|
|
466
|
+
subtask_id: string;
|
|
467
|
+
blocker_type: string;
|
|
468
|
+
blocker_description: string;
|
|
469
|
+
}): void;
|
|
297
470
|
/**
|
|
298
471
|
* Read all events from a session file
|
|
299
472
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval-capture.d.ts","sourceRoot":"","sources":["../src/eval-capture.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"eval-capture.d.ts","sourceRoot":"","sources":["../src/eval-capture.ts"],"names":[],"mappings":"AAyBA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;;;;;;iBAmB/B,CAAC;AACH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAElE;;GAEG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBA2D3B,CAAC;AACH,MAAM,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,CAAC;AAE1D;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,OAAO,CAAC,UAAU,CAAC,GAAG;IACpD,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iCAiEjC,CAAC;AACH,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAEtE;;GAEG;AACH,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAMnC,CAAC;AACH,MAAM,MAAM,kBAAkB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAAC,CAAC;AAM1E;;GAEG;AACH,eAAO,MAAM,sBAAsB,8BAA8B,CAAC;AAElE;;GAEG;AACH,wBAAgB,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAM3D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,UAAU,GAAG,iBAAiB,GACrC,IAAI,CAKN;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,UAAU,EAAE,CAajE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAU3E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,WAAW,EAAE,MAAM,EACnB,EAAE,EAAE,MAAM,EACV,OAAO,EAAE,OAAO,CAAC,UAAU,CAAC,GAC3B,OAAO,CAgBT;AAWD;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,YAAY,GAAG,eAAe,GAAG,YAAY,GAAG,MAAM,CAAC;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;QACxB,oBAAoB,CAAC,EAAE,MAAM,CAAC;KAC/B,CAAC,CAAC;CACJ,GAAG,iBAAiB,CAsBpB;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE;IAC5C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,IAAI,CAwBP;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB,GAAG,UAAU,GAAG,IAAI,CAoDpB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;IAClB,QAAQ,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,IAAI,CAMP;AAMD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,KAAK,CAAC;IAC3D,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC1C,QAAQ,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,CAAC;IACF,MAAM,EAAE,UAAU,CAAC;CACpB,CAAC,CAkBD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;CACxB,CAuCA;AAMD;;GAEG;AACH,wBAAgB,aAAa,IAAI,MAAM,CAEtC;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAExD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,IAAI,CAKvC;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,gBAAgB,GAAG,IAAI,CAWrE;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EACX,oBAAoB,GACpB,kBAAkB,GAClB,kBAAkB,GAClB,oBAAoB,GACpB,mBAAmB,CAAC;IACxB,OAAO,EAAE,GAAG,CAAC;CACd,GAAG,IAAI,CAWP;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE;IAC/C,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB,GAAG,IAAI,CAeP;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE;IACzC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,GAAG,IAAI,CAcP;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,IAAI,CAcP;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,IAAI,CAgBP;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CAAC,MAAM,EAAE;IACjD,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,GAAG,IAAI,CAwBP;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,mBAAmB,EAAE,MAAM,CAAC;CAC7B,GAAG,IAAI,CAiBP;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,gBAAgB,EAAE,CAavE;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG,kBAAkB,GAAG,IAAI,CAoB5B"}
|