opencode-swarm-plugin 0.38.0 → 0.39.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +11 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +130 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +475 -0
- package/bin/swarm.ts +383 -0
- package/dist/compaction-hook.d.ts +1 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +81 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +370 -13
- package/dist/plugin.js +203 -13
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +589 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
- package/evals/scorers/coordinator-discipline.ts +13 -13
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +81 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +422 -0
- package/src/eval-capture.ts +94 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/index.ts +61 -1
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/dist/index.js
CHANGED
@@ -22198,6 +22198,7 @@ __export(exports_eval_capture, {
   captureHumanFeedback: () => captureHumanFeedback,
   captureDecomposition: () => captureDecomposition,
   captureCoordinatorEvent: () => captureCoordinatorEvent,
+  captureCompactionEvent: () => captureCompactionEvent,
   appendEvalRecord: () => appendEvalRecord,
   SubtaskOutcomeSchema: () => SubtaskOutcomeSchema,
   EvalRecordSchema: () => EvalRecordSchema,
@@ -22406,6 +22407,17 @@ function captureCoordinatorEvent(event) {
 `;
   fs.appendFileSync(sessionPath, line, "utf-8");
 }
+function captureCompactionEvent(params) {
+  const event = {
+    session_id: params.session_id,
+    epic_id: params.epic_id,
+    timestamp: new Date().toISOString(),
+    event_type: "COMPACTION",
+    compaction_type: params.compaction_type,
+    payload: params.payload
+  };
+  captureCoordinatorEvent(event);
+}
 function readSessionEvents(sessionId) {
   const sessionPath = getSessionPath(sessionId);
   if (!fs.existsSync(sessionPath)) {
@@ -22517,6 +22529,20 @@ var init_eval_capture = __esm(() => {
       "epic_complete"
     ]),
     payload: exports_external.any()
+  }),
+  exports_external.object({
+    session_id: exports_external.string(),
+    epic_id: exports_external.string(),
+    timestamp: exports_external.string(),
+    event_type: exports_external.literal("COMPACTION"),
+    compaction_type: exports_external.enum([
+      "detection_complete",
+      "prompt_generated",
+      "context_injected",
+      "resumption_started",
+      "tool_call_tracked"
+    ]),
+    payload: exports_external.any()
   })
 ]);
 CoordinatorSessionSchema = exports_external.object({
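Taken together, these two hunks add a COMPACTION variant to the coordinator-event union and a `captureCompactionEvent` helper that stamps the timestamp and event type before delegating to `captureCoordinatorEvent`. A minimal usage sketch; the `declare` stands in for however the function is brought into scope, and all values are invented:

```ts
// Hedged sketch: parameter shape inferred from the diff above; values are invented.
declare function captureCompactionEvent(params: {
  session_id: string;
  epic_id: string;
  compaction_type:
    | "detection_complete"
    | "prompt_generated"
    | "context_injected"
    | "resumption_started"
    | "tool_call_tracked";
  payload: unknown;
}): void;

captureCompactionEvent({
  session_id: "ses_abc123",            // invented session id
  epic_id: "epic-42",                  // invented epic id
  compaction_type: "context_injected", // must be one of the five enum values above
  payload: { injected_chars: 1843 },   // schema accepts any payload
});
```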
@@ -42685,7 +42711,7 @@ var swarm_delegate_planning = tool({
     strategy: tool.schema.enum(["auto", "file-based", "feature-based", "risk-based"]).optional().default("auto").describe("Decomposition strategy (default: auto-detect)"),
     query_cass: tool.schema.boolean().optional().default(true).describe("Query CASS for similar past tasks (default: true)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     const { selectStrategy: selectStrategy2, formatStrategyGuidelines: formatStrategyGuidelines2 } = await Promise.resolve().then(() => (init_swarm_strategies(), exports_swarm_strategies));
     const { formatMemoryQueryForDecomposition: formatMemoryQueryForDecomposition2 } = await Promise.resolve().then(() => (init_learning(), exports_learning));
     const { listSkills: listSkills2, getSkillsContextForSwarm: getSkillsContextForSwarm2, findRelevantSkills: findRelevantSkills2 } = await Promise.resolve().then(() => (init_skills(), exports_skills));
@@ -42701,7 +42727,7 @@ var swarm_delegate_planning = tool({
     }
     try {
       captureCoordinatorEvent({
-        session_id:
+        session_id: _ctx.sessionID || "unknown",
         epic_id: "planning",
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
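This and the following hunks thread the tool execution context through as a second `execute` parameter, so captured events carry the real session ID and fall back to "unknown" when no session is attached. A trimmed-down sketch of the pattern; the tool name and args are invented, and the surrounding `tool` and `captureCoordinatorEvent` bindings are assumed to be in scope:

```ts
// Illustrative only; mirrors the (args, _ctx) signature change in the diff.
const swarm_example = tool({
  args: {
    epic_id: tool.schema.string().describe("Epic this decision belongs to"),
  },
  async execute(args, ctx) {
    captureCoordinatorEvent({
      session_id: ctx.sessionID || "unknown", // real session id when available
      epic_id: args.epic_id,
      timestamp: new Date().toISOString(),
      event_type: "DECISION",
      payload: { note: "decision details" },  // invented payload
    });
    return "ok";
  },
});
```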
@@ -45036,7 +45062,7 @@ var swarm_review_feedback = tool({
     summary: exports_external.string().optional().describe("Review summary"),
     issues: exports_external.string().optional().describe("JSON array of ReviewIssue objects (for needs_changes)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     let parsedIssues = [];
     if (args.issues) {
       try {
@@ -45059,7 +45085,7 @@ var swarm_review_feedback = tool({
     markReviewApproved(args.task_id);
     try {
       captureCoordinatorEvent({
-        session_id:
+        session_id: _ctx.sessionID || "unknown",
         epic_id: epicId,
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -45097,7 +45123,7 @@ You may now complete the task with \`swarm_complete\`.`,
     const remaining = MAX_REVIEW_ATTEMPTS - attemptNumber;
     try {
       captureCoordinatorEvent({
-        session_id:
+        session_id: _ctx.sessionID || "unknown",
         epic_id: epicId,
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -46069,10 +46095,29 @@ Files touched: ${args.files_touched?.join(", ") || "none recorded"}`,
       reason: "No files_owned contract found (non-epic subtask or decomposition event missing)"
     }
   };
+  try {
+    const { captureSubtaskOutcome: captureSubtaskOutcome2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+    const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
+    const evalEpicId = cell.parent_id || epicId2;
+    captureSubtaskOutcome2({
+      epicId: evalEpicId,
+      projectPath: args.project_key,
+      beadId: args.bead_id,
+      title: cell.title,
+      plannedFiles: args.planned_files || [],
+      actualFiles: args.files_touched || [],
+      durationMs: durationMs2,
+      errorCount: args.error_count || 0,
+      retryCount: args.retry_count || 0,
+      success: true
+    });
+  } catch (error45) {
+    console.warn("[swarm_complete] Failed to capture subtask outcome:", error45);
+  }
   try {
     const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
     captureCoordinatorEvent({
-      session_id:
+      session_id: _ctx.sessionID || "unknown",
       epic_id: epicId2,
       timestamp: new Date().toISOString(),
       event_type: "OUTCOME",
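`swarm_complete` now records a per-subtask eval outcome (best effort, warning only on failure) before emitting the usual coordinator OUTCOME event. The object below mirrors the fields passed to `captureSubtaskOutcome`; all values are invented:

```ts
// Field names mirror the captureSubtaskOutcome call above; values are invented.
const subtaskOutcome = {
  epicId: "epic-42",                 // cell.parent_id when present, otherwise the epic id
  projectPath: "/path/to/project",   // taken from args.project_key
  beadId: "bead-7",
  title: "Add eval gate to CI",
  plannedFiles: ["src/eval-gates.ts"],
  actualFiles: ["src/eval-gates.ts", "src/eval-history.ts"],
  durationMs: 120000,                // Date.now() - args.start_time, or 0 when unknown
  errorCount: 0,
  retryCount: 1,
  success: true,                     // this call path always records success
};
```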
@@ -46154,7 +46199,7 @@ ${errorStack.slice(0, 1000)}
   try {
     const durationMs = args.start_time ? Date.now() - args.start_time : 0;
     captureCoordinatorEvent({
-      session_id:
+      session_id: _ctx.sessionID || "unknown",
       epic_id: epicId,
       timestamp: new Date().toISOString(),
       event_type: "OUTCOME",
@@ -46221,7 +46266,9 @@ var swarm_record_outcome = tool({
       "user_cancelled",
       "unknown"
     ]).optional().describe("Failure classification (only when success=false). Auto-classified if not provided."),
-    failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)")
+    failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)"),
+    project_path: tool.schema.string().optional().describe("Project path (for finalizing eval records when all subtasks complete)"),
+    epic_id: tool.schema.string().optional().describe("Epic ID (for finalizing eval records when all subtasks complete)")
   },
   async execute(args) {
     const signals = {
@@ -46243,6 +46290,18 @@ var swarm_record_outcome = tool({
     const validated = OutcomeSignalsSchema.parse(signals);
     const scored = scoreImplicitFeedback(validated, DEFAULT_LEARNING_CONFIG);
     const errorStats = await globalErrorAccumulator.getErrorStats(args.bead_id);
+    let finalizedRecord = null;
+    if (args.project_path && args.epic_id) {
+      try {
+        const { finalizeEvalRecord: finalizeEvalRecord2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+        finalizedRecord = finalizeEvalRecord2({
+          epicId: args.epic_id,
+          projectPath: args.project_path
+        });
+      } catch (error45) {
+        console.warn("[swarm_record_outcome] Failed to finalize eval record:", error45);
+      }
+    }
     const criteriaToScore = args.criteria ?? [
       "type_safe",
       "no_bugs",
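With the optional `project_path` and `epic_id` arguments added two hunks up, `swarm_record_outcome` can finalize the epic's eval record once outcomes land; the next hunk echoes the result back as `finalized_eval_record`. A hedged sketch of the argument payload, values invented:

```ts
// Hypothetical swarm_record_outcome arguments; values are invented.
const recordOutcomeArgs = {
  bead_id: "bead-7",
  success: true,
  project_path: "/path/to/project", // both project_path and epic_id must be present
  epic_id: "epic-42",               // for finalizeEvalRecord to run
};
```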
@@ -46284,6 +46343,7 @@ var swarm_record_outcome = tool({
         accumulated_errors: errorStats.total,
         unresolved_errors: errorStats.unresolved
       },
+      finalized_eval_record: finalizedRecord || undefined,
       note: "Feedback events should be stored for criterion weight calculation. Use learning.ts functions to apply weights."
     }, null, 2);
   }
@@ -46315,12 +46375,31 @@ async function runResearchPhase(task, projectPath, options2) {
   if (techStack.length === 0) {
     return {
       tech_stack: [],
+      spawn_instructions: [],
       summaries: {},
       memory_ids: []
     };
   }
+  const spawnInstructions = [];
+  for (const tech of techStack) {
+    const researchId = `research-${tech}-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
+    const prompt = formatResearcherPrompt({
+      research_id: researchId,
+      epic_id: "standalone-research",
+      tech_stack: [tech],
+      project_path: projectPath,
+      check_upgrades: options2?.checkUpgrades ?? false
+    });
+    spawnInstructions.push({
+      research_id: researchId,
+      tech,
+      prompt,
+      subagent_type: "swarm/researcher"
+    });
+  }
   return {
     tech_stack: techStack,
+    spawn_instructions: spawnInstructions,
     summaries: {},
     memory_ids: []
   };
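`runResearchPhase` now returns a `spawn_instructions` array (one entry per detected technology) alongside the tech stack, so the caller can hand each pre-formatted researcher prompt to a subagent. A sketch of consuming the result; the `declare`s are hypothetical stand-ins rather than the package's real API surface:

```ts
// Sketch only: the declares stand in for the real module and runtime; nothing here is the package API verbatim.
declare function runResearchPhase(
  task: string,
  projectPath: string,
  options?: { checkUpgrades?: boolean },
): Promise<{
  tech_stack: string[];
  spawn_instructions: { research_id: string; tech: string; prompt: string; subagent_type: string }[];
}>;
declare function spawnTask(opts: { subagent_type: string; prompt: string }): Promise<void>;

const research = await runResearchPhase("migrate auth to OAuth", "/path/to/project", { checkUpgrades: false });
for (const instruction of research.spawn_instructions) {
  await spawnTask({
    subagent_type: instruction.subagent_type, // "swarm/researcher"
    prompt: instruction.prompt,               // produced by formatResearcherPrompt
  });
}
```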
@@ -47554,7 +47633,7 @@ var swarm_spawn_subtask = tool({
     }).optional().describe("Recovery context from checkpoint compaction"),
     model: tool.schema.string().optional().describe("Optional explicit model override (auto-selected if not provided)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     const prompt = formatSubtaskPromptV2({
       bead_id: args.bead_id,
       epic_id: args.epic_id,
@@ -47583,7 +47662,7 @@ var swarm_spawn_subtask = tool({
     const postCompletionInstructions = COORDINATOR_POST_WORKER_CHECKLIST.replace(/{project_key}/g, args.project_path || "$PWD").replace(/{epic_id}/g, args.epic_id).replace(/{task_id}/g, args.bead_id).replace(/{files_touched}/g, filesJoined).replace(/{worker_id}/g, "worker");
     try {
       captureCoordinatorEvent({
-        session_id:
+        session_id: _ctx.sessionID || "unknown",
         epic_id: args.epic_id,
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -63790,9 +63869,21 @@ function getLog() {
   }
   return _logger;
 }
-var SWARM_COMPACTION_CONTEXT =
+var SWARM_COMPACTION_CONTEXT = `
+┌─────────────────────────────────────────────────────────────┐
+│                                                             │
+│   \uD83D\uDC1D  YOU ARE THE COORDINATOR  \uD83D\uDC1D                        │
+│                                                             │
+│   NOT A WORKER. NOT AN IMPLEMENTER.                         │
+│   YOU ORCHESTRATE.                                          │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+
+## \uD83C\uDFAF NON-NEGOTIABLE: YOU ARE THE COORDINATOR
 
-Context was compacted but the swarm is still running.
+Context was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**
+
+Your role is ORCHESTRATION, not implementation. When you catch yourself about to do work directly, STOP.
 
 ### ⛔ NEVER DO THESE (Coordinator Anti-Patterns)
 
@@ -63803,9 +63894,27 @@ Context was compacted but the swarm is still running. You are the **COORDINATOR*
 - ❌ **NEVER** implement features yourself - SPAWN A WORKER
 - ❌ **NEVER** "just do it myself to save time" - NO. SPAWN A WORKER.
 - ❌ **NEVER** reserve files with \`swarmmail_reserve\` - Workers reserve files
+- ❌ **NEVER** fetch files/docs directly - SPAWN A RESEARCHER
 
 **If you catch yourself about to edit a file, STOP. Use \`swarm_spawn_subtask\` instead.**
 
+### \uD83D\uDEAB FORBIDDEN TOOLS (Coordinators MUST delegate these)
+
+**NEVER use these tools directly. ALWAYS spawn a researcher worker via \`swarm_spawn_researcher\`:**
+
+**Repository fetching:**
+- \`repo-crawl_file\`, \`repo-crawl_readme\`, \`repo-crawl_search\`, \`repo-crawl_structure\`, \`repo-crawl_tree\`
+- \`repo-autopsy_*\` (all repo-autopsy tools)
+
+**Web/documentation fetching:**
+- \`webfetch\`, \`fetch_fetch\`
+- \`context7_resolve-library-id\`, \`context7_get-library-docs\`
+
+**Knowledge base:**
+- \`pdf-brain_search\`, \`pdf-brain_read\`
+
+**If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
+
 ### ✅ ALWAYS DO THESE (Coordinator Checklist)
 
 On resume, execute this checklist IN ORDER:
@@ -63855,6 +63964,87 @@ Extract from session context:
 - **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
 - **Close the loop** - When all subtasks done, verify and close the epic
 
+**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
+
+---
+
+## \uD83D\uDCCB FULL COORDINATOR WORKFLOW (Reference)
+
+You are ALWAYS swarming. Here is the complete workflow for any new work:
+
+### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
+
+**If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
+
+\`\`\`
+swarm_spawn_researcher(
+  research_id="research-<topic>",
+  epic_id="<epic-id>",
+  tech_stack=["<technology>"],
+  project_path="<path>"
+)
+// Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
+\`\`\`
+
+### Phase 2: Knowledge Gathering
+
+\`\`\`
+semantic-memory_find(query="<task keywords>", limit=5) # Past learnings
+cass_search(query="<task description>", limit=5) # Similar past tasks
+skills_list() # Available skills
+\`\`\`
+
+### Phase 3: Decompose
+
+\`\`\`
+swarm_select_strategy(task="<task>")
+swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
+swarm_validate_decomposition(response="<CellTree JSON>")
+\`\`\`
+
+### Phase 4: Create Cells
+
+\`hive_create_epic(epic_title="<task>", subtasks=[...])\`
+
+### Phase 5: DO NOT Reserve Files
+
+> **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
+
+### Phase 6: Spawn Workers
+
+\`\`\`
+swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
+Task(subagent_type="swarm/worker", prompt="<from above>")
+\`\`\`
+
+### Phase 7: MANDATORY Review Loop
+
+**AFTER EVERY Task() RETURNS:**
+
+1. \`swarmmail_inbox()\` - Check for messages
+2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
+3. Evaluate against epic goals
+4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
+
+**If needs_changes:**
+\`\`\`
+swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
+// Spawn NEW worker with Task() using retry prompt
+// Max 3 attempts before marking task blocked
+\`\`\`
+
+### Phase 8: Complete
+
+\`hive_sync()\` - Sync all cells to git
+
+## Strategy Reference
+
+| Strategy      | Best For                | Keywords                               |
+| ------------- | ----------------------- | -------------------------------------- |
+| file-based    | Refactoring, migrations | refactor, migrate, rename, update all  |
+| feature-based | New features            | add, implement, build, create, feature |
+| risk-based    | Bug fixes, security     | fix, bug, security, critical, urgent   |
+
 **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
 `;
 var SWARM_DETECTION_FALLBACK = `## \uD83D\uDC1D Swarm Detection - Check Your Context
@@ -64792,6 +64982,161 @@ async function resetStorage() {
 
 // src/index.ts
 init_skills();
+
+// src/eval-history.ts
+import * as fs2 from "node:fs";
+import * as path3 from "node:path";
+var DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
+var VARIANCE_THRESHOLD = 0.1;
+var BOOTSTRAP_THRESHOLD = 10;
+var STABILIZATION_THRESHOLD = 50;
+function getEvalHistoryPath(projectPath) {
+  return path3.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
+}
+function ensureEvalHistoryDir(projectPath) {
+  const historyPath = getEvalHistoryPath(projectPath);
+  const dir = path3.dirname(historyPath);
+  if (!fs2.existsSync(dir)) {
+    fs2.mkdirSync(dir, { recursive: true });
+  }
+}
+function recordEvalRun(projectPath, run) {
+  ensureEvalHistoryDir(projectPath);
+  const historyPath = getEvalHistoryPath(projectPath);
+  const line = `${JSON.stringify(run)}
+`;
+  fs2.appendFileSync(historyPath, line, "utf-8");
+}
+function readAllRecords(projectPath) {
+  const historyPath = getEvalHistoryPath(projectPath);
+  if (!fs2.existsSync(historyPath)) {
+    return [];
+  }
+  const content = fs2.readFileSync(historyPath, "utf-8");
+  const lines = content.trim().split(`
+`).filter(Boolean);
+  return lines.map((line) => JSON.parse(line));
+}
+function getScoreHistory(projectPath, evalName) {
+  return readAllRecords(projectPath).filter((run) => run.eval_name === evalName);
+}
+function calculateVariance(scores) {
+  if (scores.length <= 1) {
+    return 0;
+  }
+  const mean = scores.reduce((sum2, score) => sum2 + score, 0) / scores.length;
+  const variance5 = scores.reduce((sum2, score) => {
+    const deviation = score - mean;
+    return sum2 + deviation * deviation;
+  }, 0) / scores.length;
+  return variance5;
+}
+function getPhase(projectPath, evalName) {
+  const history = getScoreHistory(projectPath, evalName);
+  if (history.length < BOOTSTRAP_THRESHOLD) {
+    return "bootstrap";
+  }
+  if (history.length <= STABILIZATION_THRESHOLD) {
+    return "stabilization";
+  }
+  const scores = history.map((run) => run.score);
+  const variance5 = calculateVariance(scores);
+  if (variance5 < VARIANCE_THRESHOLD) {
+    return "production";
+  }
+  return "stabilization";
+}
+// src/eval-gates.ts
+var DEFAULT_THRESHOLDS = {
+  stabilization: 0.1,
+  production: 0.05
+};
+function calculateBaseline(history, currentScore) {
+  if (history.length === 0) {
+    return currentScore;
+  }
+  return history.reduce((sum2, run) => sum2 + run.score, 0) / history.length;
+}
+function calculateRegression(baseline, currentScore) {
+  if (baseline === 0) {
+    return 0;
+  }
+  return (baseline - currentScore) / baseline;
+}
+function formatRegressionMessage(regressionPercent, baseline, currentScore) {
+  return `${(regressionPercent * 100).toFixed(1)}% regression (baseline: ${baseline.toFixed(2)}, current: ${currentScore.toFixed(2)})`;
+}
+function checkGate(projectPath, evalName, currentScore, config2) {
+  const thresholds = {
+    stabilization: config2?.stabilizationThreshold ?? DEFAULT_THRESHOLDS.stabilization,
+    production: config2?.productionThreshold ?? DEFAULT_THRESHOLDS.production
+  };
+  const phase = getPhase(projectPath, evalName);
+  const history = getScoreHistory(projectPath, evalName);
+  if (phase === "bootstrap") {
+    return {
+      passed: true,
+      phase: "bootstrap",
+      message: `Bootstrap phase (${history.length}/10 runs) - collecting data`,
+      currentScore
+    };
+  }
+  const baseline = calculateBaseline(history, currentScore);
+  const regressionPercent = calculateRegression(baseline, currentScore);
+  const regressionMsg = formatRegressionMessage(regressionPercent, baseline, currentScore);
+  if (phase === "stabilization") {
+    if (regressionPercent > thresholds.stabilization) {
+      return {
+        passed: true,
+        phase: "stabilization",
+        message: `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`,
+        baseline,
+        currentScore,
+        regressionPercent
+      };
+    }
+    if (history.length > 50) {
+      const scores = history.map((run) => run.score);
+      const variance5 = calculateVariance(scores);
+      return {
+        passed: true,
+        phase: "stabilization",
+        message: `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance5.toFixed(3)}) prevents production phase.`,
+        baseline,
+        currentScore,
+        regressionPercent
+      };
+    }
+    return {
+      passed: true,
+      phase: "stabilization",
+      message: `Stabilization phase: ${regressionMsg} - acceptable`,
+      baseline,
+      currentScore,
+      regressionPercent
+    };
+  }
+  if (regressionPercent > thresholds.production) {
+    return {
+      passed: false,
+      phase: "production",
+      message: `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`,
+      baseline,
+      currentScore,
+      regressionPercent
+    };
+  }
+  return {
+    passed: true,
+    phase: "production",
+    message: `Production phase: ${regressionMsg} - acceptable`,
+    baseline,
+    currentScore,
+    regressionPercent
+  };
+}
+
+// src/index.ts
 var SwarmPlugin = async (input) => {
   const { $, directory, client } = input;
   setHiveWorkingDirectory(directory);
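Together, the new eval-history and eval-gates modules implement a three-phase regression gate: fewer than 10 recorded runs for an eval is `bootstrap` (always passes), 10 to 50 runs is `stabilization` (regressions are reported but never fail), and past 50 runs with score variance below 0.1 the eval enters `production`, where a regression of more than 5% against the mean of prior runs fails the gate. A hedged usage sketch follows; the import specifier is assumed from the package name, and the record fields other than `eval_name` and `score` are assumptions:

```ts
// Hedged sketch: import path and extra record fields are assumptions.
import { recordEvalRun, getPhase, checkGate } from "opencode-swarm-plugin";

const projectPath = "/path/to/project";
const evalName = "compaction-prompt";
const score = 0.82;

// Appends one JSON line to <project>/.opencode/eval-history.jsonl
recordEvalRun(projectPath, { eval_name: evalName, score, timestamp: new Date().toISOString() });

console.log(getPhase(projectPath, evalName)); // "bootstrap" | "stabilization" | "production"

const gate = checkGate(projectPath, evalName, score); // optional 4th arg overrides thresholds
if (!gate.passed) {
  throw new Error(gate.message); // only possible in the production phase (>5% regression)
}
```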
@@ -64858,7 +65203,7 @@ var SwarmPlugin = async (input) => {
     if (isInCoordinatorContext()) {
       const ctx = getCoordinatorContext();
       const violation = detectCoordinatorViolation({
-        sessionId:
+        sessionId: input2.sessionID || "unknown",
         epicId: ctx.epicId || "unknown",
         toolName,
         toolArgs: output.args,
@@ -64972,6 +65317,7 @@ export {
   researchTools,
   requireTool,
   repoCrawlTools,
+  recordEvalRun,
   parseFrontmatter,
   migrateBeadsToHive,
   mergeHistoricBeads,
@@ -65012,12 +65358,15 @@ export {
   getStatusChanges,
   getSkillsContextForSwarm,
   getSkill,
+  getScoreHistory,
   getSchemaByName,
+  getPhase,
   getMandateStorage,
   getLogger,
   getInstalledVersions,
   getHiveWorkingDirectory,
   getHiveAdapter,
+  getEvalHistoryPath,
   getCellIdFromEvent,
   getBeadsWorkingDirectory,
   getBeadsAdapter,
@@ -65035,6 +65384,7 @@ export {
   evaluatePromotion,
   evaluateBatchPromotions,
   ensureHiveDirectory,
+  ensureEvalHistoryDir,
   discoverSkills,
   discoverDocTools,
   src_default as default,
@@ -65050,8 +65400,10 @@ export {
   createAgentMailError,
   clearSessionState,
   checkTool,
+  checkGate,
   checkBeadsMigrationNeeded,
   checkAllTools,
+  calculateVariance,
   beads_update,
   beads_sync,
   beads_start,
@@ -65073,6 +65425,7 @@ export {
   VoteTypeSchema,
   VoteSchema,
   ValidationResultSchema,
+  VARIANCE_THRESHOLD,
   UpdateSwarmContextArgsSchema,
   TaskDecompositionSchema,
   SwarmStrategySchema,
@@ -65092,6 +65445,7 @@ export {
   SWARM_COMPACTION_CONTEXT,
   SUBTASK_PROMPT_V2,
   STRATEGIES,
+  STABILIZATION_THRESHOLD,
   RepoCrawlError,
   QuerySwarmContextsArgsSchema,
   QueryMandatesArgsSchema,
@@ -65114,10 +65468,12 @@ export {
   DecompositionError,
   DecomposedSubtaskSchema,
   DecomposeArgsSchema,
+  DEFAULT_THRESHOLDS,
   DEFAULT_STORAGE_CONFIG,
   DEFAULT_MANDATE_STORAGE_CONFIG,
   DEFAULT_MANDATE_DECAY_CONFIG,
   DEFAULT_GUARDRAIL_CONFIG,
+  DEFAULT_EVAL_HISTORY_PATH,
   DEFAULT_CRITERIA,
   CriterionEvaluationSchema,
   CreateSwarmContextArgsSchema,
@@ -65185,6 +65541,7 @@ export {
   BeadAssignedEventSchema,
   BaseCellEventSchema,
   BaseBeadEventSchema,
+  BOOTSTRAP_THRESHOLD,
   AgentProgressSchema,
   AgentMailNotInitializedError,
   AgentMailError