opencode-swarm-plugin 0.37.0 → 0.39.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +20 -5
- package/.hive/memories.jsonl +35 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/.turbo/turbo-build.log +4 -4
- package/.turbo/turbo-test.log +319 -319
- package/CHANGELOG.md +258 -0
- package/README.md +50 -0
- package/bin/swarm.test.ts +475 -0
- package/bin/swarm.ts +385 -208
- package/dist/compaction-hook.d.ts +1 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +81 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/hive.d.ts +59 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +87 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +823 -131
- package/dist/plugin.js +655 -131
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/swarm-decompose.d.ts +30 -0
- package/dist/swarm-decompose.d.ts.map +1 -1
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +19 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +595 -94
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
- package/evals/scorers/coordinator-discipline.ts +13 -13
- package/examples/plugin-wrapper-template.ts +177 -8
- package/package.json +7 -2
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +139 -2
- package/src/compaction-hook.ts +113 -2
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +422 -0
- package/src/eval-capture.ts +94 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/index.ts +61 -1
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.test.ts +40 -47
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.test.ts +270 -7
- package/src/swarm-orchestrate.ts +100 -13
- package/src/swarm-prompts.test.ts +121 -0
- package/src/swarm-prompts.ts +297 -4
- package/src/swarm-research.integration.test.ts +157 -0
- package/src/swarm-review.ts +3 -3
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/dist/index.js
CHANGED
|
@@ -22178,6 +22178,383 @@ Codebase context considered: ${args.codebase_context.slice(0, 200)}...`;
|
|
|
22178
22178
|
};
|
|
22179
22179
|
});
|
|
22180
22180
|
|
|
22181
|
+
// src/eval-capture.ts
|
|
22182
|
+
// Export namespace for src/eval-capture.ts. Every entry is an arrow function
// that returns the current value of the same-named module binding; how
// __export installs them is defined outside this section.
var exports_eval_capture = {};
__export(exports_eval_capture, {
  updateEvalRecord: () => updateEvalRecord,
  saveSession: () => saveSession,
  readSessionEvents: () => readSessionEvents,
  readPartialRecords: () => readPartialRecords,
  readEvalRecords: () => readEvalRecords,
  getSessionPath: () => getSessionPath,
  getSessionDir: () => getSessionDir,
  getEvalDataStats: () => getEvalDataStats,
  getEvalDataPath: () => getEvalDataPath,
  finalizeEvalRecord: () => finalizeEvalRecord,
  exportForEvalite: () => exportForEvalite,
  ensureSessionDir: () => ensureSessionDir,
  ensureEvalDataDir: () => ensureEvalDataDir,
  captureSubtaskOutcome: () => captureSubtaskOutcome,
  captureHumanFeedback: () => captureHumanFeedback,
  captureDecomposition: () => captureDecomposition,
  captureCoordinatorEvent: () => captureCoordinatorEvent,
  captureCompactionEvent: () => captureCompactionEvent,
  appendEvalRecord: () => appendEvalRecord,
  SubtaskOutcomeSchema: () => SubtaskOutcomeSchema,
  EvalRecordSchema: () => EvalRecordSchema,
  DEFAULT_EVAL_DATA_PATH: () => DEFAULT_EVAL_DATA_PATH,
  CoordinatorSessionSchema: () => CoordinatorSessionSchema,
  CoordinatorEventSchema: () => CoordinatorEventSchema
});
|
|
22209
|
+
import * as fs from "node:fs";
|
|
22210
|
+
import * as os from "node:os";
|
|
22211
|
+
import * as path from "node:path";
|
|
22212
|
+
/**
 * Build the location of the eval-data JSONL file for a project.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {string} `projectPath` joined with `DEFAULT_EVAL_DATA_PATH`.
 */
function getEvalDataPath(projectPath) {
  const evalDataFile = path.join(projectPath, DEFAULT_EVAL_DATA_PATH);
  return evalDataFile;
}
|
|
22215
|
+
/**
 * Create the directory that holds the eval-data file if it is missing.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {void}
 */
function ensureEvalDataDir(projectPath) {
  const parentDir = path.dirname(getEvalDataPath(projectPath));
  if (fs.existsSync(parentDir)) {
    return;
  }
  fs.mkdirSync(parentDir, { recursive: true });
}
|
|
22222
|
+
/**
 * Append one record to the project's eval-data file as a single JSON line.
 *
 * @param {string} projectPath - Project root directory.
 * @param {object} record2 - Record to serialize with JSON.stringify.
 * @returns {void}
 */
function appendEvalRecord(projectPath, record2) {
  ensureEvalDataDir(projectPath);
  const target = getEvalDataPath(projectPath);
  // One JSON document per line, newline-terminated.
  const serialized = `${JSON.stringify(record2)}\n`;
  fs.appendFileSync(target, serialized, "utf-8");
}
|
|
22229
|
+
/**
 * Read every record from the project's eval-data file and validate each one
 * against EvalRecordSchema.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {object[]} Parsed records; an empty array when the file is absent.
 * @throws When a line is not valid JSON or fails EvalRecordSchema.parse.
 */
function readEvalRecords(projectPath) {
  const evalPath = getEvalDataPath(projectPath);
  if (!fs.existsSync(evalPath)) {
    return [];
  }
  const validated = [];
  for (const rawLine of fs.readFileSync(evalPath, "utf-8").trim().split("\n")) {
    if (!rawLine) {
      continue; // skip empty lines
    }
    validated.push(EvalRecordSchema.parse(JSON.parse(rawLine)));
  }
  return validated;
}
|
|
22242
|
+
/**
 * Read every record from the project's eval-data file without schema
 * validation.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {object[]} JSON-parsed lines; an empty array when the file is absent.
 * @throws {SyntaxError} When a line is not valid JSON.
 */
function readPartialRecords(projectPath) {
  const evalPath = getEvalDataPath(projectPath);
  if (!fs.existsSync(evalPath)) {
    return [];
  }
  const parsed = [];
  for (const rawLine of fs.readFileSync(evalPath, "utf-8").trim().split("\n")) {
    if (rawLine) {
      parsed.push(JSON.parse(rawLine));
    }
  }
  return parsed;
}
|
|
22252
|
+
/**
 * Shallow-merge `updates` into the stored record whose `id` matches, then
 * rewrite the whole eval-data file.
 *
 * @param {string} projectPath - Project root directory.
 * @param {string} id - Record id to look up.
 * @param {object} updates - Fields to overwrite on the stored record.
 * @returns {boolean} true when a record was found and rewritten, else false.
 */
function updateEvalRecord(projectPath, id, updates) {
  const stored = readPartialRecords(projectPath);
  const position = stored.findIndex((entry) => entry.id === id);
  if (position === -1) {
    return false;
  }
  stored[position] = { ...stored[position], ...updates };
  const target = getEvalDataPath(projectPath);
  // Every record gets its own newline-terminated line.
  const serialized = stored.map((entry) => `${JSON.stringify(entry)}\n`).join("");
  fs.writeFileSync(target, serialized, "utf-8");
  return true;
}
|
|
22266
|
+
/**
 * Start an eval record for a decomposition: remember it in the in-memory
 * `inProgressRecords` map and append it to the project's eval-data file.
 *
 * @param {object} params - Reads epicId, projectPath, task, context, strategy,
 *   subtasks, epicTitle and epicDescription.
 * @returns {object} The newly created record (with an empty `outcomes` array).
 */
function captureDecomposition(params) {
  const { epicId, projectPath, subtasks } = params;
  const record2 = {
    id: epicId,
    timestamp: new Date().toISOString(),
    project_path: projectPath,
    task: params.task,
    context: params.context,
    strategy: params.strategy,
    subtask_count: subtasks.length,
    epic_title: params.epicTitle,
    epic_description: params.epicDescription,
    subtasks,
    outcomes: []
  };
  inProgressRecords.set(epicId, record2);
  appendEvalRecord(projectPath, record2);
  return record2;
}
|
|
22284
|
+
/**
 * Record the outcome of one subtask on its epic's eval record.
 *
 * When the epic is tracked in `inProgressRecords`, the outcome is appended to
 * the in-memory record and the full outcome list is persisted. When it is not
 * tracked in memory, the outcome is appended to the outcomes already stored
 * on disk, so previously persisted outcomes are never overwritten.
 *
 * @param {object} params - Reads epicId, projectPath, beadId, title,
 *   plannedFiles, actualFiles, durationMs, errorCount, retryCount, success
 *   and failureMode.
 * @returns {void}
 */
function captureSubtaskOutcome(params) {
  const outcome = {
    bead_id: params.beadId,
    title: params.title,
    planned_files: params.plannedFiles,
    actual_files: params.actualFiles,
    duration_ms: params.durationMs,
    error_count: params.errorCount,
    retry_count: params.retryCount,
    success: params.success,
    failure_mode: params.failureMode
  };
  const record2 = inProgressRecords.get(params.epicId);
  if (record2) {
    record2.outcomes = record2.outcomes || [];
    record2.outcomes.push(outcome);
    updateEvalRecord(params.projectPath, params.epicId, {
      outcomes: record2.outcomes
    });
    return;
  }
  // No in-memory record: merge with what is on disk instead of writing
  // `outcomes: undefined`, which would drop the stored outcomes.
  const persisted = readPartialRecords(params.projectPath).find((r) => r.id === params.epicId);
  if (!persisted) {
    return;
  }
  updateEvalRecord(params.projectPath, params.epicId, {
    outcomes: [...(persisted.outcomes ?? []), outcome]
  });
}
|
|
22305
|
+
/**
 * Finalize the in-progress eval record for an epic: derive aggregate metrics
 * from its captured outcomes, persist them with updateEvalRecord, and remove
 * the record from `inProgressRecords`.
 *
 * @param {object} params - Reads epicId and projectPath.
 * @returns {object|null} The finalized record, or null when the epic is not
 *   tracked in memory or has no outcomes.
 */
function finalizeEvalRecord(params) {
  const record2 = inProgressRecords.get(params.epicId);
  if (!record2 || !record2.outcomes || record2.outcomes.length === 0) {
    return null;
  }
  // EvalRecordSchema bounds scope_accuracy to [0, 2]. Clamp here so a run that
  // touches far more files than planned cannot persist a record that
  // readEvalRecords later rejects.
  const MAX_SCOPE_ACCURACY = 2;
  const outcomes = record2.outcomes;
  const overallSuccess = outcomes.every((o) => o.success);
  const totalDurationMs = outcomes.reduce((sum, o) => sum + o.duration_ms, 0);
  const totalErrors = outcomes.reduce((sum, o) => sum + o.error_count, 0);
  const allPlannedFiles = record2.subtasks?.flatMap((s) => s.files) || [];
  // Count how many planned files are claimed by more than one subtask.
  const fileOccurrences = new Map();
  for (const file2 of allPlannedFiles) {
    fileOccurrences.set(file2, (fileOccurrences.get(file2) || 0) + 1);
  }
  const fileOverlapCount = Array.from(fileOccurrences.values()).filter((count) => count > 1).length;
  const plannedFileSet = new Set(allPlannedFiles);
  const actualFileSet = new Set(outcomes.flatMap((o) => o.actual_files));
  // Ratio of distinct touched files to distinct planned files (1 when nothing was planned).
  const scopeAccuracy = plannedFileSet.size > 0
    ? Math.min(actualFileSet.size / plannedFileSet.size, MAX_SCOPE_ACCURACY)
    : 1;
  // Slowest / fastest subtask, ignoring zero durations to avoid dividing by 0.
  const durations = outcomes.map((o) => o.duration_ms).filter((d) => d > 0);
  const timeBalanceRatio = durations.length > 1 ? Math.max(...durations) / Math.min(...durations) : 1;
  const finalRecord = {
    ...record2,
    overall_success: overallSuccess,
    total_duration_ms: totalDurationMs,
    total_errors: totalErrors,
    file_overlap_count: fileOverlapCount,
    scope_accuracy: scopeAccuracy,
    time_balance_ratio: timeBalanceRatio
  };
  updateEvalRecord(params.projectPath, params.epicId, finalRecord);
  inProgressRecords.delete(params.epicId);
  return finalRecord;
}
|
|
22338
|
+
/**
 * Store human review feedback on an epic's persisted eval record.
 *
 * @param {object} params - Reads projectPath, epicId, accepted, modified and notes.
 * @returns {void}
 */
function captureHumanFeedback(params) {
  const { accepted, modified, notes } = params;
  const feedback = {
    human_accepted: accepted,
    human_modified: modified,
    human_notes: notes
  };
  updateEvalRecord(params.projectPath, params.epicId, feedback);
}
|
|
22345
|
+
/**
 * Convert stored eval records that have at least one outcome into
 * `{ input, expected, actual }` cases.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {object[]} One case per record with outcomes.
 */
function exportForEvalite(projectPath) {
  const cases = [];
  for (const record2 of readEvalRecords(projectPath)) {
    const hasOutcomes = record2.outcomes && record2.outcomes.length > 0;
    if (!hasOutcomes) {
      continue;
    }
    const input = { task: record2.task, context: record2.context };
    const expected = {
      minSubtasks: 2,
      subtaskCount: record2.subtask_count,
      requiredFiles: record2.subtasks.flatMap((s) => s.files),
      overallSuccess: record2.overall_success
    };
    cases.push({ input, expected, actual: record2 });
  }
  return cases;
}
|
|
22361
|
+
/**
 * Summarize the project's stored eval records.
 *
 * A record counts as "complete" when it has at least one outcome. Averages
 * are taken over complete records only; missing metrics fall back to a
 * default (0 for duration, 1 for scope accuracy and time balance).
 *
 * @param {string} projectPath - Project root directory.
 * @returns {{ totalRecords: number, completeRecords: number, successRate: number,
 *   avgSubtasks: number, avgDurationMs: number, avgScopeAccuracy: number,
 *   avgTimeBalance: number }} Aggregate statistics (all zero apart from
 *   totalRecords when there are no complete records).
 */
function getEvalDataStats(projectPath) {
  const records = readEvalRecords(projectPath);
  const complete = records.filter((r) => r.outcomes && r.outcomes.length > 0);
  if (complete.length === 0) {
    return {
      totalRecords: records.length,
      completeRecords: 0,
      successRate: 0,
      avgSubtasks: 0,
      avgDurationMs: 0,
      avgScopeAccuracy: 0,
      avgTimeBalance: 0
    };
  }
  // Mean of pick(record) across complete records.
  const mean = (pick) => complete.reduce((sum, r) => sum + pick(r), 0) / complete.length;
  const successCount = complete.filter((r) => r.overall_success).length;
  return {
    totalRecords: records.length,
    completeRecords: complete.length,
    successRate: successCount / complete.length,
    avgSubtasks: mean((r) => r.outcomes?.length ?? 0),
    avgDurationMs: mean((r) => r.total_duration_ms ?? 0),
    // `??` rather than `||`: a scope accuracy of 0 is a legitimate value and
    // must not be replaced by the default of 1.
    avgScopeAccuracy: mean((r) => r.scope_accuracy ?? 1),
    avgTimeBalance: mean((r) => r.time_balance_ratio ?? 1)
  };
}
|
|
22390
|
+
/**
 * Directory under the current user's home where session files are kept.
 *
 * @returns {string} `<home>/.config/swarm-tools/sessions`.
 */
function getSessionDir() {
  const segments = [os.homedir(), ".config", "swarm-tools", "sessions"];
  return path.join(...segments);
}
|
|
22393
|
+
/**
 * Path of the JSONL file that stores one session's events.
 *
 * @param {string} sessionId - Session identifier, used as the file's base name.
 * @returns {string} `<session dir>/<sessionId>.jsonl`.
 */
function getSessionPath(sessionId) {
  const sessionDir = getSessionDir();
  const fileName = `${sessionId}.jsonl`;
  return path.join(sessionDir, fileName);
}
|
|
22396
|
+
/**
 * Create the session directory (and any missing parents) if it is absent.
 *
 * @returns {void}
 */
function ensureSessionDir() {
  const target = getSessionDir();
  if (fs.existsSync(target)) {
    return;
  }
  fs.mkdirSync(target, { recursive: true });
}
|
|
22402
|
+
/**
 * Validate a coordinator event and append it, as one JSON line, to the file
 * of the session named by `event.session_id`.
 *
 * @param {object} event - Event to record; must satisfy CoordinatorEventSchema.
 * @returns {void}
 * @throws When CoordinatorEventSchema.parse rejects the event.
 */
function captureCoordinatorEvent(event) {
  // Validation only: the event is written as given, not the parsed result.
  CoordinatorEventSchema.parse(event);
  ensureSessionDir();
  const target = getSessionPath(event.session_id);
  fs.appendFileSync(target, `${JSON.stringify(event)}\n`, "utf-8");
}
|
|
22410
|
+
/**
 * Build a "COMPACTION" coordinator event stamped with the current time and
 * record it through captureCoordinatorEvent.
 *
 * @param {object} params - Reads session_id, epic_id, compaction_type and payload.
 * @returns {void}
 */
function captureCompactionEvent(params) {
  const { session_id, epic_id, compaction_type, payload } = params;
  captureCoordinatorEvent({
    session_id,
    epic_id,
    timestamp: new Date().toISOString(),
    event_type: "COMPACTION",
    compaction_type,
    payload
  });
}
|
|
22421
|
+
/**
 * Load and validate all events stored for a session.
 *
 * @param {string} sessionId - Session identifier.
 * @returns {object[]} Events parsed with CoordinatorEventSchema; an empty
 *   array when the session file does not exist.
 * @throws When a line is not valid JSON or fails CoordinatorEventSchema.parse.
 */
function readSessionEvents(sessionId) {
  const sessionPath = getSessionPath(sessionId);
  if (!fs.existsSync(sessionPath)) {
    return [];
  }
  const events = [];
  for (const rawLine of fs.readFileSync(sessionPath, "utf-8").trim().split("\n")) {
    if (!rawLine) {
      continue; // skip empty lines
    }
    events.push(CoordinatorEventSchema.parse(JSON.parse(rawLine)));
  }
  return events;
}
|
|
22434
|
+
/**
 * Assemble a session object from the events stored for `params.session_id`.
 * The function only reads and returns; it does not write anything itself.
 *
 * @param {object} params - Reads session_id and epic_id.
 * @returns {object|null} `{ session_id, epic_id, start_time, end_time, events }`
 *   where start/end are the earliest/latest event timestamps as ISO strings,
 *   or null when the session has no events.
 */
function saveSession(params) {
  const { session_id, epic_id } = params;
  const events = readSessionEvents(session_id);
  if (events.length === 0) {
    return null;
  }
  const epochMs = events.map(({ timestamp }) => new Date(timestamp).getTime());
  const toIso = (ms) => new Date(ms).toISOString();
  const start_time = toIso(Math.min(...epochMs));
  const end_time = toIso(Math.max(...epochMs));
  return { session_id, epic_id, start_time, end_time, events };
}
|
|
22451
|
+
// Module-level bindings for src/eval-capture.ts. Apart from
// DEFAULT_EVAL_DATA_PATH, each one is assigned inside the callback passed to
// __esm below.
var SubtaskOutcomeSchema, EvalRecordSchema, CoordinatorEventSchema, CoordinatorSessionSchema, DEFAULT_EVAL_DATA_PATH = ".opencode/eval-data.jsonl", inProgressRecords;
var init_eval_capture = __esm(() => {
  init_zod();
  const z = exports_external;
  // Small builders for field shapes that repeat across the schemas.
  const text = () => z.string();
  const optionalText = () => z.string().optional();
  const textList = () => z.array(z.string());
  const count = () => z.number().int().min(0);
  const optionalFlag = () => z.boolean().optional();
  // One member of the CoordinatorEvent union: the shared envelope fields plus
  // an enum-valued subtype field whose name depends on the event type.
  const eventVariant = (eventType, subtypeKey, subtypes) => z.object({
    session_id: text(),
    epic_id: text(),
    timestamp: text(),
    event_type: z.literal(eventType),
    [subtypeKey]: z.enum(subtypes),
    payload: z.any()
  });

  SubtaskOutcomeSchema = z.object({
    bead_id: text(),
    title: text(),
    planned_files: textList(),
    actual_files: textList(),
    duration_ms: count(),
    error_count: count(),
    retry_count: count(),
    success: z.boolean(),
    failure_mode: optionalText()
  });

  EvalRecordSchema = z.object({
    id: text(),
    timestamp: text(),
    project_path: text(),
    task: text(),
    context: optionalText(),
    strategy: z.enum(["file-based", "feature-based", "risk-based", "auto"]),
    subtask_count: z.number().int().min(1),
    epic_title: text(),
    epic_description: optionalText(),
    subtasks: z.array(z.object({
      title: text(),
      description: optionalText(),
      files: textList(),
      dependencies: z.array(z.number()).optional(),
      estimated_complexity: z.number().int().min(1).max(5).optional()
    })),
    outcomes: z.array(SubtaskOutcomeSchema).optional(),
    overall_success: optionalFlag(),
    total_duration_ms: count().optional(),
    total_errors: count().optional(),
    human_accepted: optionalFlag(),
    human_modified: optionalFlag(),
    human_notes: optionalText(),
    file_overlap_count: count().optional(),
    scope_accuracy: z.number().min(0).max(2).optional(),
    time_balance_ratio: z.number().min(1).optional()
  });

  CoordinatorEventSchema = z.discriminatedUnion("event_type", [
    eventVariant("DECISION", "decision_type", [
      "strategy_selected",
      "worker_spawned",
      "review_completed",
      "decomposition_complete"
    ]),
    eventVariant("VIOLATION", "violation_type", [
      "coordinator_edited_file",
      "coordinator_ran_tests",
      "coordinator_reserved_files",
      "no_worker_spawned"
    ]),
    eventVariant("OUTCOME", "outcome_type", [
      "subtask_success",
      "subtask_retry",
      "subtask_failed",
      "epic_complete"
    ]),
    eventVariant("COMPACTION", "compaction_type", [
      "detection_complete",
      "prompt_generated",
      "context_injected",
      "resumption_started",
      "tool_call_tracked"
    ])
  ]);

  CoordinatorSessionSchema = z.object({
    session_id: text(),
    epic_id: text(),
    start_time: text(),
    end_time: optionalText(),
    events: z.array(CoordinatorEventSchema)
  });

  // Eval records keyed by epic id while their outcomes are being collected.
  inProgressRecords = new Map();
});
|
|
22557
|
+
|
|
22181
22558
|
// src/learning.ts
|
|
22182
22559
|
var exports_learning = {};
|
|
22183
22560
|
__export(exports_learning, {
|
|
@@ -39409,6 +39786,71 @@ var hive_ready = tool({
|
|
|
39409
39786
|
}
|
|
39410
39787
|
}
|
|
39411
39788
|
});
|
|
39789
|
+
/**
 * hive_cells tool: query cells by partial/full ID, by "next ready", or by
 * status/type filters. Always resolves to a pretty-printed JSON array string.
 *
 * Lookup precedence inside execute: `args.id` first, then `args.ready`, then
 * the filtered query (limit defaults to 20). Failures are rethrown as
 * HiveError tagged "hive_cells".
 */
var hive_cells = tool({
  description: `Query cells from the hive database with flexible filtering.

USE THIS TOOL TO:
- List all open cells: hive_cells()
- Find cells by status: hive_cells({ status: "in_progress" })
- Find cells by type: hive_cells({ type: "bug" })
- Get a specific cell by partial ID: hive_cells({ id: "mjkmd" })
- Get the next ready (unblocked) cell: hive_cells({ ready: true })
- Combine filters: hive_cells({ status: "open", type: "task" })

RETURNS: Array of cells with id, title, status, priority, type, parent_id, created_at, updated_at

PREFER THIS OVER hive_query when you need to:
- See what work is available
- Check status of multiple cells
- Find cells matching criteria
- Look up a cell by partial ID`,
  args: {
    id: tool.schema.string().optional().describe("Partial or full cell ID to look up"),
    status: tool.schema.enum(["open", "in_progress", "blocked", "closed"]).optional().describe("Filter by status"),
    type: tool.schema.enum(["task", "bug", "feature", "epic", "chore"]).optional().describe("Filter by type"),
    ready: tool.schema.boolean().optional().describe("If true, return only the next unblocked cell"),
    limit: tool.schema.number().optional().describe("Max cells to return (default 20)")
  },
  async execute(args, ctx) {
    const projectKey = getHiveWorkingDirectory();
    const adapter = await getHiveAdapter(projectKey);
    try {
      if (args.id) {
        // Fall back to the raw input when the partial ID does not resolve.
        const fullId = await resolvePartialId(adapter, projectKey, args.id) || args.id;
        const cell = await adapter.getCell(projectKey, fullId);
        if (!cell) {
          throw new HiveError(`No cell found matching ID '${args.id}'`, "hive_cells");
        }
        const formatted2 = formatCellForOutput(cell);
        return JSON.stringify([formatted2], null, 2);
      }
      if (args.ready) {
        const ready = await adapter.getNextReadyCell(projectKey);
        if (!ready) {
          return JSON.stringify([], null, 2);
        }
        const formatted2 = formatCellForOutput(ready);
        return JSON.stringify([formatted2], null, 2);
      }
      const cells = await adapter.queryCells(projectKey, {
        status: args.status,
        type: args.type,
        limit: args.limit || 20
      });
      const formatted = cells.map((c) => formatCellForOutput(c));
      return JSON.stringify(formatted, null, 2);
    } catch (error45) {
      const message = error45 instanceof Error ? error45.message : String(error45);
      if (message.includes("Ambiguous hash")) {
        throw new HiveError(`Ambiguous ID '${args.id}': multiple cells match. Please provide more characters.`, "hive_cells");
      }
      // "No cell found" is the not-found error thrown inside the try block
      // above; matching it here keeps that message from being re-wrapped as
      // "Failed to query cells: No cell found ...".
      if (message.includes("Bead not found") || message.includes("Cell not found") || message.includes("No cell found")) {
        throw new HiveError(`No cell found matching ID '${args.id || "unknown"}'`, "hive_cells");
      }
      throw new HiveError(`Failed to query cells: ${message}`, "hive_cells");
    }
  }
});
|
|
39412
39854
|
var hive_sync = tool({
|
|
39413
39855
|
description: "Sync hive to git and push (MANDATORY at session end)",
|
|
39414
39856
|
args: {
|
|
@@ -39550,6 +39992,7 @@ var hiveTools = {
|
|
|
39550
39992
|
hive_close,
|
|
39551
39993
|
hive_start,
|
|
39552
39994
|
hive_ready,
|
|
39995
|
+
hive_cells,
|
|
39553
39996
|
hive_sync,
|
|
39554
39997
|
hive_link_thread
|
|
39555
39998
|
};
|
|
@@ -41846,122 +42289,7 @@ init_swarm_strategies();
|
|
|
41846
42289
|
init_dist();
|
|
41847
42290
|
init_zod();
|
|
41848
42291
|
init_swarm_strategies();
|
|
41849
|
-
|
|
41850
|
-
// src/eval-capture.ts
|
|
41851
|
-
init_zod();
|
|
41852
|
-
import * as fs from "node:fs";
|
|
41853
|
-
import * as os from "node:os";
|
|
41854
|
-
import * as path from "node:path";
|
|
41855
|
-
var SubtaskOutcomeSchema = exports_external.object({
|
|
41856
|
-
bead_id: exports_external.string(),
|
|
41857
|
-
title: exports_external.string(),
|
|
41858
|
-
planned_files: exports_external.array(exports_external.string()),
|
|
41859
|
-
actual_files: exports_external.array(exports_external.string()),
|
|
41860
|
-
duration_ms: exports_external.number().int().min(0),
|
|
41861
|
-
error_count: exports_external.number().int().min(0),
|
|
41862
|
-
retry_count: exports_external.number().int().min(0),
|
|
41863
|
-
success: exports_external.boolean(),
|
|
41864
|
-
failure_mode: exports_external.string().optional()
|
|
41865
|
-
});
|
|
41866
|
-
var EvalRecordSchema = exports_external.object({
|
|
41867
|
-
id: exports_external.string(),
|
|
41868
|
-
timestamp: exports_external.string(),
|
|
41869
|
-
project_path: exports_external.string(),
|
|
41870
|
-
task: exports_external.string(),
|
|
41871
|
-
context: exports_external.string().optional(),
|
|
41872
|
-
strategy: exports_external.enum(["file-based", "feature-based", "risk-based", "auto"]),
|
|
41873
|
-
subtask_count: exports_external.number().int().min(1),
|
|
41874
|
-
epic_title: exports_external.string(),
|
|
41875
|
-
epic_description: exports_external.string().optional(),
|
|
41876
|
-
subtasks: exports_external.array(exports_external.object({
|
|
41877
|
-
title: exports_external.string(),
|
|
41878
|
-
description: exports_external.string().optional(),
|
|
41879
|
-
files: exports_external.array(exports_external.string()),
|
|
41880
|
-
dependencies: exports_external.array(exports_external.number()).optional(),
|
|
41881
|
-
estimated_complexity: exports_external.number().int().min(1).max(5).optional()
|
|
41882
|
-
})),
|
|
41883
|
-
outcomes: exports_external.array(SubtaskOutcomeSchema).optional(),
|
|
41884
|
-
overall_success: exports_external.boolean().optional(),
|
|
41885
|
-
total_duration_ms: exports_external.number().int().min(0).optional(),
|
|
41886
|
-
total_errors: exports_external.number().int().min(0).optional(),
|
|
41887
|
-
human_accepted: exports_external.boolean().optional(),
|
|
41888
|
-
human_modified: exports_external.boolean().optional(),
|
|
41889
|
-
human_notes: exports_external.string().optional(),
|
|
41890
|
-
file_overlap_count: exports_external.number().int().min(0).optional(),
|
|
41891
|
-
scope_accuracy: exports_external.number().min(0).max(2).optional(),
|
|
41892
|
-
time_balance_ratio: exports_external.number().min(1).optional()
|
|
41893
|
-
});
|
|
41894
|
-
var CoordinatorEventSchema = exports_external.discriminatedUnion("event_type", [
|
|
41895
|
-
exports_external.object({
|
|
41896
|
-
session_id: exports_external.string(),
|
|
41897
|
-
epic_id: exports_external.string(),
|
|
41898
|
-
timestamp: exports_external.string(),
|
|
41899
|
-
event_type: exports_external.literal("DECISION"),
|
|
41900
|
-
decision_type: exports_external.enum([
|
|
41901
|
-
"strategy_selected",
|
|
41902
|
-
"worker_spawned",
|
|
41903
|
-
"review_completed",
|
|
41904
|
-
"decomposition_complete"
|
|
41905
|
-
]),
|
|
41906
|
-
payload: exports_external.any()
|
|
41907
|
-
}),
|
|
41908
|
-
exports_external.object({
|
|
41909
|
-
session_id: exports_external.string(),
|
|
41910
|
-
epic_id: exports_external.string(),
|
|
41911
|
-
timestamp: exports_external.string(),
|
|
41912
|
-
event_type: exports_external.literal("VIOLATION"),
|
|
41913
|
-
violation_type: exports_external.enum([
|
|
41914
|
-
"coordinator_edited_file",
|
|
41915
|
-
"coordinator_ran_tests",
|
|
41916
|
-
"coordinator_reserved_files",
|
|
41917
|
-
"no_worker_spawned"
|
|
41918
|
-
]),
|
|
41919
|
-
payload: exports_external.any()
|
|
41920
|
-
}),
|
|
41921
|
-
exports_external.object({
|
|
41922
|
-
session_id: exports_external.string(),
|
|
41923
|
-
epic_id: exports_external.string(),
|
|
41924
|
-
timestamp: exports_external.string(),
|
|
41925
|
-
event_type: exports_external.literal("OUTCOME"),
|
|
41926
|
-
outcome_type: exports_external.enum([
|
|
41927
|
-
"subtask_success",
|
|
41928
|
-
"subtask_retry",
|
|
41929
|
-
"subtask_failed",
|
|
41930
|
-
"epic_complete"
|
|
41931
|
-
]),
|
|
41932
|
-
payload: exports_external.any()
|
|
41933
|
-
})
|
|
41934
|
-
]);
|
|
41935
|
-
var CoordinatorSessionSchema = exports_external.object({
|
|
41936
|
-
session_id: exports_external.string(),
|
|
41937
|
-
epic_id: exports_external.string(),
|
|
41938
|
-
start_time: exports_external.string(),
|
|
41939
|
-
end_time: exports_external.string().optional(),
|
|
41940
|
-
events: exports_external.array(CoordinatorEventSchema)
|
|
41941
|
-
});
|
|
41942
|
-
var inProgressRecords = new Map;
|
|
41943
|
-
function getSessionDir() {
|
|
41944
|
-
return path.join(os.homedir(), ".config", "swarm-tools", "sessions");
|
|
41945
|
-
}
|
|
41946
|
-
function getSessionPath(sessionId) {
|
|
41947
|
-
return path.join(getSessionDir(), `${sessionId}.jsonl`);
|
|
41948
|
-
}
|
|
41949
|
-
function ensureSessionDir() {
|
|
41950
|
-
const sessionDir = getSessionDir();
|
|
41951
|
-
if (!fs.existsSync(sessionDir)) {
|
|
41952
|
-
fs.mkdirSync(sessionDir, { recursive: true });
|
|
41953
|
-
}
|
|
41954
|
-
}
|
|
41955
|
-
/**
 * Validate a coordinator event and append it to its session's JSONL log.
 *
 * The event is checked with `CoordinatorEventSchema.parse` (which throws on
 * invalid input); the object written to disk is the caller's original `event`,
 * serialized as one JSON document followed by a newline.
 *
 * @param {object} event - Coordinator event; `event.session_id` selects the log file.
 */
function captureCoordinatorEvent(event) {
  // Validation only: the parsed result is intentionally not used.
  CoordinatorEventSchema.parse(event);
  ensureSessionDir();
  const logFile = getSessionPath(event.session_id);
  const serialized = JSON.stringify(event);
  fs.appendFileSync(logFile, `${serialized}\n`, "utf-8");
}
|
|
41963
|
-
|
|
41964
|
-
// src/swarm-decompose.ts
|
|
42292
|
+
init_eval_capture();
|
|
41965
42293
|
var DECOMPOSITION_PROMPT = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
|
|
41966
42294
|
|
|
41967
42295
|
## Task
|
|
@@ -42279,9 +42607,14 @@ ${fullContext}` : `## Additional Context
|
|
|
42279
42607
|
}
|
|
42280
42608
|
});
|
|
42281
42609
|
var swarm_validate_decomposition = tool({
|
|
42282
|
-
description: "Validate a decomposition response against CellTreeSchema",
|
|
42610
|
+
description: "Validate a decomposition response against CellTreeSchema and capture for eval",
|
|
42283
42611
|
args: {
|
|
42284
|
-
response: tool.schema.string().describe("JSON response from agent (CellTree format)")
|
|
42612
|
+
response: tool.schema.string().describe("JSON response from agent (CellTree format)"),
|
|
42613
|
+
project_path: tool.schema.string().optional().describe("Project path for eval capture"),
|
|
42614
|
+
task: tool.schema.string().optional().describe("Original task description for eval capture"),
|
|
42615
|
+
context: tool.schema.string().optional().describe("Context provided for decomposition"),
|
|
42616
|
+
strategy: tool.schema.enum(["file-based", "feature-based", "risk-based", "auto"]).optional().describe("Decomposition strategy used"),
|
|
42617
|
+
epic_id: tool.schema.string().optional().describe("Epic ID for eval capture")
|
|
42285
42618
|
},
|
|
42286
42619
|
async execute(args) {
|
|
42287
42620
|
try {
|
|
@@ -42315,6 +42648,29 @@ var swarm_validate_decomposition = tool({
|
|
|
42315
42648
|
}
|
|
42316
42649
|
}
|
|
42317
42650
|
const instructionConflicts = detectInstructionConflicts(validated.subtasks);
|
|
42651
|
+
if (args.project_path && args.task && args.strategy && args.epic_id) {
|
|
42652
|
+
try {
|
|
42653
|
+
const { captureDecomposition: captureDecomposition2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
|
|
42654
|
+
captureDecomposition2({
|
|
42655
|
+
epicId: args.epic_id,
|
|
42656
|
+
projectPath: args.project_path,
|
|
42657
|
+
task: args.task,
|
|
42658
|
+
context: args.context,
|
|
42659
|
+
strategy: args.strategy,
|
|
42660
|
+
epicTitle: validated.epic.title,
|
|
42661
|
+
epicDescription: validated.epic.description,
|
|
42662
|
+
subtasks: validated.subtasks.map((s) => ({
|
|
42663
|
+
title: s.title,
|
|
42664
|
+
description: s.description,
|
|
42665
|
+
files: s.files,
|
|
42666
|
+
dependencies: s.dependencies,
|
|
42667
|
+
estimated_complexity: s.estimated_complexity
|
|
42668
|
+
}))
|
|
42669
|
+
});
|
|
42670
|
+
} catch (error45) {
|
|
42671
|
+
console.warn("[swarm_validate_decomposition] Failed to capture decomposition:", error45);
|
|
42672
|
+
}
|
|
42673
|
+
}
|
|
42318
42674
|
return JSON.stringify({
|
|
42319
42675
|
valid: true,
|
|
42320
42676
|
cell_tree: validated,
|
|
@@ -42355,7 +42711,7 @@ var swarm_delegate_planning = tool({
|
|
|
42355
42711
|
strategy: tool.schema.enum(["auto", "file-based", "feature-based", "risk-based"]).optional().default("auto").describe("Decomposition strategy (default: auto-detect)"),
|
|
42356
42712
|
query_cass: tool.schema.boolean().optional().default(true).describe("Query CASS for similar past tasks (default: true)")
|
|
42357
42713
|
},
|
|
42358
|
-
async execute(args) {
|
|
42714
|
+
async execute(args, _ctx) {
|
|
42359
42715
|
const { selectStrategy: selectStrategy2, formatStrategyGuidelines: formatStrategyGuidelines2 } = await Promise.resolve().then(() => (init_swarm_strategies(), exports_swarm_strategies));
|
|
42360
42716
|
const { formatMemoryQueryForDecomposition: formatMemoryQueryForDecomposition2 } = await Promise.resolve().then(() => (init_learning(), exports_learning));
|
|
42361
42717
|
const { listSkills: listSkills2, getSkillsContextForSwarm: getSkillsContextForSwarm2, findRelevantSkills: findRelevantSkills2 } = await Promise.resolve().then(() => (init_skills(), exports_skills));
|
|
@@ -42371,7 +42727,7 @@ var swarm_delegate_planning = tool({
|
|
|
42371
42727
|
}
|
|
42372
42728
|
try {
|
|
42373
42729
|
captureCoordinatorEvent({
|
|
42374
|
-
session_id:
|
|
42730
|
+
session_id: _ctx.sessionID || "unknown",
|
|
42375
42731
|
epic_id: "planning",
|
|
42376
42732
|
timestamp: new Date().toISOString(),
|
|
42377
42733
|
event_type: "DECISION",
|
|
@@ -44481,6 +44837,7 @@ var worktreeTools = {
|
|
|
44481
44837
|
init_dist();
|
|
44482
44838
|
init_zod();
|
|
44483
44839
|
import { sendSwarmMessage as sendSwarmMessage2 } from "swarm-mail";
|
|
44840
|
+
init_eval_capture();
|
|
44484
44841
|
var ReviewIssueSchema = exports_external.object({
|
|
44485
44842
|
file: exports_external.string(),
|
|
44486
44843
|
line: exports_external.number().optional(),
|
|
@@ -44705,7 +45062,7 @@ var swarm_review_feedback = tool({
|
|
|
44705
45062
|
summary: exports_external.string().optional().describe("Review summary"),
|
|
44706
45063
|
issues: exports_external.string().optional().describe("JSON array of ReviewIssue objects (for needs_changes)")
|
|
44707
45064
|
},
|
|
44708
|
-
async execute(args) {
|
|
45065
|
+
async execute(args, _ctx) {
|
|
44709
45066
|
let parsedIssues = [];
|
|
44710
45067
|
if (args.issues) {
|
|
44711
45068
|
try {
|
|
@@ -44728,7 +45085,7 @@ var swarm_review_feedback = tool({
|
|
|
44728
45085
|
markReviewApproved(args.task_id);
|
|
44729
45086
|
try {
|
|
44730
45087
|
captureCoordinatorEvent({
|
|
44731
|
-
session_id:
|
|
45088
|
+
session_id: _ctx.sessionID || "unknown",
|
|
44732
45089
|
epic_id: epicId,
|
|
44733
45090
|
timestamp: new Date().toISOString(),
|
|
44734
45091
|
event_type: "DECISION",
|
|
@@ -44766,7 +45123,7 @@ You may now complete the task with \`swarm_complete\`.`,
|
|
|
44766
45123
|
const remaining = MAX_REVIEW_ATTEMPTS - attemptNumber;
|
|
44767
45124
|
try {
|
|
44768
45125
|
captureCoordinatorEvent({
|
|
44769
|
-
session_id:
|
|
45126
|
+
session_id: _ctx.sessionID || "unknown",
|
|
44770
45127
|
epic_id: epicId,
|
|
44771
45128
|
timestamp: new Date().toISOString(),
|
|
44772
45129
|
event_type: "DECISION",
|
|
@@ -44837,6 +45194,7 @@ var reviewTools = {
|
|
|
44837
45194
|
};
|
|
44838
45195
|
|
|
44839
45196
|
// src/swarm-orchestrate.ts
|
|
45197
|
+
init_eval_capture();
|
|
44840
45198
|
function generateWorkerHandoff(params) {
|
|
44841
45199
|
const handoff = {
|
|
44842
45200
|
contract: {
|
|
@@ -45737,10 +46095,29 @@ Files touched: ${args.files_touched?.join(", ") || "none recorded"}`,
|
|
|
45737
46095
|
reason: "No files_owned contract found (non-epic subtask or decomposition event missing)"
|
|
45738
46096
|
}
|
|
45739
46097
|
};
|
|
46098
|
+
try {
|
|
46099
|
+
const { captureSubtaskOutcome: captureSubtaskOutcome2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
|
|
46100
|
+
const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
|
|
46101
|
+
const evalEpicId = cell.parent_id || epicId2;
|
|
46102
|
+
captureSubtaskOutcome2({
|
|
46103
|
+
epicId: evalEpicId,
|
|
46104
|
+
projectPath: args.project_key,
|
|
46105
|
+
beadId: args.bead_id,
|
|
46106
|
+
title: cell.title,
|
|
46107
|
+
plannedFiles: args.planned_files || [],
|
|
46108
|
+
actualFiles: args.files_touched || [],
|
|
46109
|
+
durationMs: durationMs2,
|
|
46110
|
+
errorCount: args.error_count || 0,
|
|
46111
|
+
retryCount: args.retry_count || 0,
|
|
46112
|
+
success: true
|
|
46113
|
+
});
|
|
46114
|
+
} catch (error45) {
|
|
46115
|
+
console.warn("[swarm_complete] Failed to capture subtask outcome:", error45);
|
|
46116
|
+
}
|
|
45740
46117
|
try {
|
|
45741
46118
|
const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
|
|
45742
46119
|
captureCoordinatorEvent({
|
|
45743
|
-
session_id:
|
|
46120
|
+
session_id: _ctx.sessionID || "unknown",
|
|
45744
46121
|
epic_id: epicId2,
|
|
45745
46122
|
timestamp: new Date().toISOString(),
|
|
45746
46123
|
event_type: "OUTCOME",
|
|
@@ -45822,7 +46199,7 @@ ${errorStack.slice(0, 1000)}
|
|
|
45822
46199
|
try {
|
|
45823
46200
|
const durationMs = args.start_time ? Date.now() - args.start_time : 0;
|
|
45824
46201
|
captureCoordinatorEvent({
|
|
45825
|
-
session_id:
|
|
46202
|
+
session_id: _ctx.sessionID || "unknown",
|
|
45826
46203
|
epic_id: epicId,
|
|
45827
46204
|
timestamp: new Date().toISOString(),
|
|
45828
46205
|
event_type: "OUTCOME",
|
|
@@ -45889,7 +46266,9 @@ var swarm_record_outcome = tool({
|
|
|
45889
46266
|
"user_cancelled",
|
|
45890
46267
|
"unknown"
|
|
45891
46268
|
]).optional().describe("Failure classification (only when success=false). Auto-classified if not provided."),
|
|
45892
|
-
failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)")
|
|
46269
|
+
failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)"),
|
|
46270
|
+
project_path: tool.schema.string().optional().describe("Project path (for finalizing eval records when all subtasks complete)"),
|
|
46271
|
+
epic_id: tool.schema.string().optional().describe("Epic ID (for finalizing eval records when all subtasks complete)")
|
|
45893
46272
|
},
|
|
45894
46273
|
async execute(args) {
|
|
45895
46274
|
const signals = {
|
|
@@ -45911,6 +46290,18 @@ var swarm_record_outcome = tool({
|
|
|
45911
46290
|
const validated = OutcomeSignalsSchema.parse(signals);
|
|
45912
46291
|
const scored = scoreImplicitFeedback(validated, DEFAULT_LEARNING_CONFIG);
|
|
45913
46292
|
const errorStats = await globalErrorAccumulator.getErrorStats(args.bead_id);
|
|
46293
|
+
let finalizedRecord = null;
|
|
46294
|
+
if (args.project_path && args.epic_id) {
|
|
46295
|
+
try {
|
|
46296
|
+
const { finalizeEvalRecord: finalizeEvalRecord2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
|
|
46297
|
+
finalizedRecord = finalizeEvalRecord2({
|
|
46298
|
+
epicId: args.epic_id,
|
|
46299
|
+
projectPath: args.project_path
|
|
46300
|
+
});
|
|
46301
|
+
} catch (error45) {
|
|
46302
|
+
console.warn("[swarm_record_outcome] Failed to finalize eval record:", error45);
|
|
46303
|
+
}
|
|
46304
|
+
}
|
|
45914
46305
|
const criteriaToScore = args.criteria ?? [
|
|
45915
46306
|
"type_safe",
|
|
45916
46307
|
"no_bugs",
|
|
@@ -45952,6 +46343,7 @@ var swarm_record_outcome = tool({
|
|
|
45952
46343
|
accumulated_errors: errorStats.total,
|
|
45953
46344
|
unresolved_errors: errorStats.unresolved
|
|
45954
46345
|
},
|
|
46346
|
+
finalized_eval_record: finalizedRecord || undefined,
|
|
45955
46347
|
note: "Feedback events should be stored for criterion weight calculation. Use learning.ts functions to apply weights."
|
|
45956
46348
|
}, null, 2);
|
|
45957
46349
|
}
|
|
@@ -45983,12 +46375,31 @@ async function runResearchPhase(task, projectPath, options2) {
|
|
|
45983
46375
|
if (techStack.length === 0) {
|
|
45984
46376
|
return {
|
|
45985
46377
|
tech_stack: [],
|
|
46378
|
+
spawn_instructions: [],
|
|
45986
46379
|
summaries: {},
|
|
45987
46380
|
memory_ids: []
|
|
45988
46381
|
};
|
|
45989
46382
|
}
|
|
46383
|
+
const spawnInstructions = [];
|
|
46384
|
+
for (const tech of techStack) {
|
|
46385
|
+
const researchId = `research-${tech}-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
|
|
46386
|
+
const prompt = formatResearcherPrompt({
|
|
46387
|
+
research_id: researchId,
|
|
46388
|
+
epic_id: "standalone-research",
|
|
46389
|
+
tech_stack: [tech],
|
|
46390
|
+
project_path: projectPath,
|
|
46391
|
+
check_upgrades: options2?.checkUpgrades ?? false
|
|
46392
|
+
});
|
|
46393
|
+
spawnInstructions.push({
|
|
46394
|
+
research_id: researchId,
|
|
46395
|
+
tech,
|
|
46396
|
+
prompt,
|
|
46397
|
+
subagent_type: "swarm/researcher"
|
|
46398
|
+
});
|
|
46399
|
+
}
|
|
45990
46400
|
return {
|
|
45991
46401
|
tech_stack: techStack,
|
|
46402
|
+
spawn_instructions: spawnInstructions,
|
|
45992
46403
|
summaries: {},
|
|
45993
46404
|
memory_ids: []
|
|
45994
46405
|
};
|
|
@@ -46425,6 +46836,7 @@ var orchestrateTools = {
|
|
|
46425
46836
|
};
|
|
46426
46837
|
|
|
46427
46838
|
// src/swarm-prompts.ts
|
|
46839
|
+
init_eval_capture();
|
|
46428
46840
|
var STRATEGY_DECOMPOSITION_PROMPT2 = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
|
|
46429
46841
|
|
|
46430
46842
|
## Task
|
|
@@ -47221,7 +47633,7 @@ var swarm_spawn_subtask = tool({
|
|
|
47221
47633
|
}).optional().describe("Recovery context from checkpoint compaction"),
|
|
47222
47634
|
model: tool.schema.string().optional().describe("Optional explicit model override (auto-selected if not provided)")
|
|
47223
47635
|
},
|
|
47224
|
-
async execute(args) {
|
|
47636
|
+
async execute(args, _ctx) {
|
|
47225
47637
|
const prompt = formatSubtaskPromptV2({
|
|
47226
47638
|
bead_id: args.bead_id,
|
|
47227
47639
|
epic_id: args.epic_id,
|
|
@@ -47250,7 +47662,7 @@ var swarm_spawn_subtask = tool({
|
|
|
47250
47662
|
const postCompletionInstructions = COORDINATOR_POST_WORKER_CHECKLIST.replace(/{project_key}/g, args.project_path || "$PWD").replace(/{epic_id}/g, args.epic_id).replace(/{task_id}/g, args.bead_id).replace(/{files_touched}/g, filesJoined).replace(/{worker_id}/g, "worker");
|
|
47251
47663
|
try {
|
|
47252
47664
|
captureCoordinatorEvent({
|
|
47253
|
-
session_id:
|
|
47665
|
+
session_id: _ctx.sessionID || "unknown",
|
|
47254
47666
|
epic_id: args.epic_id,
|
|
47255
47667
|
timestamp: new Date().toISOString(),
|
|
47256
47668
|
event_type: "DECISION",
|
|
@@ -63171,6 +63583,7 @@ function createMetrics(result, toolName) {
|
|
|
63171
63583
|
}
|
|
63172
63584
|
|
|
63173
63585
|
// src/planning-guardrails.ts
|
|
63586
|
+
init_eval_capture();
|
|
63174
63587
|
var FILE_MODIFICATION_PATTERNS = [
|
|
63175
63588
|
/\bimplement\b/i,
|
|
63176
63589
|
/\bcreate\b.*\.(ts|js|tsx|jsx|py|rs|go|java|rb|swift|kt)/i,
|
|
@@ -63456,9 +63869,21 @@ function getLog() {
|
|
|
63456
63869
|
}
|
|
63457
63870
|
return _logger;
|
|
63458
63871
|
}
|
|
63459
|
-
var SWARM_COMPACTION_CONTEXT =
|
|
63872
|
+
var SWARM_COMPACTION_CONTEXT = `
|
|
63873
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
63874
|
+
│ │
|
|
63875
|
+
│ \uD83D\uDC1D YOU ARE THE COORDINATOR \uD83D\uDC1D │
|
|
63876
|
+
│ │
|
|
63877
|
+
│ NOT A WORKER. NOT AN IMPLEMENTER. │
|
|
63878
|
+
│ YOU ORCHESTRATE. │
|
|
63879
|
+
│ │
|
|
63880
|
+
└─────────────────────────────────────────────────────────────┘
|
|
63881
|
+
|
|
63882
|
+
## \uD83C\uDFAF NON-NEGOTIABLE: YOU ARE THE COORDINATOR
|
|
63883
|
+
|
|
63884
|
+
Context was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**
|
|
63460
63885
|
|
|
63461
|
-
|
|
63886
|
+
Your role is ORCHESTRATION, not implementation. When you catch yourself about to do work directly, STOP.
|
|
63462
63887
|
|
|
63463
63888
|
### ⛔ NEVER DO THESE (Coordinator Anti-Patterns)
|
|
63464
63889
|
|
|
@@ -63469,9 +63894,27 @@ Context was compacted but the swarm is still running. You are the **COORDINATOR*
|
|
|
63469
63894
|
- ❌ **NEVER** implement features yourself - SPAWN A WORKER
|
|
63470
63895
|
- ❌ **NEVER** "just do it myself to save time" - NO. SPAWN A WORKER.
|
|
63471
63896
|
- ❌ **NEVER** reserve files with \`swarmmail_reserve\` - Workers reserve files
|
|
63897
|
+
- ❌ **NEVER** fetch files/docs directly - SPAWN A RESEARCHER
|
|
63472
63898
|
|
|
63473
63899
|
**If you catch yourself about to edit a file, STOP. Use \`swarm_spawn_subtask\` instead.**
|
|
63474
63900
|
|
|
63901
|
+
### \uD83D\uDEAB FORBIDDEN TOOLS (Coordinators MUST delegate these)
|
|
63902
|
+
|
|
63903
|
+
**NEVER use these tools directly. ALWAYS spawn a researcher worker via \`swarm_spawn_researcher\`:**
|
|
63904
|
+
|
|
63905
|
+
**Repository fetching:**
|
|
63906
|
+
- \`repo-crawl_file\`, \`repo-crawl_readme\`, \`repo-crawl_search\`, \`repo-crawl_structure\`, \`repo-crawl_tree\`
|
|
63907
|
+
- \`repo-autopsy_*\` (all repo-autopsy tools)
|
|
63908
|
+
|
|
63909
|
+
**Web/documentation fetching:**
|
|
63910
|
+
- \`webfetch\`, \`fetch_fetch\`
|
|
63911
|
+
- \`context7_resolve-library-id\`, \`context7_get-library-docs\`
|
|
63912
|
+
|
|
63913
|
+
**Knowledge base:**
|
|
63914
|
+
- \`pdf-brain_search\`, \`pdf-brain_read\`
|
|
63915
|
+
|
|
63916
|
+
**If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
|
|
63917
|
+
|
|
63475
63918
|
### ✅ ALWAYS DO THESE (Coordinator Checklist)
|
|
63476
63919
|
|
|
63477
63920
|
On resume, execute this checklist IN ORDER:
|
|
@@ -63521,6 +63964,87 @@ Extract from session context:
|
|
|
63521
63964
|
- **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
|
|
63522
63965
|
- **Close the loop** - When all subtasks done, verify and close the epic
|
|
63523
63966
|
|
|
63967
|
+
**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
|
|
63968
|
+
|
|
63969
|
+
---
|
|
63970
|
+
|
|
63971
|
+
## \uD83D\uDCCB FULL COORDINATOR WORKFLOW (Reference)
|
|
63972
|
+
|
|
63973
|
+
You are ALWAYS swarming. Here is the complete workflow for any new work:
|
|
63974
|
+
|
|
63975
|
+
### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
|
|
63976
|
+
|
|
63977
|
+
**If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
|
|
63978
|
+
|
|
63979
|
+
\`\`\`
|
|
63980
|
+
swarm_spawn_researcher(
|
|
63981
|
+
research_id="research-<topic>",
|
|
63982
|
+
epic_id="<epic-id>",
|
|
63983
|
+
tech_stack=["<technology>"],
|
|
63984
|
+
project_path="<path>"
|
|
63985
|
+
)
|
|
63986
|
+
// Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
|
|
63987
|
+
\`\`\`
|
|
63988
|
+
|
|
63989
|
+
### Phase 2: Knowledge Gathering
|
|
63990
|
+
|
|
63991
|
+
\`\`\`
|
|
63992
|
+
semantic-memory_find(query="<task keywords>", limit=5) # Past learnings
|
|
63993
|
+
cass_search(query="<task description>", limit=5) # Similar past tasks
|
|
63994
|
+
skills_list() # Available skills
|
|
63995
|
+
\`\`\`
|
|
63996
|
+
|
|
63997
|
+
### Phase 3: Decompose
|
|
63998
|
+
|
|
63999
|
+
\`\`\`
|
|
64000
|
+
swarm_select_strategy(task="<task>")
|
|
64001
|
+
swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
|
|
64002
|
+
swarm_validate_decomposition(response="<CellTree JSON>")
|
|
64003
|
+
\`\`\`
|
|
64004
|
+
|
|
64005
|
+
### Phase 4: Create Cells
|
|
64006
|
+
|
|
64007
|
+
\`hive_create_epic(epic_title="<task>", subtasks=[...])\`
|
|
64008
|
+
|
|
64009
|
+
### Phase 5: DO NOT Reserve Files
|
|
64010
|
+
|
|
64011
|
+
> **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
|
|
64012
|
+
|
|
64013
|
+
### Phase 6: Spawn Workers
|
|
64014
|
+
|
|
64015
|
+
\`\`\`
|
|
64016
|
+
swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
|
|
64017
|
+
Task(subagent_type="swarm/worker", prompt="<from above>")
|
|
64018
|
+
\`\`\`
|
|
64019
|
+
|
|
64020
|
+
### Phase 7: MANDATORY Review Loop
|
|
64021
|
+
|
|
64022
|
+
**AFTER EVERY Task() RETURNS:**
|
|
64023
|
+
|
|
64024
|
+
1. \`swarmmail_inbox()\` - Check for messages
|
|
64025
|
+
2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
|
|
64026
|
+
3. Evaluate against epic goals
|
|
64027
|
+
4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
|
|
64028
|
+
|
|
64029
|
+
**If needs_changes:**
|
|
64030
|
+
\`\`\`
|
|
64031
|
+
swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
|
|
64032
|
+
// Spawn NEW worker with Task() using retry prompt
|
|
64033
|
+
// Max 3 attempts before marking task blocked
|
|
64034
|
+
\`\`\`
|
|
64035
|
+
|
|
64036
|
+
### Phase 8: Complete
|
|
64037
|
+
|
|
64038
|
+
\`hive_sync()\` - Sync all cells to git
|
|
64039
|
+
|
|
64040
|
+
## Strategy Reference
|
|
64041
|
+
|
|
64042
|
+
| Strategy | Best For | Keywords |
|
|
64043
|
+
| -------------- | ------------------------ | -------------------------------------- |
|
|
64044
|
+
| file-based | Refactoring, migrations | refactor, migrate, rename, update all |
|
|
64045
|
+
| feature-based | New features | add, implement, build, create, feature |
|
|
64046
|
+
| risk-based | Bug fixes, security | fix, bug, security, critical, urgent |
|
|
64047
|
+
|
|
63524
64048
|
**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
|
|
63525
64049
|
`;
|
|
63526
64050
|
var SWARM_DETECTION_FALLBACK = `## \uD83D\uDC1D Swarm Detection - Check Your Context
|
|
@@ -64458,6 +64982,161 @@ async function resetStorage() {
|
|
|
64458
64982
|
|
|
64459
64983
|
// src/index.ts
|
|
64460
64984
|
init_skills();
|
|
64985
|
+
|
|
64986
|
+
// src/eval-history.ts
|
|
64987
|
+
import * as fs2 from "node:fs";
|
|
64988
|
+
import * as path3 from "node:path";
|
|
64989
|
+
// Location of the eval history log, relative to a project root.
var DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";

// getPhase reports "production" only when score variance is below this value.
var VARIANCE_THRESHOLD = 0.1;

// getPhase reports "bootstrap" while an eval has fewer recorded runs than this.
var BOOTSTRAP_THRESHOLD = 10;

// getPhase reports "stabilization" for evals with up to this many recorded runs.
var STABILIZATION_THRESHOLD = 50;

/**
 * Resolve the eval history file for a project.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {string} `projectPath` joined with DEFAULT_EVAL_HISTORY_PATH.
 */
function getEvalHistoryPath(projectPath) {
  const historyFile = path3.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
  return historyFile;
}
|
|
64996
|
+
/**
 * Make sure the directory that will contain the eval history file exists.
 *
 * `mkdirSync` with `recursive: true` succeeds silently when the directory is
 * already there, so the previous existsSync check was redundant and left a
 * check-then-create race. Only the directory is created, never the file.
 *
 * @param {string} projectPath - Project root directory.
 * @throws {Error} If the directory cannot be created.
 */
function ensureEvalHistoryDir(projectPath) {
  const historyDir = path3.dirname(getEvalHistoryPath(projectPath));
  fs2.mkdirSync(historyDir, { recursive: true });
}
|
|
65003
|
+
/**
 * Append one eval run to the project's history file as a single JSON line.
 *
 * @param {string} projectPath - Project root directory.
 * @param {object} run - Run record; serialized with JSON.stringify as-is.
 */
function recordEvalRun(projectPath, run) {
  ensureEvalHistoryDir(projectPath);
  const target = getEvalHistoryPath(projectPath);
  const serialized = JSON.stringify(run);
  // One record per line (JSONL): newline-terminated so appends never merge.
  fs2.appendFileSync(target, `${serialized}\n`, "utf-8");
}
|
|
65010
|
+
/**
 * Read every record from the project's eval history file (JSON Lines).
 *
 * Blank and whitespace-only lines are skipped. A line that is not valid JSON
 * still raises a SyntaxError, as before, but the message now names the file and
 * the 1-based line number so a damaged history file can be located and fixed.
 * The file is read directly and a missing file yields an empty list, so there
 * is no check-then-read window.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {Array<object>} Parsed records in file order; `[]` if the file does not exist.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 * @throws {Error} For read failures other than "file not found".
 */
function readAllRecords(projectPath) {
  const historyPath = getEvalHistoryPath(projectPath);
  let content;
  try {
    content = fs2.readFileSync(historyPath, "utf-8");
  } catch (error) {
    // No history yet is an expected state, not an error.
    if (error && error.code === "ENOENT") {
      return [];
    }
    throw error;
  }
  const records = [];
  content.split("\n").forEach((rawLine, index) => {
    // trim() also removes a trailing "\r" left by CRLF line endings.
    const line = rawLine.trim();
    if (line === "") {
      return;
    }
    try {
      records.push(JSON.parse(line));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`${historyPath}:${index + 1}: invalid JSON in eval history (${reason})`);
    }
  });
  return records;
}
|
|
65020
|
+
/**
 * Collect the recorded runs that belong to one eval.
 *
 * @param {string} projectPath - Project root directory.
 * @param {string} evalName - Value matched strictly against each record's `eval_name`.
 * @returns {Array<object>} Matching records, in the order they were read.
 */
function getScoreHistory(projectPath, evalName) {
  const matchingRuns = [];
  readAllRecords(projectPath).forEach((run) => {
    if (run.eval_name === evalName) {
      matchingRuns.push(run);
    }
  });
  return matchingRuns;
}
|
|
65023
|
+
/**
 * Population variance of a list of scores (sum of squared deviations from the
 * mean, divided by the number of scores).
 *
 * @param {number[]} scores - Score values.
 * @returns {number} The variance; 0 when there are fewer than two scores.
 */
function calculateVariance(scores) {
  const count = scores.length;
  if (count <= 1) {
    return 0;
  }
  let total = 0;
  scores.forEach((score) => {
    total += score;
  });
  const mean = total / count;
  let squaredDeviationSum = 0;
  scores.forEach((score) => {
    const deviation = score - mean;
    squaredDeviationSum += deviation * deviation;
  });
  return squaredDeviationSum / count;
}
|
|
65034
|
+
/**
 * Classify an eval by how much history it has and how steady its scores are.
 *
 * - fewer than BOOTSTRAP_THRESHOLD runs: "bootstrap"
 * - more than STABILIZATION_THRESHOLD runs and score variance below
 *   VARIANCE_THRESHOLD: "production"
 * - anything else: "stabilization"
 *
 * @param {string} projectPath - Project root directory.
 * @param {string} evalName - Eval whose history is inspected.
 * @returns {"bootstrap"|"stabilization"|"production"} The current phase.
 */
function getPhase(projectPath, evalName) {
  const runs = getScoreHistory(projectPath, evalName);
  const runCount = runs.length;
  if (runCount < BOOTSTRAP_THRESHOLD) {
    return "bootstrap";
  }
  if (runCount > STABILIZATION_THRESHOLD) {
    // Variance is only consulted once there is enough history to leave stabilization.
    const spread = calculateVariance(runs.map((run) => run.score));
    if (spread < VARIANCE_THRESHOLD) {
      return "production";
    }
  }
  return "stabilization";
}
|
|
65049
|
+
// src/eval-gates.ts
|
|
65050
|
+
// Default regression tolerances per phase, expressed as a fraction of the
// baseline score. checkGate uses them when its config omits a threshold.
var DEFAULT_THRESHOLDS = {
  stabilization: 0.1,
  production: 0.05
};

/**
 * Baseline score for regression checks: the mean score of the recorded runs.
 *
 * @param {Array<{score: number}>} history - Previously recorded runs.
 * @param {number} currentScore - Returned unchanged when there is no history.
 * @returns {number} Mean of `history` scores, or `currentScore` if `history` is empty.
 */
function calculateBaseline(history, currentScore) {
  const runCount = history.length;
  if (runCount === 0) {
    return currentScore;
  }
  let scoreTotal = 0;
  history.forEach((run) => {
    scoreTotal += run.score;
  });
  return scoreTotal / runCount;
}
|
|
65060
|
+
/**
 * Relative drop of the current score below the baseline.
 *
 * @param {number} baseline - Reference score.
 * @param {number} currentScore - Score being evaluated.
 * @returns {number} `(baseline - currentScore) / baseline`; positive means the
 *   score fell, negative means it improved. Returns 0 when `baseline` is 0 to
 *   avoid dividing by zero.
 */
function calculateRegression(baseline, currentScore) {
  return baseline === 0 ? 0 : (baseline - currentScore) / baseline;
}
|
|
65066
|
+
/**
 * Render a regression figure as human-readable text.
 *
 * @param {number} regressionPercent - Regression as a fraction (0.1 means 10%).
 * @param {number} baseline - Baseline score, shown with two decimals.
 * @param {number} currentScore - Current score, shown with two decimals.
 * @returns {string} e.g. "10.0% regression (baseline: 0.80, current: 0.72)".
 */
function formatRegressionMessage(regressionPercent, baseline, currentScore) {
  const percentText = (regressionPercent * 100).toFixed(1);
  const baselineText = baseline.toFixed(2);
  const currentText = currentScore.toFixed(2);
  return `${percentText}% regression (baseline: ${baselineText}, current: ${currentText})`;
}
|
|
65069
|
+
/**
 * Decide whether an eval score passes the quality gate for its current phase.
 *
 * - bootstrap: always passes; no baseline is computed.
 * - stabilization: always passes; the message says whether the regression
 *   exceeded the stabilization threshold, and notes high variance once the
 *   history is longer than STABILIZATION_THRESHOLD runs.
 * - any other phase (treated as production): fails when the regression exceeds
 *   the production threshold.
 *
 * The bootstrap message and the long-history check use BOOTSTRAP_THRESHOLD and
 * STABILIZATION_THRESHOLD rather than repeating their literal values, so they
 * cannot drift from the phase boundaries getPhase applies.
 *
 * @param {string} projectPath - Project root directory.
 * @param {string} evalName - Eval being gated.
 * @param {number} currentScore - Score of the run under evaluation.
 * @param {{stabilizationThreshold?: number, productionThreshold?: number}} [config2]
 *   Optional overrides for the DEFAULT_THRESHOLDS values.
 * @returns {{passed: boolean, phase: string, message: string, currentScore: number,
 *   baseline?: number, regressionPercent?: number}} Gate verdict. `baseline` and
 *   `regressionPercent` are absent in the bootstrap phase.
 */
function checkGate(projectPath, evalName, currentScore, config2) {
  const thresholds = {
    stabilization: config2?.stabilizationThreshold ?? DEFAULT_THRESHOLDS.stabilization,
    production: config2?.productionThreshold ?? DEFAULT_THRESHOLDS.production
  };
  const phase = getPhase(projectPath, evalName);
  const history = getScoreHistory(projectPath, evalName);
  if (phase === "bootstrap") {
    return {
      passed: true,
      phase: "bootstrap",
      message: `Bootstrap phase (${history.length}/${BOOTSTRAP_THRESHOLD} runs) - collecting data`,
      currentScore
    };
  }
  const baseline = calculateBaseline(history, currentScore);
  const regressionPercent = calculateRegression(baseline, currentScore);
  const regressionMsg = formatRegressionMessage(regressionPercent, baseline, currentScore);
  // Every non-bootstrap verdict carries the same fields in the same order.
  const verdict = (passed, phaseLabel, message) => ({
    passed,
    phase: phaseLabel,
    message,
    baseline,
    currentScore,
    regressionPercent
  });
  if (phase === "stabilization") {
    if (regressionPercent > thresholds.stabilization) {
      // Informational only: stabilization never blocks.
      return verdict(true, "stabilization", `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`);
    }
    if (history.length > STABILIZATION_THRESHOLD) {
      // Enough runs for production, so getPhase kept us here because of variance.
      const variance5 = calculateVariance(history.map((run) => run.score));
      return verdict(true, "stabilization", `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance5.toFixed(3)}) prevents production phase.`);
    }
    return verdict(true, "stabilization", `Stabilization phase: ${regressionMsg} - acceptable`);
  }
  if (regressionPercent > thresholds.production) {
    return verdict(false, "production", `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`);
  }
  return verdict(true, "production", `Production phase: ${regressionMsg} - acceptable`);
}
|
|
65138
|
+
|
|
65139
|
+
// src/index.ts
|
|
64461
65140
|
var SwarmPlugin = async (input) => {
|
|
64462
65141
|
const { $, directory, client } = input;
|
|
64463
65142
|
setHiveWorkingDirectory(directory);
|
|
@@ -64524,7 +65203,7 @@ var SwarmPlugin = async (input) => {
|
|
|
64524
65203
|
if (isInCoordinatorContext()) {
|
|
64525
65204
|
const ctx = getCoordinatorContext();
|
|
64526
65205
|
const violation = detectCoordinatorViolation({
|
|
64527
|
-
sessionId:
|
|
65206
|
+
sessionId: input2.sessionID || "unknown",
|
|
64528
65207
|
epicId: ctx.epicId || "unknown",
|
|
64529
65208
|
toolName,
|
|
64530
65209
|
toolArgs: output.args,
|
|
@@ -64638,6 +65317,7 @@ export {
|
|
|
64638
65317
|
researchTools,
|
|
64639
65318
|
requireTool,
|
|
64640
65319
|
repoCrawlTools,
|
|
65320
|
+
recordEvalRun,
|
|
64641
65321
|
parseFrontmatter,
|
|
64642
65322
|
migrateBeadsToHive,
|
|
64643
65323
|
mergeHistoricBeads,
|
|
@@ -64668,6 +65348,7 @@ export {
|
|
|
64668
65348
|
hive_create_epic,
|
|
64669
65349
|
hive_create,
|
|
64670
65350
|
hive_close,
|
|
65351
|
+
hive_cells,
|
|
64671
65352
|
hiveTools,
|
|
64672
65353
|
guardrailOutput,
|
|
64673
65354
|
groupByTransition,
|
|
@@ -64677,12 +65358,15 @@ export {
|
|
|
64677
65358
|
getStatusChanges,
|
|
64678
65359
|
getSkillsContextForSwarm,
|
|
64679
65360
|
getSkill,
|
|
65361
|
+
getScoreHistory,
|
|
64680
65362
|
getSchemaByName,
|
|
65363
|
+
getPhase,
|
|
64681
65364
|
getMandateStorage,
|
|
64682
65365
|
getLogger,
|
|
64683
65366
|
getInstalledVersions,
|
|
64684
65367
|
getHiveWorkingDirectory,
|
|
64685
65368
|
getHiveAdapter,
|
|
65369
|
+
getEvalHistoryPath,
|
|
64686
65370
|
getCellIdFromEvent,
|
|
64687
65371
|
getBeadsWorkingDirectory,
|
|
64688
65372
|
getBeadsAdapter,
|
|
@@ -64700,6 +65384,7 @@ export {
|
|
|
64700
65384
|
evaluatePromotion,
|
|
64701
65385
|
evaluateBatchPromotions,
|
|
64702
65386
|
ensureHiveDirectory,
|
|
65387
|
+
ensureEvalHistoryDir,
|
|
64703
65388
|
discoverSkills,
|
|
64704
65389
|
discoverDocTools,
|
|
64705
65390
|
src_default as default,
|
|
@@ -64715,8 +65400,10 @@ export {
|
|
|
64715
65400
|
createAgentMailError,
|
|
64716
65401
|
clearSessionState,
|
|
64717
65402
|
checkTool,
|
|
65403
|
+
checkGate,
|
|
64718
65404
|
checkBeadsMigrationNeeded,
|
|
64719
65405
|
checkAllTools,
|
|
65406
|
+
calculateVariance,
|
|
64720
65407
|
beads_update,
|
|
64721
65408
|
beads_sync,
|
|
64722
65409
|
beads_start,
|
|
@@ -64738,6 +65425,7 @@ export {
|
|
|
64738
65425
|
VoteTypeSchema,
|
|
64739
65426
|
VoteSchema,
|
|
64740
65427
|
ValidationResultSchema,
|
|
65428
|
+
VARIANCE_THRESHOLD,
|
|
64741
65429
|
UpdateSwarmContextArgsSchema,
|
|
64742
65430
|
TaskDecompositionSchema,
|
|
64743
65431
|
SwarmStrategySchema,
|
|
@@ -64757,6 +65445,7 @@ export {
|
|
|
64757
65445
|
SWARM_COMPACTION_CONTEXT,
|
|
64758
65446
|
SUBTASK_PROMPT_V2,
|
|
64759
65447
|
STRATEGIES,
|
|
65448
|
+
STABILIZATION_THRESHOLD,
|
|
64760
65449
|
RepoCrawlError,
|
|
64761
65450
|
QuerySwarmContextsArgsSchema,
|
|
64762
65451
|
QueryMandatesArgsSchema,
|
|
@@ -64779,10 +65468,12 @@ export {
|
|
|
64779
65468
|
DecompositionError,
|
|
64780
65469
|
DecomposedSubtaskSchema,
|
|
64781
65470
|
DecomposeArgsSchema,
|
|
65471
|
+
DEFAULT_THRESHOLDS,
|
|
64782
65472
|
DEFAULT_STORAGE_CONFIG,
|
|
64783
65473
|
DEFAULT_MANDATE_STORAGE_CONFIG,
|
|
64784
65474
|
DEFAULT_MANDATE_DECAY_CONFIG,
|
|
64785
65475
|
DEFAULT_GUARDRAIL_CONFIG,
|
|
65476
|
+
DEFAULT_EVAL_HISTORY_PATH,
|
|
64786
65477
|
DEFAULT_CRITERIA,
|
|
64787
65478
|
CriterionEvaluationSchema,
|
|
64788
65479
|
CreateSwarmContextArgsSchema,
|
|
@@ -64850,6 +65541,7 @@ export {
|
|
|
64850
65541
|
BeadAssignedEventSchema,
|
|
64851
65542
|
BaseCellEventSchema,
|
|
64852
65543
|
BaseBeadEventSchema,
|
|
65544
|
+
BOOTSTRAP_THRESHOLD,
|
|
64853
65545
|
AgentProgressSchema,
|
|
64854
65546
|
AgentMailNotInitializedError,
|
|
64855
65547
|
AgentMailError
|