opencode-swarm-plugin 0.40.0 → 0.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
- package/.hive/analysis/session-data-quality-audit.md +320 -0
- package/.hive/eval-results.json +481 -24
- package/.hive/issues.jsonl +67 -16
- package/.hive/memories.jsonl +159 -1
- package/.opencode/eval-history.jsonl +315 -0
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +165 -0
- package/README.md +2 -0
- package/SCORER-ANALYSIS.md +598 -0
- package/bin/eval-gate.test.ts +158 -0
- package/bin/eval-gate.ts +74 -0
- package/bin/swarm.serve.test.ts +46 -0
- package/bin/swarm.test.ts +661 -732
- package/bin/swarm.ts +335 -0
- package/dist/compaction-hook.d.ts +7 -5
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +1 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -1
- package/dist/eval-runner.d.ts +134 -0
- package/dist/eval-runner.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +29 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +99741 -58858
- package/dist/memory-tools.d.ts +70 -2
- package/dist/memory-tools.d.ts.map +1 -1
- package/dist/memory.d.ts +37 -0
- package/dist/memory.d.ts.map +1 -1
- package/dist/observability-tools.d.ts +64 -0
- package/dist/observability-tools.d.ts.map +1 -1
- package/dist/plugin.js +99356 -58318
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +32 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
- package/evals/ARCHITECTURE.md +1189 -0
- package/evals/example.eval.ts +3 -4
- package/evals/fixtures/compaction-prompt-cases.ts +6 -0
- package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
- package/evals/scorers/coordinator-discipline.ts +0 -323
- package/evals/swarm-decomposition.eval.ts +4 -2
- package/package.json +4 -3
- package/src/compaction-prompt-scorers.test.ts +185 -9
- package/src/compaction-prompt-scoring.ts +7 -5
- package/src/eval-runner.test.ts +128 -1
- package/src/eval-runner.ts +46 -0
- package/src/hive.ts +43 -42
- package/src/memory-tools.test.ts +84 -0
- package/src/memory-tools.ts +68 -3
- package/src/memory.test.ts +2 -112
- package/src/memory.ts +88 -49
- package/src/observability-tools.test.ts +13 -0
- package/src/observability-tools.ts +277 -0
- package/src/swarm-orchestrate.test.ts +162 -0
- package/src/swarm-orchestrate.ts +7 -5
- package/src/swarm-prompts.test.ts +168 -4
- package/src/swarm-prompts.ts +228 -7
- package/.env +0 -2
- package/.turbo/turbo-test.log +0 -481
- package/.turbo/turbo-typecheck.log +0 -1
|
@@ -582,6 +582,283 @@ const swarm_insights = tool({
|
|
|
582
582
|
},
|
|
583
583
|
});
|
|
584
584
|
|
|
585
|
+
// ============================================================================
|
|
586
|
+
// Stats CLI Helpers (exported for bin/swarm.ts)
|
|
587
|
+
// ============================================================================
|
|
588
|
+
|
|
589
|
+
/**
 * Aggregated swarm metrics rendered by `formatSwarmStats`.
 */
export interface SwarmStatsData {
  /** Totals across all swarms in the reporting window. */
  overall: {
    /** Number of swarms counted. */
    totalSwarms: number;
    /** Success percentage; displayed rounded with a `%` suffix. */
    successRate: number;
    /** Mean duration in minutes; displayed with one decimal place. */
    avgDurationMin: number;
  };
  /** One entry per decomposition strategy (same shape `aggregateByStrategy` returns). */
  byStrategy: {
    /** Strategy name used as the row label. */
    strategy: string;
    /** Number of outcomes recorded for this strategy. */
    total: number;
    /** Success percentage; displayed rounded with a `%` suffix. */
    successRate: number;
    /** Number of successful outcomes (numerator of the displayed ratio). */
    successes: number;
  }[];
  /** Coordinator health metrics; each is displayed as a rounded percentage. */
  coordinator: {
    violationRate: number;
    spawnEfficiency: number;
    reviewThoroughness: number;
  };
  /** Size of the reporting window in days, shown in the footer line. */
  recentDays: number;
}
|
|
608
|
+
|
|
609
|
+
/**
 * Render swarm statistics as a box-drawn panel for CLI output.
 *
 * Every row is produced by a single helper that pads (or, if necessary,
 * clips with an ellipsis) to one shared inner width, so the right-hand
 * border stays aligned regardless of how large the numbers or how long the
 * strategy names are. No per-row padding arithmetic is used, which also
 * removes the possibility of `String.prototype.repeat` receiving a negative
 * count.
 *
 * @param stats - Aggregated metrics to display.
 * @returns The panel followed by a blank line and a footer naming the
 *   reporting window, joined with `\n`.
 */
export function formatSwarmStats(stats: SwarmStatsData): string {
  // Width between the two vertical borders, measured in UTF-16 code units.
  const INNER_WIDTH = 41;
  const STRATEGY_LABEL_WIDTH = 15;

  const rule = "─".repeat(INNER_WIDTH);

  // Shorten over-long text so it occupies at most `width` code units.
  const clip = (text: string, width: number): string =>
    text.length > width ? `${text.slice(0, width - 1)}…` : text;

  // Wrap content in borders, padded/clipped to exactly INNER_WIDTH.
  const row = (content: string): string =>
    `│${clip(content, INNER_WIDTH).padEnd(INNER_WIDTH)}│`;

  const centered = (text: string): string => {
    const leftPad = Math.max(0, Math.floor((INNER_WIDTH - text.length) / 2));
    return row(`${" ".repeat(leftPad)}${text}`);
  };

  const percent = (value: number): string => `${Math.round(value)}%`;

  const lines: string[] = [];

  // Header
  lines.push(`┌${rule}┐`);
  lines.push(centered("🐝 SWARM STATISTICS 🐝"));
  lines.push(`├${rule}┤`);

  // Overall stats
  const totalStr = stats.overall.totalSwarms.toString().padEnd(4);
  const rateStr = percent(stats.overall.successRate).padStart(3);
  lines.push(row(` Total Swarms: ${totalStr} Success: ${rateStr}`));
  lines.push(row(` Avg Duration: ${stats.overall.avgDurationMin.toFixed(1)}min`));
  lines.push(`├${rule}┤`);

  // Strategy breakdown
  lines.push(row(" BY STRATEGY"));
  if (stats.byStrategy.length === 0) {
    lines.push(row(" ├─ No data yet"));
  } else {
    for (const entry of stats.byStrategy) {
      const label = clip(entry.strategy, STRATEGY_LABEL_WIDTH).padEnd(STRATEGY_LABEL_WIDTH);
      const rate = percent(entry.successRate).padStart(4);
      const counts = `(${entry.successes}/${entry.total})`;
      lines.push(row(` ├─ ${label} ${rate} ${counts}`));
    }
  }
  lines.push(`├${rule}┤`);

  // Coordinator health
  lines.push(row(" COORDINATOR HEALTH"));
  lines.push(row(` Violation Rate: ${percent(stats.coordinator.violationRate).padStart(3)}`));
  lines.push(row(` Spawn Efficiency: ${percent(stats.coordinator.spawnEfficiency).padStart(4)}`));
  lines.push(row(` Review Rate: ${percent(stats.coordinator.reviewThoroughness).padStart(3)}`));
  lines.push(`└${rule}┘`);

  // Footer (outside the box)
  lines.push("");
  lines.push(`📊 Stats for last ${stats.recentDays} days`);

  return lines.join("\n");
}
|
|
659
|
+
|
|
660
|
+
/**
 * Convert a relative period such as "7d", "24h" or "30m" into a cutoff
 * timestamp: the current time minus that period, in epoch milliseconds.
 *
 * @param period - Unsigned integer followed by `d` (days), `h` (hours) or
 *   `m` (minutes), with nothing else in the string.
 * @returns `Date.now()` minus the period, in milliseconds since the epoch.
 * @throws Error when `period` does not match the expected format.
 */
export function parseTimePeriod(period: string): number {
  const parts = period.match(/^(\d+)([dhm])$/);
  if (parts === null) {
    throw new Error(
      `Invalid time period format: ${period}. Use "7d", "24h", or "30m"`,
    );
  }

  const amount = Number.parseInt(parts[1], 10);
  const unit = parts[2];
  const reference = Date.now();

  if (unit === "d") {
    return reference - amount * 24 * 60 * 60 * 1000;
  }
  if (unit === "h") {
    return reference - amount * 60 * 60 * 1000;
  }
  if (unit === "m") {
    return reference - amount * 60 * 1000;
  }

  // The pattern above only admits d/h/m, so this is a defensive fallback.
  throw new Error(`Unknown time unit: ${unit}`);
}
|
|
686
|
+
|
|
687
|
+
/**
 * Group swarm outcomes by strategy and compute per-strategy success figures.
 *
 * Outcomes whose strategy is `null` (or an empty string) are counted under
 * `"unknown"`. Tallies are kept in a `Map` rather than a plain object so that
 * strategy names colliding with `Object.prototype` members (for example
 * `"constructor"` or `"toString"`) are counted like any other name.
 *
 * @param outcomes - Individual results to aggregate.
 * @returns One entry per distinct strategy, in order of first appearance,
 *   with `successRate` expressed as a percentage (0–100).
 */
export function aggregateByStrategy(
  outcomes: Array<{ strategy: string | null; success: boolean }>,
): Array<{ strategy: string; total: number; successRate: number; successes: number }> {
  const tallies = new Map<string, { total: number; successes: number }>();

  for (const outcome of outcomes) {
    const key = outcome.strategy || "unknown";

    let tally = tallies.get(key);
    if (tally === undefined) {
      tally = { total: 0, successes: 0 };
      tallies.set(key, tally);
    }

    tally.total += 1;
    if (outcome.success) {
      tally.successes += 1;
    }
  }

  // `total` is at least 1 for every stored key, so the division is safe.
  return Array.from(tallies, ([strategy, { total, successes }]) => ({
    strategy,
    total,
    successes,
    successRate: (successes / total) * 100,
  }));
}
|
|
713
|
+
|
|
714
|
+
// ============================================================================
|
|
715
|
+
// History CLI Helpers (exported for bin/swarm.ts)
|
|
716
|
+
// ============================================================================
|
|
717
|
+
|
|
718
|
+
/**
 * One finished (or in-flight) swarm run as returned by `querySwarmHistory`.
 */
export interface SwarmHistoryRecord {
  /** Epic identifier (`$.epic_id` of the event payload). */
  epic_id: string;
  /** Human-readable task description (`$.task` of the event payload). */
  epic_title: string;
  /** Decomposition strategy name, or `"unknown"` when absent. */
  strategy: string;
  /** Event timestamp, stringified from the `timestamp` column. */
  timestamp: string;
  /** Whether the run as a whole succeeded. */
  overall_success: boolean;
  /** Number of subtasks planned for the epic. */
  task_count: number;
  /** Number of subtasks completed so far. */
  completed_count: number;
}
|
|
727
|
+
|
|
728
|
+
/**
 * Query recent swarm runs from `eval_finalized` rows of the `events` table.
 *
 * SQLite's `json_extract` yields INTEGER 1/0 for JSON `true`/`false`, so the
 * success flag is matched against both the integer and the textual form;
 * comparing only against the string `'true'` would never match a real JSON
 * boolean. The same tolerance is applied when mapping rows back to
 * `overall_success`.
 *
 * @param projectPath - Project location handed to `getSwarmMailLibSQL`.
 * @param options - Optional filters.
 *   - `limit`: maximum rows to return. Defaults to 10; values that are not
 *     positive integers also fall back to 10 (a negative LIMIT would
 *     otherwise mean "unbounded" in SQLite).
 *   - `status`: `"success"` (flag true), `"failed"` (flag false and every
 *     task completed) or `"in_progress"` (fewer completed than planned).
 *   - `strategy`: exact match on the payload's `$.strategy`.
 * @returns Matching records, newest first.
 */
export async function querySwarmHistory(
  projectPath: string,
  options?: {
    limit?: number;
    status?: "success" | "failed" | "in_progress";
    strategy?: "file-based" | "feature-based" | "risk-based";
  },
): Promise<SwarmHistoryRecord[]> {
  const DEFAULT_LIMIT = 10;

  const swarmMail = await getSwarmMailLibSQL(projectPath);
  const db = await swarmMail.getDatabase();

  // Reusable SQL fragments for the JSON payload fields used in filters.
  const successFlag = "json_extract(data, '$.overall_success')";
  const completed = "json_extract(data, '$.completed_count')";
  const planned = "json_extract(data, '$.task_count')";

  // Build WHERE clause
  const conditions: string[] = [];
  const params: (string | number)[] = [];

  switch (options?.status) {
    case "success":
      conditions.push(`${successFlag} IN (1, 'true')`);
      break;
    case "failed":
      conditions.push(`${successFlag} IN (0, 'false')`, `${completed} = ${planned}`);
      break;
    case "in_progress":
      conditions.push(`${completed} < ${planned}`);
      break;
    default:
      break;
  }

  if (options?.strategy) {
    conditions.push("json_extract(data, '$.strategy') = ?");
    params.push(options.strategy);
  }

  const whereClause = conditions.length > 0 ? `AND ${conditions.join(" AND ")}` : "";

  const requestedLimit = options?.limit;
  const limit =
    typeof requestedLimit === "number" && Number.isInteger(requestedLimit) && requestedLimit > 0
      ? requestedLimit
      : DEFAULT_LIMIT;

  const query = `
    SELECT
      json_extract(data, '$.epic_id') as epic_id,
      json_extract(data, '$.task') as epic_title,
      json_extract(data, '$.strategy') as strategy,
      timestamp,
      json_extract(data, '$.overall_success') as overall_success,
      CAST(json_extract(data, '$.task_count') AS INTEGER) as task_count,
      CAST(json_extract(data, '$.completed_count') AS INTEGER) as completed_count
    FROM events
    WHERE type = 'eval_finalized'
    ${whereClause}
    ORDER BY timestamp DESC
    LIMIT ?
  `;

  params.push(limit);

  const result = await db.query(query, params);
  const rows = result.rows as unknown[];

  return rows.map((row) => {
    const r = row as Record<string, unknown>;
    const flag = r.overall_success;
    return {
      epic_id: String(r.epic_id || ""),
      epic_title: String(r.epic_title || "Unknown"),
      strategy: String(r.strategy || "unknown"),
      timestamp: String(r.timestamp || new Date().toISOString()),
      // Accept boolean true, numeric 1 (SQLite's JSON true) and the text "true".
      overall_success: flag === "true" || Number(flag) === 1,
      task_count: Number(r.task_count) || 0,
      completed_count: Number(r.completed_count) || 0,
    };
  });
}
|
|
806
|
+
|
|
807
|
+
/**
 * Format a timestamp as a coarse age relative to now ("12m ago", "2h ago",
 * "3d ago").
 *
 * Accepted inputs:
 * - anything `Date` can parse (e.g. an ISO-8601 string);
 * - a string of 11 or more digits, interpreted as epoch milliseconds. This
 *   is the form produced when a numeric timestamp column is passed through
 *   `String()` — NOTE(review): confirm the column's unit is milliseconds.
 *
 * @param timestamp - Timestamp text to describe.
 * @returns The age in minutes (< 1 hour), hours (< 1 day) or days.
 *   Timestamps in the future are reported as "0m ago"; unparseable input
 *   yields "unknown" instead of "NaNd ago".
 */
export function formatRelativeTime(timestamp: string): string {
  const text = timestamp.trim();
  const then = /^\d{11,}$/.test(text) ? Number(text) : new Date(text).getTime();

  if (!Number.isFinite(then)) {
    return "unknown";
  }

  // Clamp so clock skew or future-dated events never produce negative ages.
  const diffMs = Math.max(0, Date.now() - then);

  const minutes = Math.floor(diffMs / 60000);
  if (minutes < 60) return `${minutes}m ago`;

  const hours = Math.floor(diffMs / 3600000);
  if (hours < 24) return `${hours}h ago`;

  const days = Math.floor(diffMs / 86400000);
  return `${days}d ago`;
}
|
|
823
|
+
|
|
824
|
+
/**
 * Render swarm history records as a box-drawn table for CLI output.
 *
 * Column widths and the border width are derived from the data, so a long
 * relative time, strategy name or task counter widens the table instead of
 * pushing the right border out of line. Titles longer than 30 characters are
 * shortened to 27 characters plus "...".
 *
 * @param records - Records to list, in display order.
 * @returns The table joined with `\n`, or "No swarm history found" when
 *   `records` is empty.
 */
export function formatSwarmHistory(records: SwarmHistoryRecord[]): string {
  if (records.length === 0) {
    return "No swarm history found";
  }

  const MIN_INNER_WIDTH = 61;
  const HEADING = "SWARM HISTORY";
  const TITLE_LIMIT = 30;
  const TITLE_WIDTH = 32;

  const cells = records.map((r) => ({
    time: formatRelativeTime(r.timestamp),
    status: r.overall_success ? "✅" : "❌",
    title:
      r.epic_title.length > TITLE_LIMIT
        ? `${r.epic_title.slice(0, TITLE_LIMIT - 3)}...`
        : r.epic_title,
    strategy: r.strategy,
    tasks: `${r.completed_count}/${r.task_count} tasks`,
  }));

  // Widest value in a column, never narrower than `floor`.
  const widest = (values: string[], floor: number): number =>
    values.reduce((max, value) => Math.max(max, value.length), floor);

  const timeWidth = widest(cells.map((c) => c.time), 8);
  const strategyWidth = widest(cells.map((c) => c.strategy), 13);
  const tasksWidth = widest(cells.map((c) => c.tasks), 0);

  // All bodies have the same length because every column is padded.
  const bodies = cells.map(
    (c) =>
      ` ${c.time.padEnd(timeWidth)} ${c.status} ${c.title.padEnd(TITLE_WIDTH)} ` +
      `${c.strategy.padEnd(strategyWidth)} ${c.tasks.padEnd(tasksWidth)} `,
  );

  // The status glyphs (U+2705 / U+274C) are single code units that are
  // typically drawn two columns wide, so each row occupies one more display
  // column than its string length.
  const WIDE_GLYPH_EXTRA = 1;
  const innerWidth = Math.max(MIN_INNER_WIDTH, bodies[0].length + WIDE_GLYPH_EXTRA);

  const rule = "─".repeat(innerWidth);
  const headingPad = " ".repeat(Math.floor((innerWidth - HEADING.length) / 2));

  const lines: string[] = [];
  lines.push(`┌${rule}┐`);
  lines.push(`│${`${headingPad}${HEADING}`.padEnd(innerWidth)}│`);
  lines.push(`├${rule}┤`);

  for (const body of bodies) {
    lines.push(`│${body.padEnd(innerWidth - WIDE_GLYPH_EXTRA)}│`);
  }

  lines.push(`└${rule}┘`);

  return lines.join("\n");
}
|
|
861
|
+
|
|
585
862
|
// ============================================================================
|
|
586
863
|
// Exports
|
|
587
864
|
// ============================================================================
|
|
@@ -272,6 +272,168 @@ describe("captureSubtaskOutcome integration", () => {
|
|
|
272
272
|
});
|
|
273
273
|
});
|
|
274
274
|
|
|
275
|
+
// ============================================================================
|
|
276
|
+
// Event Emission Tests (subtask_outcome events to libSQL)
|
|
277
|
+
// ============================================================================
|
|
278
|
+
|
|
279
|
+
/**
 * Integration tests: `swarm_complete` must emit a `subtask_outcome` event and
 * that event must be reflected in `eval_records.outcomes`.
 *
 * Each test runs against its own freshly created temporary project directory
 * (created with `mkdtemp` under the OS temp dir, so it is unique and works on
 * platforms without `/tmp`), which is removed afterwards.
 */
describe("subtask_outcome event emission", () => {
  const mockContext = {
    sessionID: `test-event-emission-${Date.now()}`,
    messageID: `test-message-${Date.now()}`,
    agent: "test-agent",
    abort: new AbortController().signal,
  };

  let testProjectPath: string;

  /** Create an epic with exactly one subtask and return the generated IDs. */
  async function createSingleSubtaskEpic(
    epicTitle: string,
    subtaskTitle: string,
    file: string,
  ): Promise<{ epicId: string; beadId: string }> {
    const { hive_create_epic } = await import("./hive");

    const epicResult = await hive_create_epic.execute({
      epic_title: epicTitle,
      subtasks: [
        {
          title: subtaskTitle,
          priority: 2,
          files: [file],
        },
      ],
    }, mockContext);

    const epicData = JSON.parse(epicResult);
    return { epicId: epicData.epic.id, beadId: epicData.subtasks[0].id };
  }

  beforeEach(async () => {
    const os = await import("node:os");
    const path = await import("node:path");

    // mkdtemp guarantees a unique directory even when tests start in the same millisecond.
    testProjectPath = fs.mkdtempSync(path.join(os.tmpdir(), "test-event-emission-"));

    // Create .hive directory and issues.jsonl
    const hiveDir = path.join(testProjectPath, ".hive");
    fs.mkdirSync(hiveDir, { recursive: true });
    fs.writeFileSync(path.join(hiveDir, "issues.jsonl"), "", "utf-8");

    // Set hive working directory to testProjectPath
    const { setHiveWorkingDirectory } = await import("./hive");
    setHiveWorkingDirectory(testProjectPath);
  });

  afterEach(() => {
    if (testProjectPath && fs.existsSync(testProjectPath)) {
      fs.rmSync(testProjectPath, { recursive: true, force: true });
    }
  });

  test("swarm_complete emits subtask_outcome event to libSQL database", async () => {
    const { readEvents } = await import("swarm-mail");

    const { epicId, beadId } = await createSingleSubtaskEpic(
      "Add feature X",
      "Implement X service",
      "src/x.ts",
    );

    const startTime = Date.now() - 60000; // Started 1 minute ago

    // Call swarm_complete
    const result = await swarm_complete.execute(
      {
        project_key: testProjectPath,
        agent_name: "TestAgent",
        bead_id: beadId,
        summary: "Implemented X service",
        files_touched: ["src/x.ts"],
        skip_verification: true,
        skip_review: true,
        planned_files: ["src/x.ts"],
        start_time: startTime,
        error_count: 0,
        retry_count: 0,
      },
      mockContext,
    );

    const parsed = JSON.parse(result);
    expect(parsed.success).toBe(true);

    // Query events from libSQL database
    const events = await readEvents({
      projectKey: testProjectPath,
      types: ["subtask_outcome"],
    }, testProjectPath);

    // Should have exactly 1 subtask_outcome event
    expect(events.length).toBe(1);

    // The event union type is not narrowed by `types`, so payload fields are read untyped.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const event = events[0] as any;
    expect(event.type).toBe("subtask_outcome");
    expect(event.epic_id).toBe(epicId);
    expect(event.bead_id).toBe(beadId);
    expect(event.success).toBe(true);
    expect(event.duration_ms).toBeGreaterThan(0);
  });

  test("subtask_outcome event updates eval_records.outcomes in libSQL", async () => {
    const { getSwarmMailLibSQL } = await import("swarm-mail");

    const { epicId, beadId } = await createSingleSubtaskEpic(
      "Add feature Y",
      "Implement Y service",
      "src/y.ts",
    );

    const startTime = Date.now() - 90000; // Started 1.5 minutes ago

    // Call swarm_complete
    await swarm_complete.execute(
      {
        project_key: testProjectPath,
        agent_name: "TestAgent",
        bead_id: beadId,
        summary: "Implemented Y service",
        files_touched: ["src/y.ts", "src/y.test.ts"],
        skip_verification: true,
        skip_review: true,
        planned_files: ["src/y.ts"],
        start_time: startTime,
        error_count: 0,
        retry_count: 0,
      },
      mockContext,
    );

    // Query eval_records from libSQL
    const swarmMail = await getSwarmMailLibSQL(testProjectPath);
    const db = await swarmMail.getDatabase();

    const result = await db.query<{ outcomes: string | null }>(
      `SELECT outcomes FROM eval_records WHERE id = ?`,
      [epicId]
    );

    expect(result.rows.length).toBe(1);

    const outcomes = result.rows[0].outcomes;
    expect(outcomes).not.toBeNull();

    const parsed = JSON.parse(outcomes || "[]");
    expect(parsed.length).toBe(1);

    const outcome = parsed[0];
    expect(outcome.bead_id).toBe(beadId);
    expect(outcome.success).toBe(true);
    expect(outcome.duration_ms).toBeGreaterThan(0);
    expect(outcome.planned_files).toEqual(["src/y.ts"]);
    expect(outcome.actual_files).toEqual(["src/y.ts", "src/y.test.ts"]);
  });
});
|
|
436
|
+
|
|
275
437
|
// ============================================================================
|
|
276
438
|
// Eval Capture Integration Tests (swarm_record_outcome)
|
|
277
439
|
// ============================================================================
|
package/src/swarm-orchestrate.ts
CHANGED
|
@@ -1510,15 +1510,17 @@ This will be recorded as a negative learning signal.`;
|
|
|
1510
1510
|
|
|
1511
1511
|
// Emit SubtaskOutcomeEvent for learning system
|
|
1512
1512
|
try {
|
|
1513
|
-
const epicId = args.bead_id.includes(".")
|
|
1514
|
-
? args.bead_id.split(".")[0]
|
|
1515
|
-
: args.bead_id;
|
|
1516
|
-
|
|
1517
1513
|
const durationMs = args.start_time ? Date.now() - args.start_time : 0;
|
|
1514
|
+
|
|
1515
|
+
// Determine epic ID: use parent_id if available, otherwise fall back to extracting from bead_id
|
|
1516
|
+
// (New hive cell IDs don't follow epicId.subtaskNum pattern - they're independent IDs)
|
|
1517
|
+
const eventEpicId = cell.parent_id || (args.bead_id.includes(".")
|
|
1518
|
+
? args.bead_id.split(".")[0]
|
|
1519
|
+
: args.bead_id);
|
|
1518
1520
|
|
|
1519
1521
|
const event = createEvent("subtask_outcome", {
|
|
1520
1522
|
project_key: args.project_key,
|
|
1521
|
-
epic_id:
|
|
1523
|
+
epic_id: eventEpicId,
|
|
1522
1524
|
bead_id: args.bead_id,
|
|
1523
1525
|
planned_files: args.planned_files || [],
|
|
1524
1526
|
actual_files: args.files_touched || [],
|
|
@@ -146,8 +146,8 @@ describe("SUBTASK_PROMPT_V2", () => {
|
|
|
146
146
|
});
|
|
147
147
|
|
|
148
148
|
describe("formatSubtaskPromptV2", () => {
|
|
149
|
-
test("substitutes all placeholders correctly", () => {
|
|
150
|
-
const result = formatSubtaskPromptV2({
|
|
149
|
+
test("substitutes all placeholders correctly", async () => {
|
|
150
|
+
const result = await formatSubtaskPromptV2({
|
|
151
151
|
bead_id: "test-project-abc123-bead456",
|
|
152
152
|
epic_id: "test-project-abc123-epic789",
|
|
153
153
|
subtask_title: "Test Subtask",
|
|
@@ -165,8 +165,8 @@ describe("formatSubtaskPromptV2", () => {
|
|
|
165
165
|
expect(result).toContain("/path/to/project");
|
|
166
166
|
});
|
|
167
167
|
|
|
168
|
-
test("includes memory query step with MANDATORY emphasis", () => {
|
|
169
|
-
const result = formatSubtaskPromptV2({
|
|
168
|
+
test("includes memory query step with MANDATORY emphasis", async () => {
|
|
169
|
+
const result = await formatSubtaskPromptV2({
|
|
170
170
|
bead_id: "test-project-abc123-def456",
|
|
171
171
|
epic_id: "test-project-abc123-ghi789",
|
|
172
172
|
subtask_title: "Test",
|
|
@@ -939,3 +939,167 @@ describe("formatCoordinatorPrompt", () => {
|
|
|
939
939
|
expect(result).toContain("Phase 1.5:");
|
|
940
940
|
});
|
|
941
941
|
});
|
|
942
|
+
|
|
943
|
+
/**
 * Smoke tests for `getRecentEvalFailures`. They run against whatever memory
 * state exists, so they only check that the call resolves to a string and,
 * when that string is non-empty, that it carries the warning header.
 */
describe("getRecentEvalFailures", () => {
  // Resolve the function through a dynamic import inside each test.
  const loadSubject = async () => {
    const mod = await import("./swarm-prompts");
    return mod.getRecentEvalFailures;
  };

  test("returns empty string when no failures exist", async () => {
    const getRecentEvalFailures = await loadSubject();
    const output = await getRecentEvalFailures();

    // Must not throw; an empty string or a message are both acceptable.
    expect(typeof output).toBe("string");
  });

  test("returns formatted string when failures exist", async () => {
    const getRecentEvalFailures = await loadSubject();

    // Depends on actual memory state, so only the return type is checked.
    const output = await getRecentEvalFailures();
    expect(typeof output).toBe("string");
  });

  test("includes warning emoji in header when failures present", async () => {
    const getRecentEvalFailures = await loadSubject();
    const output = await getRecentEvalFailures();

    // Nothing to verify when there are no failures to report.
    if (output.length === 0) {
      return;
    }
    expect(output).toMatch(/⚠️|Recent Eval Failures/);
  });

  test("handles memory adapter errors gracefully", async () => {
    const getRecentEvalFailures = await loadSubject();

    // The promise must resolve even if memory is unavailable.
    await expect(getRecentEvalFailures()).resolves.toBeDefined();
  });
});
|
|
981
|
+
|
|
982
|
+
/**
 * Tests for `getPromptInsights`. Only the first coordinator test is active;
 * the remaining cases are declared with `test.skip`.
 */
describe("getPromptInsights", () => {
  // Resolve the function through a dynamic import inside each test.
  const loadSubject = async () => {
    const mod = await import("./swarm-prompts");
    return mod.getPromptInsights;
  };

  describe("for coordinators (planning prompts)", () => {
    test("returns formatted insights string", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      expect(typeof insights).toBe("string");
    });

    test.skip("includes strategy success rates when data exists", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      // Only meaningful when there is data to report.
      if (insights.length === 0) {
        return;
      }
      expect(insights).toMatch(/strategy|file-based|feature-based|risk-based/i);
    });

    test.skip("includes recent failure patterns", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      // Non-empty output is expected to mention failures or anti-patterns.
      if (insights.length === 0) {
        return;
      }
      expect(insights).toMatch(/avoid|failure|anti-pattern|success rate/i);
    });

    test.skip("returns empty string when no data available", async () => {
      const getPromptInsights = await loadSubject();

      // Filter on a project key that does not exist.
      const insights = await getPromptInsights({
        role: "coordinator",
        project_key: "non-existent-project-xyz123"
      });

      expect(typeof insights).toBe("string");
    });
  });

  describe("for workers (subtask prompts)", () => {
    test.skip("returns formatted insights string", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({
        role: "worker",
        files: ["src/test.ts"]
      });

      expect(typeof insights).toBe("string");
    });

    test.skip("queries semantic-memory for file-specific learnings", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({
        role: "worker",
        files: ["src/auth.ts", "src/api/login.ts"]
      });

      // The result format is not checked; the call just has to succeed.
      expect(typeof insights).toBe("string");
    });

    test.skip("includes common pitfalls for domain area", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({
        role: "worker",
        domain: "authentication"
      });

      if (insights.length === 0) {
        return;
      }
      expect(insights).toMatch(/pitfall|gotcha|warning|common|issue/i);
    });
  });

  describe("handles errors gracefully", () => {
    test.skip("returns empty string when database unavailable", async () => {
      const getPromptInsights = await loadSubject();

      // Must resolve even if the swarm-mail DB is unavailable.
      await expect(getPromptInsights({ role: "coordinator" })).resolves.toBeDefined();
    });

    test.skip("returns empty string when semantic-memory unavailable", async () => {
      const getPromptInsights = await loadSubject();

      // Must resolve even if memory is unavailable.
      await expect(getPromptInsights({ role: "worker", files: [] })).resolves.toBeDefined();
    });
  });

  describe("formatting", () => {
    test.skip("formats strategy stats as readable table", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      // When a strategy section is present it should be a table or similar.
      if (!insights.includes("Strategy")) {
        return;
      }
      expect(insights).toMatch(/\|.*\||\n-+\n|Strategy.*Success/i);
    });

    test.skip("limits output to prevent context bloat", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      // Output must stay under 2000 characters.
      expect(insights.length).toBeLessThan(2000);
    });

    test.skip("includes visual emphasis (emoji or markdown)", async () => {
      const getPromptInsights = await loadSubject();
      const insights = await getPromptInsights({ role: "coordinator" });

      // Non-empty output should carry at least some formatting.
      if (insights.length === 0) {
        return;
      }
      expect(insights).toMatch(/##|📊|✅|❌|⚠️|\*\*/);
    });
  });
});
|