opencode-swarm-plugin 0.40.0 → 0.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
- package/.hive/analysis/session-data-quality-audit.md +320 -0
- package/.hive/eval-results.json +481 -24
- package/.hive/issues.jsonl +67 -16
- package/.hive/memories.jsonl +159 -1
- package/.opencode/eval-history.jsonl +315 -0
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +165 -0
- package/README.md +2 -0
- package/SCORER-ANALYSIS.md +598 -0
- package/bin/eval-gate.test.ts +158 -0
- package/bin/eval-gate.ts +74 -0
- package/bin/swarm.serve.test.ts +46 -0
- package/bin/swarm.test.ts +661 -732
- package/bin/swarm.ts +335 -0
- package/dist/compaction-hook.d.ts +7 -5
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +1 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -1
- package/dist/eval-runner.d.ts +134 -0
- package/dist/eval-runner.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +29 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +99741 -58858
- package/dist/memory-tools.d.ts +70 -2
- package/dist/memory-tools.d.ts.map +1 -1
- package/dist/memory.d.ts +37 -0
- package/dist/memory.d.ts.map +1 -1
- package/dist/observability-tools.d.ts +64 -0
- package/dist/observability-tools.d.ts.map +1 -1
- package/dist/plugin.js +99356 -58318
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +32 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
- package/evals/ARCHITECTURE.md +1189 -0
- package/evals/example.eval.ts +3 -4
- package/evals/fixtures/compaction-prompt-cases.ts +6 -0
- package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
- package/evals/scorers/coordinator-discipline.ts +0 -323
- package/evals/swarm-decomposition.eval.ts +4 -2
- package/package.json +4 -3
- package/src/compaction-prompt-scorers.test.ts +185 -9
- package/src/compaction-prompt-scoring.ts +7 -5
- package/src/eval-runner.test.ts +128 -1
- package/src/eval-runner.ts +46 -0
- package/src/hive.ts +43 -42
- package/src/memory-tools.test.ts +84 -0
- package/src/memory-tools.ts +68 -3
- package/src/memory.test.ts +2 -112
- package/src/memory.ts +88 -49
- package/src/observability-tools.test.ts +13 -0
- package/src/observability-tools.ts +277 -0
- package/src/swarm-orchestrate.test.ts +162 -0
- package/src/swarm-orchestrate.ts +7 -5
- package/src/swarm-prompts.test.ts +168 -4
- package/src/swarm-prompts.ts +228 -7
- package/.env +0 -2
- package/.turbo/turbo-test.log +0 -481
- package/.turbo/turbo-typecheck.log +0 -1
package/bin/swarm.test.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
/**
|
|
3
|
-
* Tests for swarm CLI
|
|
3
|
+
* Tests for swarm CLI helpers
|
|
4
4
|
*
|
|
5
|
-
* These tests verify the
|
|
6
|
-
* - writeFileWithStatus
|
|
7
|
-
* -
|
|
8
|
-
* - rmWithStatus: logs file removal
|
|
5
|
+
* These tests verify the CLI helpers:
|
|
6
|
+
* - File operation helpers (writeFileWithStatus, mkdirWithStatus, rmWithStatus)
|
|
7
|
+
* - Swarm history helpers (formatSwarmHistory, parseHistoryArgs, filterHistoryByStatus)
|
|
9
8
|
*/
|
|
10
9
|
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
|
|
11
10
|
import { mkdirSync, rmSync, writeFileSync, existsSync, readFileSync, readdirSync } from "fs";
|
|
@@ -501,17 +500,17 @@ describe("swarm log sessions", () => {
|
|
|
501
500
|
{
|
|
502
501
|
session_id: "s1",
|
|
503
502
|
epic_id: "e1",
|
|
504
|
-
timestamp: "2025-01-01T00:
|
|
503
|
+
timestamp: "2025-01-01T00:00:01Z",
|
|
505
504
|
event_type: "VIOLATION",
|
|
506
|
-
violation_type: "
|
|
505
|
+
violation_type: "direct_edit",
|
|
507
506
|
payload: {},
|
|
508
507
|
},
|
|
509
508
|
{
|
|
510
509
|
session_id: "s1",
|
|
511
510
|
epic_id: "e1",
|
|
512
|
-
timestamp: "2025-01-01T00:
|
|
511
|
+
timestamp: "2025-01-01T00:00:02Z",
|
|
513
512
|
event_type: "DECISION",
|
|
514
|
-
decision_type: "
|
|
513
|
+
decision_type: "worker_spawned",
|
|
515
514
|
payload: {},
|
|
516
515
|
},
|
|
517
516
|
];
|
|
@@ -535,9 +534,9 @@ describe("swarm log sessions", () => {
|
|
|
535
534
|
{
|
|
536
535
|
session_id: "s1",
|
|
537
536
|
epic_id: "e1",
|
|
538
|
-
timestamp: "2025-01-01T00:
|
|
537
|
+
timestamp: "2025-01-01T00:00:01Z",
|
|
539
538
|
event_type: "VIOLATION",
|
|
540
|
-
violation_type: "
|
|
539
|
+
violation_type: "direct_edit",
|
|
541
540
|
payload: {},
|
|
542
541
|
},
|
|
543
542
|
];
|
|
@@ -555,7 +554,7 @@ describe("swarm log sessions", () => {
|
|
|
555
554
|
{
|
|
556
555
|
session_id: "s1",
|
|
557
556
|
epic_id: "e1",
|
|
558
|
-
timestamp: new Date(now -
|
|
557
|
+
timestamp: new Date(now - 5000).toISOString(), // 5s ago
|
|
559
558
|
event_type: "DECISION",
|
|
560
559
|
decision_type: "worker_spawned",
|
|
561
560
|
payload: {},
|
|
@@ -563,17 +562,17 @@ describe("swarm log sessions", () => {
|
|
|
563
562
|
{
|
|
564
563
|
session_id: "s1",
|
|
565
564
|
epic_id: "e1",
|
|
566
|
-
timestamp: new Date(now -
|
|
567
|
-
event_type: "
|
|
568
|
-
|
|
565
|
+
timestamp: new Date(now - 10000).toISOString(), // 10s ago
|
|
566
|
+
event_type: "DECISION",
|
|
567
|
+
decision_type: "worker_spawned",
|
|
569
568
|
payload: {},
|
|
570
569
|
},
|
|
571
570
|
{
|
|
572
571
|
session_id: "s1",
|
|
573
572
|
epic_id: "e1",
|
|
574
|
-
timestamp: new Date(now -
|
|
575
|
-
event_type: "
|
|
576
|
-
|
|
573
|
+
timestamp: new Date(now - 60000).toISOString(), // 1min ago
|
|
574
|
+
event_type: "DECISION",
|
|
575
|
+
decision_type: "worker_spawned",
|
|
577
576
|
payload: {},
|
|
578
577
|
},
|
|
579
578
|
];
|
|
@@ -682,841 +681,771 @@ describe("Cells command", () => {
|
|
|
682
681
|
},
|
|
683
682
|
];
|
|
684
683
|
|
|
685
|
-
const
|
|
686
|
-
|
|
687
|
-
// Should contain headers
|
|
688
|
-
expect(table).toContain("ID");
|
|
689
|
-
expect(table).toContain("TITLE");
|
|
690
|
-
expect(table).toContain("STATUS");
|
|
691
|
-
expect(table).toContain("PRIORITY");
|
|
692
|
-
|
|
693
|
-
// Should contain cell data
|
|
694
|
-
expect(table).toContain("test-abc123-xyz");
|
|
695
|
-
expect(table).toContain("Fix bug");
|
|
696
|
-
expect(table).toContain("open");
|
|
697
|
-
expect(table).toContain("0");
|
|
698
|
-
|
|
699
|
-
expect(table).toContain("test-def456-abc");
|
|
700
|
-
expect(table).toContain("Add feature");
|
|
701
|
-
expect(table).toContain("in_progress");
|
|
702
|
-
expect(table).toContain("2");
|
|
703
|
-
});
|
|
684
|
+
const result = formatCellsTable(cells);
|
|
704
685
|
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
expect(
|
|
686
|
+
expect(result).toContain("ID");
|
|
687
|
+
expect(result).toContain("TITLE");
|
|
688
|
+
expect(result).toContain("STATUS");
|
|
689
|
+
expect(result).toContain("PRIORITY");
|
|
690
|
+
expect(result).toContain("Fix bug");
|
|
691
|
+
expect(result).toContain("Add feature");
|
|
692
|
+
expect(result).toContain("open");
|
|
693
|
+
expect(result).toContain("in_progress");
|
|
708
694
|
});
|
|
709
|
-
});
|
|
710
|
-
});
|
|
711
|
-
|
|
712
|
-
describe("Log command helpers", () => {
|
|
713
|
-
let testDir: string;
|
|
714
|
-
|
|
715
|
-
beforeEach(() => {
|
|
716
|
-
testDir = join(tmpdir(), `swarm-log-test-${Date.now()}`);
|
|
717
|
-
mkdirSync(testDir, { recursive: true });
|
|
718
|
-
});
|
|
719
|
-
|
|
720
|
-
afterEach(() => {
|
|
721
|
-
if (existsSync(testDir)) {
|
|
722
|
-
rmSync(testDir, { recursive: true, force: true });
|
|
723
|
-
}
|
|
724
|
-
});
|
|
725
|
-
|
|
726
|
-
describe("parseLogLine", () => {
|
|
727
|
-
function parseLogLine(line: string): { level: number; time: string; module: string; msg: string } | null {
|
|
728
|
-
try {
|
|
729
|
-
const parsed = JSON.parse(line);
|
|
730
|
-
if (typeof parsed.level === "number" && parsed.time && parsed.msg) {
|
|
731
|
-
return {
|
|
732
|
-
level: parsed.level,
|
|
733
|
-
time: parsed.time,
|
|
734
|
-
module: parsed.module || "unknown",
|
|
735
|
-
msg: parsed.msg,
|
|
736
|
-
};
|
|
737
|
-
}
|
|
738
|
-
} catch {
|
|
739
|
-
// Invalid JSON
|
|
740
|
-
}
|
|
741
|
-
return null;
|
|
742
|
-
}
|
|
743
695
|
|
|
744
|
-
test("
|
|
745
|
-
const
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
696
|
+
test("truncates long titles with ellipsis", () => {
|
|
697
|
+
const cells = [
|
|
698
|
+
{
|
|
699
|
+
id: "test-abc",
|
|
700
|
+
title: "A".repeat(100),
|
|
701
|
+
status: "open",
|
|
702
|
+
priority: 0,
|
|
703
|
+
type: "task",
|
|
704
|
+
created_at: 1234567890,
|
|
705
|
+
updated_at: 1234567890,
|
|
706
|
+
},
|
|
707
|
+
];
|
|
753
708
|
|
|
754
|
-
|
|
755
|
-
const line = "not json";
|
|
756
|
-
expect(parseLogLine(line)).toBeNull();
|
|
757
|
-
});
|
|
709
|
+
const result = formatCellsTable(cells);
|
|
758
710
|
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
const result = parseLogLine(line);
|
|
762
|
-
|
|
763
|
-
expect(result?.module).toBe("unknown");
|
|
711
|
+
expect(result).toContain("...");
|
|
712
|
+
expect(result.split("\n")[2]).toMatch(/A{47}\.\.\./);
|
|
764
713
|
});
|
|
765
|
-
});
|
|
766
714
|
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
return logs.filter((log) => log.level >= minLevel);
|
|
770
|
-
}
|
|
715
|
+
test("returns 'No cells found' for empty array", () => {
|
|
716
|
+
const result = formatCellsTable([]);
|
|
771
717
|
|
|
772
|
-
|
|
773
|
-
const logs = [
|
|
774
|
-
{ level: 10 }, // trace
|
|
775
|
-
{ level: 30 }, // info
|
|
776
|
-
{ level: 50 }, // error
|
|
777
|
-
];
|
|
778
|
-
|
|
779
|
-
const result = filterLogsByLevel(logs, 30);
|
|
780
|
-
expect(result).toHaveLength(2);
|
|
781
|
-
expect(result[0].level).toBe(30);
|
|
782
|
-
expect(result[1].level).toBe(50);
|
|
718
|
+
expect(result).toBe("No cells found");
|
|
783
719
|
});
|
|
784
720
|
|
|
785
|
-
test("
|
|
786
|
-
const
|
|
787
|
-
{
|
|
788
|
-
|
|
789
|
-
|
|
721
|
+
test("aligns columns correctly", () => {
|
|
722
|
+
const cells = [
|
|
723
|
+
{
|
|
724
|
+
id: "short",
|
|
725
|
+
title: "T",
|
|
726
|
+
status: "open",
|
|
727
|
+
priority: 0,
|
|
728
|
+
type: "task",
|
|
729
|
+
created_at: 1234567890,
|
|
730
|
+
updated_at: 1234567890,
|
|
731
|
+
},
|
|
732
|
+
{
|
|
733
|
+
id: "very-long-id-here",
|
|
734
|
+
title: "Very long title here",
|
|
735
|
+
status: "in_progress",
|
|
736
|
+
priority: 2,
|
|
737
|
+
type: "task",
|
|
738
|
+
created_at: 1234567890,
|
|
739
|
+
updated_at: 1234567890,
|
|
740
|
+
},
|
|
790
741
|
];
|
|
791
|
-
|
|
792
|
-
const result = filterLogsByLevel(logs, 0);
|
|
793
|
-
expect(result).toHaveLength(3);
|
|
794
|
-
});
|
|
795
|
-
});
|
|
796
742
|
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
return logs.filter((log) => log.module === module);
|
|
800
|
-
}
|
|
743
|
+
const result = formatCellsTable(cells);
|
|
744
|
+
const lines = result.split("\n");
|
|
801
745
|
|
|
802
|
-
|
|
803
|
-
const
|
|
804
|
-
|
|
805
|
-
{ module: "swarm" },
|
|
806
|
-
{ module: "compaction" },
|
|
807
|
-
];
|
|
808
|
-
|
|
809
|
-
const result = filterLogsByModule(logs, "compaction");
|
|
810
|
-
expect(result).toHaveLength(2);
|
|
811
|
-
});
|
|
812
|
-
|
|
813
|
-
test("returns empty array when no match", () => {
|
|
814
|
-
const logs = [
|
|
815
|
-
{ module: "compaction" },
|
|
816
|
-
];
|
|
817
|
-
|
|
818
|
-
const result = filterLogsByModule(logs, "swarm");
|
|
819
|
-
expect(result).toHaveLength(0);
|
|
746
|
+
// All lines should be same length (aligned)
|
|
747
|
+
const lengths = lines.map(l => l.length);
|
|
748
|
+
expect(Math.max(...lengths) - Math.min(...lengths)).toBeLessThan(3);
|
|
820
749
|
});
|
|
821
750
|
});
|
|
751
|
+
});
|
|
822
752
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
if (!match) return null;
|
|
827
|
-
|
|
828
|
-
const [, num, unit] = match;
|
|
829
|
-
const value = parseInt(num, 10);
|
|
830
|
-
|
|
831
|
-
const multipliers: Record<string, number> = {
|
|
832
|
-
s: 1000,
|
|
833
|
-
m: 60 * 1000,
|
|
834
|
-
h: 60 * 60 * 1000,
|
|
835
|
-
d: 24 * 60 * 60 * 1000,
|
|
836
|
-
};
|
|
837
|
-
|
|
838
|
-
return value * multipliers[unit];
|
|
839
|
-
}
|
|
840
|
-
|
|
841
|
-
function filterLogsBySince(logs: Array<{ time: string }>, sinceMs: number): Array<{ time: string }> {
|
|
842
|
-
const cutoffTime = Date.now() - sinceMs;
|
|
843
|
-
return logs.filter((log) => new Date(log.time).getTime() >= cutoffTime);
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
test("parseDuration handles seconds", () => {
|
|
847
|
-
expect(parseDuration("30s")).toBe(30 * 1000);
|
|
848
|
-
});
|
|
753
|
+
// ============================================================================
|
|
754
|
+
// Eval Gate Tests (TDD)
|
|
755
|
+
// ============================================================================
|
|
849
756
|
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
757
|
+
interface EvalRunRecord {
|
|
758
|
+
timestamp: string;
|
|
759
|
+
eval_name: string;
|
|
760
|
+
score: number;
|
|
761
|
+
run_count: number;
|
|
762
|
+
}
|
|
853
763
|
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
764
|
+
interface GateResult {
|
|
765
|
+
passed: boolean;
|
|
766
|
+
phase: "bootstrap" | "stabilization" | "production";
|
|
767
|
+
message: string;
|
|
768
|
+
baseline?: number;
|
|
769
|
+
variance?: number;
|
|
770
|
+
}
|
|
857
771
|
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
772
|
+
/**
|
|
773
|
+
* Calculate variance for phase transitions
|
|
774
|
+
*/
|
|
775
|
+
function calculateVariance(scores: number[]): number {
|
|
776
|
+
if (scores.length <= 1) return 0;
|
|
861
777
|
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
expect(parseDuration("30")).toBeNull();
|
|
866
|
-
});
|
|
778
|
+
const mean = scores.reduce((sum, x) => sum + x, 0) / scores.length;
|
|
779
|
+
const squaredDiffs = scores.map((x) => Math.pow(x - mean, 2));
|
|
780
|
+
const variance = squaredDiffs.reduce((sum, x) => sum + x, 0) / scores.length;
|
|
867
781
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
const logs = [
|
|
871
|
-
{ time: new Date(now - 10000).toISOString() }, // 10s ago
|
|
872
|
-
{ time: new Date(now - 120000).toISOString() }, // 2m ago
|
|
873
|
-
{ time: new Date(now - 1000).toISOString() }, // 1s ago
|
|
874
|
-
];
|
|
875
|
-
|
|
876
|
-
const result = filterLogsBySince(logs, 60000); // Last 1m
|
|
877
|
-
expect(result).toHaveLength(2); // Only logs within last minute
|
|
878
|
-
});
|
|
879
|
-
});
|
|
782
|
+
return variance;
|
|
783
|
+
}
|
|
880
784
|
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
if (level >= 30) return "INFO ";
|
|
887
|
-
if (level >= 20) return "DEBUG";
|
|
888
|
-
return "TRACE";
|
|
889
|
-
}
|
|
785
|
+
/**
|
|
786
|
+
* Read all eval run records from .hive/eval-history.jsonl
|
|
787
|
+
*/
|
|
788
|
+
function readAllRecords(projectPath: string): EvalRunRecord[] {
|
|
789
|
+
const recordsPath = join(projectPath, ".hive", "eval-history.jsonl");
|
|
890
790
|
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
const module = log.module.padEnd(12);
|
|
895
|
-
return `${timestamp} ${levelName} ${module} ${log.msg}`;
|
|
896
|
-
}
|
|
791
|
+
if (!existsSync(recordsPath)) {
|
|
792
|
+
return [];
|
|
793
|
+
}
|
|
897
794
|
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
level: 30,
|
|
901
|
-
time: "2024-12-24T16:00:00.000Z",
|
|
902
|
-
module: "compaction",
|
|
903
|
-
msg: "started",
|
|
904
|
-
};
|
|
905
|
-
|
|
906
|
-
const result = formatLogLine(log);
|
|
907
|
-
expect(result).toContain("INFO");
|
|
908
|
-
expect(result).toContain("compaction");
|
|
909
|
-
expect(result).toContain("started");
|
|
910
|
-
});
|
|
795
|
+
const content = readFileSync(recordsPath, "utf-8");
|
|
796
|
+
const lines = content.split("\n").filter((line) => line.trim());
|
|
911
797
|
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
const log2 = formatLogLine({ level: 30, time: "2024-12-24T16:00:00.000Z", module: "compaction", msg: "test" });
|
|
915
|
-
|
|
916
|
-
// Module names should be padded to 12 chars
|
|
917
|
-
expect(log1).toContain("a test"); // 'a' + 11 spaces
|
|
918
|
-
expect(log2).toContain("compaction test"); // 'compaction' + 3 spaces (10 chars + 2)
|
|
919
|
-
});
|
|
798
|
+
return lines.map((line) => JSON.parse(line) as EvalRunRecord);
|
|
799
|
+
}
|
|
920
800
|
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
801
|
+
/**
|
|
802
|
+
* Record an eval run to .hive/eval-history.jsonl
|
|
803
|
+
*/
|
|
804
|
+
function recordEvalRun(
|
|
805
|
+
projectPath: string,
|
|
806
|
+
record: EvalRunRecord,
|
|
807
|
+
): void {
|
|
808
|
+
const hivePath = join(projectPath, ".hive");
|
|
809
|
+
const recordsPath = join(hivePath, "eval-history.jsonl");
|
|
810
|
+
|
|
811
|
+
// Ensure .hive directory exists
|
|
812
|
+
if (!existsSync(hivePath)) {
|
|
813
|
+
mkdirSync(hivePath, { recursive: true });
|
|
814
|
+
}
|
|
930
815
|
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
// Create test log files
|
|
934
|
-
const log1 = join(testDir, "swarm.1log");
|
|
935
|
-
const log2 = join(testDir, "swarm.2log");
|
|
936
|
-
const log3 = join(testDir, "compaction.1log");
|
|
937
|
-
|
|
938
|
-
writeFileSync(log1, '{"level":30,"time":"2024-12-24T16:00:00.000Z","msg":"line1"}\n');
|
|
939
|
-
writeFileSync(log2, '{"level":30,"time":"2024-12-24T16:00:01.000Z","msg":"line2"}\n');
|
|
940
|
-
writeFileSync(log3, '{"level":30,"time":"2024-12-24T16:00:02.000Z","module":"compaction","msg":"line3"}\n');
|
|
941
|
-
|
|
942
|
-
function readLogFiles(dir: string): string[] {
|
|
943
|
-
if (!existsSync(dir)) return [];
|
|
944
|
-
|
|
945
|
-
const files = readdirSync(dir)
|
|
946
|
-
.filter((f) => /\.\d+log$/.test(f))
|
|
947
|
-
.sort() // Sort by filename
|
|
948
|
-
.map((f) => join(dir, f));
|
|
949
|
-
|
|
950
|
-
const lines: string[] = [];
|
|
951
|
-
for (const file of files) {
|
|
952
|
-
const content = readFileSync(file, "utf-8");
|
|
953
|
-
lines.push(...content.split("\n").filter((line) => line.trim()));
|
|
954
|
-
}
|
|
955
|
-
|
|
956
|
-
return lines;
|
|
957
|
-
}
|
|
958
|
-
|
|
959
|
-
const lines = readLogFiles(testDir);
|
|
960
|
-
expect(lines).toHaveLength(3);
|
|
961
|
-
// Files are sorted alphabetically: compaction.1log, swarm.1log, swarm.2log
|
|
962
|
-
expect(lines.some((l) => l.includes("line1"))).toBe(true);
|
|
963
|
-
expect(lines.some((l) => l.includes("line2"))).toBe(true);
|
|
964
|
-
expect(lines.some((l) => l.includes("line3"))).toBe(true);
|
|
965
|
-
});
|
|
816
|
+
// Append record as JSONL
|
|
817
|
+
const line = JSON.stringify(record) + "\n";
|
|
966
818
|
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
expect(lines).toHaveLength(0);
|
|
975
|
-
});
|
|
976
|
-
});
|
|
819
|
+
if (existsSync(recordsPath)) {
|
|
820
|
+
const existingContent = readFileSync(recordsPath, "utf-8");
|
|
821
|
+
writeFileSync(recordsPath, existingContent + line);
|
|
822
|
+
} else {
|
|
823
|
+
writeFileSync(recordsPath, line);
|
|
824
|
+
}
|
|
825
|
+
}
|
|
977
826
|
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
// Track file position for incremental reads
|
|
991
|
-
let lastSize = 0;
|
|
992
|
-
|
|
993
|
-
function readNewLines(filePath: string): string[] {
|
|
994
|
-
const content = readFileSync(filePath, "utf-8");
|
|
995
|
-
const newContent = content.slice(lastSize);
|
|
996
|
-
lastSize = content.length;
|
|
997
|
-
return newContent.split("\n").filter((line) => line.trim());
|
|
998
|
-
}
|
|
999
|
-
|
|
1000
|
-
// Simulate watch behavior
|
|
1001
|
-
const watcher = watch(testDir, (eventType, filename) => {
|
|
1002
|
-
if (filename && /\.\d+log$/.test(filename)) {
|
|
1003
|
-
const newLines = readNewLines(join(testDir, filename));
|
|
1004
|
-
collectedLines.push(...newLines);
|
|
1005
|
-
}
|
|
1006
|
-
});
|
|
1007
|
-
|
|
1008
|
-
// Wait for watcher to be ready
|
|
1009
|
-
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1010
|
-
|
|
1011
|
-
// Append new log line
|
|
1012
|
-
appendFileSync(logFile, '{"level":30,"time":"2024-12-24T16:00:01.000Z","msg":"appended"}\n');
|
|
1013
|
-
|
|
1014
|
-
// Wait for event to fire
|
|
1015
|
-
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
1016
|
-
|
|
1017
|
-
watcher.close();
|
|
1018
|
-
|
|
1019
|
-
// Should have detected the new line
|
|
1020
|
-
expect(collectedLines.some((l) => l.includes("appended"))).toBe(true);
|
|
1021
|
-
});
|
|
827
|
+
/**
|
|
828
|
+
* Check eval gate for progressive gating
|
|
829
|
+
*/
|
|
830
|
+
function checkGate(
|
|
831
|
+
projectPath: string,
|
|
832
|
+
evalName: string,
|
|
833
|
+
currentScore: number,
|
|
834
|
+
): GateResult {
|
|
835
|
+
const records = readAllRecords(projectPath).filter(
|
|
836
|
+
(r) => r.eval_name === evalName,
|
|
837
|
+
);
|
|
1022
838
|
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
if (arg === "--watch" || arg === "-w") {
|
|
1031
|
-
watch = true;
|
|
1032
|
-
} else if (arg === "--interval" && i + 1 < args.length) {
|
|
1033
|
-
interval = parseInt(args[++i], 10);
|
|
1034
|
-
}
|
|
1035
|
-
}
|
|
1036
|
-
|
|
1037
|
-
return { watch, interval };
|
|
1038
|
-
}
|
|
1039
|
-
|
|
1040
|
-
expect(parseWatchArgs(["--watch"])).toEqual({ watch: true, interval: 1000 });
|
|
1041
|
-
expect(parseWatchArgs(["-w"])).toEqual({ watch: true, interval: 1000 });
|
|
1042
|
-
expect(parseWatchArgs(["--watch", "--interval", "500"])).toEqual({ watch: true, interval: 500 });
|
|
1043
|
-
expect(parseWatchArgs(["compaction", "--watch"])).toEqual({ watch: true, interval: 1000 });
|
|
1044
|
-
expect(parseWatchArgs(["--level", "error"])).toEqual({ watch: false, interval: 1000 });
|
|
1045
|
-
});
|
|
1046
|
-
});
|
|
1047
|
-
});
|
|
839
|
+
if (records.length < 10) {
|
|
840
|
+
return {
|
|
841
|
+
passed: true,
|
|
842
|
+
phase: "bootstrap",
|
|
843
|
+
message: `BOOTSTRAP (${records.length}/10 runs): no gates yet`,
|
|
844
|
+
};
|
|
845
|
+
}
|
|
1048
846
|
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
847
|
+
const lastTenScores = records.slice(-10).map((r) => r.score);
|
|
848
|
+
const baseline = lastTenScores.reduce((sum, x) => sum + x, 0) / lastTenScores.length;
|
|
849
|
+
const variance = calculateVariance(lastTenScores);
|
|
1052
850
|
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
},
|
|
1063
|
-
recentScores: [
|
|
1064
|
-
{ timestamp: "2024-12-24T10:00:00.000Z", score: 0.85 },
|
|
1065
|
-
{ timestamp: "2024-12-24T11:00:00.000Z", score: 0.87 },
|
|
1066
|
-
{ timestamp: "2024-12-24T12:00:00.000Z", score: 0.82 },
|
|
1067
|
-
],
|
|
851
|
+
if (records.length < 50) {
|
|
852
|
+
const drop = ((baseline - currentScore) / baseline) * 100;
|
|
853
|
+
if (drop > 5) {
|
|
854
|
+
return {
|
|
855
|
+
passed: false,
|
|
856
|
+
phase: "stabilization",
|
|
857
|
+
message: `WARN: Score dropped ${drop.toFixed(1)}% from baseline ${baseline.toFixed(2)}`,
|
|
858
|
+
baseline,
|
|
859
|
+
variance,
|
|
1068
860
|
};
|
|
861
|
+
}
|
|
1069
862
|
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
// Should show thresholds
|
|
1079
|
-
expect(output).toContain("10%"); // stabilization threshold
|
|
1080
|
-
expect(output).toContain("5%"); // production threshold
|
|
1081
|
-
|
|
1082
|
-
// Should show recent scores
|
|
1083
|
-
expect(output).toContain("0.85");
|
|
1084
|
-
expect(output).toContain("0.87");
|
|
1085
|
-
expect(output).toContain("0.82");
|
|
1086
|
-
});
|
|
863
|
+
return {
|
|
864
|
+
passed: true,
|
|
865
|
+
phase: "stabilization",
|
|
866
|
+
message: `Stabilization (${records.length}/50 runs): baseline=${baseline.toFixed(2)}`,
|
|
867
|
+
baseline,
|
|
868
|
+
variance,
|
|
869
|
+
};
|
|
870
|
+
}
|
|
1087
871
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
}
|
|
1096
|
-
|
|
872
|
+
// Production phase: variance < 0.1 AND score doesn't drop >5%
|
|
873
|
+
if (variance < 0.1) {
|
|
874
|
+
const drop = ((baseline - currentScore) / baseline) * 100;
|
|
875
|
+
if (drop > 5) {
|
|
876
|
+
return {
|
|
877
|
+
passed: false,
|
|
878
|
+
phase: "production",
|
|
879
|
+
message: `FAIL: Score dropped ${drop.toFixed(1)}% from baseline ${baseline.toFixed(2)} (variance=${variance.toFixed(3)})`,
|
|
880
|
+
baseline,
|
|
881
|
+
variance,
|
|
1097
882
|
};
|
|
883
|
+
}
|
|
1098
884
|
|
|
1099
|
-
|
|
885
|
+
return {
|
|
886
|
+
passed: true,
|
|
887
|
+
phase: "production",
|
|
888
|
+
message: `PASS: Production phase (variance=${variance.toFixed(3)}, baseline=${baseline.toFixed(2)})`,
|
|
889
|
+
baseline,
|
|
890
|
+
variance,
|
|
891
|
+
};
|
|
892
|
+
}
|
|
1100
893
|
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
894
|
+
// Stuck in stabilization (>50 runs but variance still high)
|
|
895
|
+
return {
|
|
896
|
+
passed: true,
|
|
897
|
+
phase: "stabilization",
|
|
898
|
+
message: `Stabilization: variance too high (${variance.toFixed(3)} > 0.1), need more consistent runs`,
|
|
899
|
+
baseline,
|
|
900
|
+
variance,
|
|
901
|
+
};
|
|
902
|
+
}
|
|
1104
903
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
};
|
|
904
|
+
/**
|
|
905
|
+
* Ensure .hive directory exists
|
|
906
|
+
*/
|
|
907
|
+
function ensureHiveDirectory(projectPath: string): void {
|
|
908
|
+
const hivePath = join(projectPath, ".hive");
|
|
909
|
+
if (!existsSync(hivePath)) {
|
|
910
|
+
mkdirSync(hivePath, { recursive: true });
|
|
911
|
+
}
|
|
912
|
+
}
|
|
1115
913
|
|
|
1116
|
-
|
|
914
|
+
describe("Eval gate", () => {
|
|
915
|
+
let testDir: string;
|
|
1117
916
|
|
|
1118
|
-
|
|
1119
|
-
});
|
|
917
|
+
beforeEach(() => {
|
|
918
|
+
testDir = join(tmpdir(), `eval-gate-test-${Date.now()}`);
|
|
919
|
+
mkdirSync(testDir, { recursive: true });
|
|
1120
920
|
});
|
|
1121
921
|
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
eval_name: "swarm-decomposition",
|
|
1128
|
-
score: 0.85,
|
|
1129
|
-
run_count: 1,
|
|
1130
|
-
},
|
|
1131
|
-
{
|
|
1132
|
-
timestamp: "2024-12-24T11:00:00.000Z",
|
|
1133
|
-
eval_name: "swarm-decomposition",
|
|
1134
|
-
score: 0.87,
|
|
1135
|
-
run_count: 2,
|
|
1136
|
-
},
|
|
1137
|
-
{
|
|
1138
|
-
timestamp: "2024-12-24T12:00:00.000Z",
|
|
1139
|
-
eval_name: "coordinator-behavior",
|
|
1140
|
-
score: 0.92,
|
|
1141
|
-
run_count: 1,
|
|
1142
|
-
},
|
|
1143
|
-
];
|
|
922
|
+
afterEach(() => {
|
|
923
|
+
if (existsSync(testDir)) {
|
|
924
|
+
rmSync(testDir, { recursive: true, force: true });
|
|
925
|
+
}
|
|
926
|
+
});
|
|
1144
927
|
|
|
1145
|
-
|
|
928
|
+
describe("Bootstrap phase (<10 runs)", () => {
|
|
929
|
+
test("allows any score", () => {
|
|
930
|
+
ensureHiveDirectory(testDir);
|
|
931
|
+
|
|
932
|
+
// Record 5 runs
|
|
933
|
+
for (let i = 0; i < 5; i++) {
|
|
934
|
+
recordEvalRun(testDir, {
|
|
935
|
+
timestamp: new Date().toISOString(),
|
|
936
|
+
eval_name: "test-eval",
|
|
937
|
+
score: 0.5 + i * 0.1,
|
|
938
|
+
run_count: i + 1,
|
|
939
|
+
});
|
|
940
|
+
}
|
|
1146
941
|
|
|
1147
|
-
|
|
1148
|
-
expect(output).toContain("swarm-decomposition");
|
|
1149
|
-
expect(output).toContain("coordinator-behavior");
|
|
1150
|
-
|
|
1151
|
-
// Should show scores
|
|
1152
|
-
expect(output).toContain("0.85");
|
|
1153
|
-
expect(output).toContain("0.87");
|
|
1154
|
-
expect(output).toContain("0.92");
|
|
1155
|
-
|
|
1156
|
-
// Should show run counts
|
|
1157
|
-
expect(output).toContain("run #1");
|
|
1158
|
-
expect(output).toContain("run #2");
|
|
1159
|
-
});
|
|
942
|
+
const result = checkGate(testDir, "test-eval", 0.3); // Low score
|
|
1160
943
|
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
expect(
|
|
944
|
+
expect(result.passed).toBe(true);
|
|
945
|
+
expect(result.phase).toBe("bootstrap");
|
|
946
|
+
expect(result.message).toContain("BOOTSTRAP");
|
|
1164
947
|
});
|
|
1165
948
|
|
|
1166
|
-
test("
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
949
|
+
test("counts runs correctly", () => {
|
|
950
|
+
ensureHiveDirectory(testDir);
|
|
951
|
+
|
|
952
|
+
for (let i = 0; i < 7; i++) {
|
|
953
|
+
recordEvalRun(testDir, {
|
|
954
|
+
timestamp: new Date().toISOString(),
|
|
955
|
+
eval_name: "test-eval",
|
|
956
|
+
score: 0.8,
|
|
957
|
+
run_count: i + 1,
|
|
958
|
+
});
|
|
959
|
+
}
|
|
1175
960
|
|
|
1176
|
-
const
|
|
961
|
+
const result = checkGate(testDir, "test-eval", 0.8);
|
|
1177
962
|
|
|
1178
|
-
|
|
1179
|
-
expect(
|
|
1180
|
-
expect(output).toMatch(/\d{1,2}:\d{2}/); // Time format
|
|
963
|
+
expect(result.phase).toBe("bootstrap");
|
|
964
|
+
expect(result.message).toContain("7/10");
|
|
1181
965
|
});
|
|
1182
966
|
});
|
|
1183
967
|
|
|
1184
|
-
describe("
|
|
1185
|
-
test("
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
expect(sparkline).toContain("█"); // High score
|
|
1198
|
-
});
|
|
968
|
+
describe("Stabilization phase (10-50 runs)", () => {
|
|
969
|
+
test("warns on >5% regression", () => {
|
|
970
|
+
ensureHiveDirectory(testDir);
|
|
971
|
+
|
|
972
|
+
// Record 20 runs with consistent 0.9 score
|
|
973
|
+
for (let i = 0; i < 20; i++) {
|
|
974
|
+
recordEvalRun(testDir, {
|
|
975
|
+
timestamp: new Date().toISOString(),
|
|
976
|
+
eval_name: "test-eval",
|
|
977
|
+
score: 0.9,
|
|
978
|
+
run_count: i + 1,
|
|
979
|
+
});
|
|
980
|
+
}
|
|
1199
981
|
|
|
1200
|
-
|
|
1201
|
-
const
|
|
1202
|
-
|
|
1203
|
-
expect(sparkline).toMatch(/[▁▂▃▄▅▆▇█]/);
|
|
1204
|
-
});
|
|
982
|
+
// Test with regressed score (>5% drop from 0.9 baseline)
|
|
983
|
+
const regressedScore = 0.85; // 5.5% drop
|
|
984
|
+
const result = checkGate(testDir, "test-eval", regressedScore);
|
|
1205
985
|
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
expect(
|
|
1209
|
-
|
|
1210
|
-
expect(new Set(sparkline.split("")).size).toBe(1);
|
|
986
|
+
expect(result.passed).toBe(false);
|
|
987
|
+
expect(result.phase).toBe("stabilization");
|
|
988
|
+
expect(result.message).toContain("WARN");
|
|
989
|
+
expect(result.baseline).toBeCloseTo(0.9, 2);
|
|
1211
990
|
});
|
|
1212
991
|
|
|
1213
|
-
test("
|
|
1214
|
-
|
|
1215
|
-
expect(sparkline).toBe("");
|
|
1216
|
-
});
|
|
1217
|
-
});
|
|
992
|
+
test("passes when score is stable", () => {
|
|
993
|
+
ensureHiveDirectory(testDir);
|
|
1218
994
|
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
regressionPercent: 0.025,
|
|
1228
|
-
};
|
|
995
|
+
for (let i = 0; i < 25; i++) {
|
|
996
|
+
recordEvalRun(testDir, {
|
|
997
|
+
timestamp: new Date().toISOString(),
|
|
998
|
+
eval_name: "test-eval",
|
|
999
|
+
score: 0.85,
|
|
1000
|
+
run_count: i + 1,
|
|
1001
|
+
});
|
|
1002
|
+
}
|
|
1229
1003
|
|
|
1230
|
-
const
|
|
1004
|
+
const result = checkGate(testDir, "test-eval", 0.86);
|
|
1231
1005
|
|
|
1232
|
-
expect(
|
|
1233
|
-
expect(
|
|
1234
|
-
expect(
|
|
1235
|
-
expect(output).toContain("2.5%"); // regression
|
|
1006
|
+
expect(result.passed).toBe(true);
|
|
1007
|
+
expect(result.phase).toBe("stabilization");
|
|
1008
|
+
expect(result.baseline).toBeCloseTo(0.85, 2);
|
|
1236
1009
|
});
|
|
1010
|
+
});
|
|
1237
1011
|
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1012
|
+
describe("Production phase (>50 runs, low variance)", () => {
|
|
1013
|
+
test("enters production when variance < 0.1", () => {
|
|
1014
|
+
ensureHiveDirectory(testDir);
|
|
1015
|
+
|
|
1016
|
+
// Simulate 60 runs with consistent scores (low variance)
|
|
1017
|
+
for (let i = 0; i < 60; i++) {
|
|
1018
|
+
recordEvalRun(testDir, {
|
|
1019
|
+
timestamp: new Date().toISOString(),
|
|
1020
|
+
eval_name: "test-eval",
|
|
1021
|
+
score: 0.9, // All same score = zero variance
|
|
1022
|
+
run_count: i + 1,
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1247
1025
|
|
|
1248
|
-
const
|
|
1026
|
+
const result = checkGate(testDir, "test-eval", 0.91);
|
|
1249
1027
|
|
|
1250
|
-
expect(
|
|
1251
|
-
expect(
|
|
1252
|
-
expect(output).toContain("exceeds");
|
|
1028
|
+
expect(result.phase).toBe("production");
|
|
1029
|
+
expect(result.variance).toBeLessThan(0.1);
|
|
1253
1030
|
});
|
|
1254
1031
|
|
|
1255
|
-
test("
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1032
|
+
test("fails on regression in production", () => {
|
|
1033
|
+
ensureHiveDirectory(testDir);
|
|
1034
|
+
|
|
1035
|
+
// Simulate 60 runs with consistent high scores to reach production phase
|
|
1036
|
+
for (let i = 0; i < 60; i++) {
|
|
1037
|
+
recordEvalRun(testDir, {
|
|
1038
|
+
timestamp: new Date().toISOString(),
|
|
1039
|
+
eval_name: "test-eval",
|
|
1040
|
+
score: 0.9,
|
|
1041
|
+
run_count: i + 1,
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
1262
1044
|
|
|
1263
|
-
|
|
1045
|
+
// Now test with a regressed score (>5% drop from 0.9 baseline)
|
|
1046
|
+
const regressedScore = 0.8; // 11% drop
|
|
1047
|
+
const result = checkGate(testDir, "test-eval", regressedScore);
|
|
1264
1048
|
|
|
1265
|
-
expect(
|
|
1266
|
-
expect(
|
|
1267
|
-
expect(
|
|
1049
|
+
expect(result.passed).toBe(false);
|
|
1050
|
+
expect(result.phase).toBe("production");
|
|
1051
|
+
expect(result.message).toContain("FAIL");
|
|
1268
1052
|
});
|
|
1269
1053
|
});
|
|
1270
1054
|
});
|
|
1271
1055
|
|
|
1272
1056
|
// ============================================================================
|
|
1273
|
-
//
|
|
1057
|
+
// History Command Tests (TDD)
|
|
1274
1058
|
// ============================================================================
|
|
1275
1059
|
|
|
1060
|
+
/**
 * One swarm run as consumed by the history helpers in this file
 * (formatting, status filtering, strategy filtering).
 */
interface SwarmHistoryRecord {
  /** Epic identifier (fixtures use values like "epic-1"); not read by the helpers visible here. */
  epic_id: string;
  /** Human-readable epic title; the table formatter shortens titles longer than 30 characters. */
  epic_title: string;
  /** Strategy name; the strategy filter compares it against "file-based" | "feature-based" | "risk-based". */
  strategy: string;
  /** Timestamp string handed to `new Date(...)` when computing the relative age (fixtures use ISO-8601). */
  timestamp: string;
  /** Rendered as ✅ when true and ❌ when false; also drives the "success"/"failed" status filters. */
  overall_success: boolean;
  /** Total number of tasks; shown as the denominator of "completed/total tasks". */
  task_count: number;
  /** Number of completed tasks; a value below `task_count` is treated as "in_progress" by the status filter. */
  completed_count: number;
}
|
|
1069
|
+
|
|
1276
1070
|
/**
|
|
1277
|
-
*
|
|
1071
|
+
* Format relative time (e.g., "2h ago", "1d ago")
|
|
1278
1072
|
*/
|
|
1279
|
-
function
|
|
1280
|
-
|
|
1073
|
+
function formatRelativeTime(timestamp: string): string {
  // Contract:
  //   - `timestamp` is any string accepted by `new Date(...)` (ISO-8601 in practice).
  //   - Returns "<n>m ago" below one hour, "<n>h ago" below one day, "<n>d ago" otherwise.
  //   - Returns "unknown" when the timestamp cannot be parsed, instead of "NaNd ago".
  const then = new Date(timestamp).getTime();
  if (Number.isNaN(then)) {
    return "unknown";
  }

  // Clamp at zero: a timestamp slightly in the future (clock skew between
  // writer and reader) should read "0m ago", not "-1m ago".
  const elapsedMs = Math.max(0, Date.now() - then);

  const MS_PER_MINUTE = 60000;
  const MS_PER_HOUR = 3600000;
  const MS_PER_DAY = 86400000;

  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(elapsedMs / MS_PER_HOUR);
  if (hours < 24) {
    return `${hours}h ago`;
  }

  return `${Math.floor(elapsedMs / MS_PER_DAY)}d ago`;
}
|
|
1281
1086
|
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1087
|
+
/**
|
|
1088
|
+
* Format swarm history as beautiful CLI table
|
|
1089
|
+
*/
|
|
1090
|
+
function formatSwarmHistory(records: SwarmHistoryRecord[]): string {
  // Renders the records as a box-drawn table, one row per record, in input order.
  // An empty input short-circuits to a plain message with no box at all.
  if (records.length === 0) {
    return "No swarm history found";
  }

  const topBorder = "┌─────────────────────────────────────────────────────────────┐";
  const heading = "│ SWARM HISTORY │";
  const divider = "├─────────────────────────────────────────────────────────────┤";
  const bottomBorder = "└─────────────────────────────────────────────────────────────┘";

  const body = records.map((record) => {
    const age = formatRelativeTime(record.timestamp);
    const marker = record.overall_success ? "✅" : "❌";

    // Titles longer than 30 characters keep their first 27 plus an ellipsis.
    const fullTitle = record.epic_title;
    const title = fullTitle.length > 30 ? fullTitle.slice(0, 27) + "..." : fullTitle;

    const progress = `${record.completed_count}/${record.task_count} tasks`;

    const ageAndStatus = `${age.padEnd(8)} ${marker}`;
    const paddedTitle = title.padEnd(32);
    const paddedStrategy = record.strategy.padEnd(13);

    // NOTE(review): padEnd(3) never pads here because `progress` is always
    // longer than 3 characters; kept so the output stays identical.
    return `│ ${ageAndStatus} ${paddedTitle} ${paddedStrategy} ${progress.padEnd(3)} │`;
  });

  return [topBorder, heading, divider, ...body, bottomBorder].join("\n");
}
|
|
1300
1123
|
|
|
1301
1124
|
/**
|
|
1302
|
-
*
|
|
1125
|
+
* Filter history by status
|
|
1303
1126
|
*/
|
|
1304
|
-
function
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
lines.push("Thresholds:");
|
|
1320
|
-
lines.push(` Stabilization: ${(status.thresholds.stabilization * 100).toFixed(0)}% regression warning`);
|
|
1321
|
-
lines.push(` Production: ${(status.thresholds.production * 100).toFixed(0)}% regression failure`);
|
|
1322
|
-
lines.push("");
|
|
1323
|
-
|
|
1324
|
-
// Recent scores with sparkline
|
|
1325
|
-
if (status.recentScores.length > 0) {
|
|
1326
|
-
lines.push("Recent scores:");
|
|
1327
|
-
const sparkline = generateSparkline(status.recentScores.map((s) => s.score));
|
|
1328
|
-
lines.push(` ${sparkline}`);
|
|
1329
|
-
for (const { timestamp, score } of status.recentScores) {
|
|
1330
|
-
const time = new Date(timestamp).toLocaleString();
|
|
1331
|
-
lines.push(` ${time}: ${score.toFixed(2)}`);
|
|
1332
|
-
}
|
|
1333
|
-
} else {
|
|
1334
|
-
lines.push("No scores yet - collecting data");
|
|
1127
|
+
function filterHistoryByStatus(
  records: SwarmHistoryRecord[],
  status?: "success" | "failed" | "in_progress",
): SwarmHistoryRecord[] {
  // "success"     -> overall_success is truthy
  // "failed"      -> not successful, yet every task is counted as completed
  // "in_progress" -> fewer tasks completed than planned
  // An absent or unrecognised status returns the input array itself, unfiltered.
  if (status === "success") {
    return records.filter((entry) => entry.overall_success);
  }

  if (status === "failed") {
    return records.filter(
      (entry) => entry.completed_count === entry.task_count && !entry.overall_success,
    );
  }

  if (status === "in_progress") {
    return records.filter((entry) => entry.completed_count < entry.task_count);
  }

  return records;
}
|
|
1336
1144
|
|
|
1337
|
-
|
|
1145
|
+
/**
|
|
1146
|
+
* Filter history by strategy
|
|
1147
|
+
*/
|
|
1148
|
+
function filterHistoryByStrategy(
  records: SwarmHistoryRecord[],
  strategy?: "file-based" | "feature-based" | "risk-based",
): SwarmHistoryRecord[] {
  // Without a strategy the input array is handed back as-is; otherwise only
  // records whose `strategy` is strictly equal to the requested one are kept.
  return strategy
    ? records.filter(({ strategy: used }) => used === strategy)
    : records;
}
|
|
1339
1155
|
|
|
1340
1156
|
/**
|
|
1341
|
-
*
|
|
1157
|
+
* Parse history CLI arguments
|
|
1342
1158
|
*/
|
|
1343
|
-
function
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
}
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1159
|
+
function parseHistoryArgs(args: string[]): {
  limit: number;
  status?: "success" | "failed" | "in_progress";
  strategy?: "file-based" | "feature-based" | "risk-based";
  verbose: boolean;
} {
  // Recognised flags:
  //   --limit <n> | -n <n>   positive decimal integer; default 10
  //   --status <s>           "success" | "failed" | "in_progress"
  //   --strategy <s>         "file-based" | "feature-based" | "risk-based"
  //   --verbose | -v         boolean switch; default false
  //
  // A flag whose value is missing or invalid is ignored and its value is NOT
  // consumed, so `--limit --verbose` still turns verbose on. Unknown
  // arguments are skipped silently.
  const result: ReturnType<typeof parseHistoryArgs> = {
    limit: 10,
    verbose: false,
  };

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    const value = args[i + 1];

    switch (flag) {
      case "--limit":
      case "-n": {
        // Digits only: `Number(...)` alone would also accept "-5", "2.5",
        // "Infinity", "0x10" and whitespace-only strings (which coerce to 0).
        if (value !== undefined && /^\d+$/.test(value)) {
          const parsed = Number.parseInt(value, 10);
          if (Number.isSafeInteger(parsed) && parsed > 0) {
            result.limit = parsed;
            i++;
          }
        }
        break;
      }
      case "--status": {
        if (value === "success" || value === "failed" || value === "in_progress") {
          result.status = value;
          i++;
        }
        break;
      }
      case "--strategy": {
        if (value === "file-based" || value === "feature-based" || value === "risk-based") {
          result.strategy = value;
          i++;
        }
        break;
      }
      case "--verbose":
      case "-v": {
        result.verbose = true;
        break;
      }
      default:
        break;
    }
  }

  return result;
}
|
|
1388
1203
|
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
baseline?: number;
|
|
1397
|
-
currentScore: number;
|
|
1398
|
-
regressionPercent?: number;
|
|
1399
|
-
}): string {
|
|
1400
|
-
const lines: string[] = [];
|
|
1401
|
-
|
|
1402
|
-
// Pass/fail banner
|
|
1403
|
-
const status = result.passed ? "✅ PASS" : "❌ FAIL";
|
|
1404
|
-
lines.push(status);
|
|
1405
|
-
lines.push("");
|
|
1204
|
+
describe("swarm history", () => {
|
|
1205
|
+
describe("formatRelativeTime", () => {
|
|
1206
|
+
test("formats minutes ago", () => {
|
|
1207
|
+
const fiveMinutesAgo = new Date(Date.now() - 5 * 60000).toISOString();
|
|
1208
|
+
const result = formatRelativeTime(fiveMinutesAgo);
|
|
1209
|
+
expect(result).toMatch(/5m ago/);
|
|
1210
|
+
});
|
|
1406
1211
|
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1212
|
+
test("formats hours ago", () => {
|
|
1213
|
+
const threeHoursAgo = new Date(Date.now() - 3 * 3600000).toISOString();
|
|
1214
|
+
const result = formatRelativeTime(threeHoursAgo);
|
|
1215
|
+
expect(result).toMatch(/3h ago/);
|
|
1216
|
+
});
|
|
1410
1217
|
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1218
|
+
test("formats days ago", () => {
|
|
1219
|
+
const twoDaysAgo = new Date(Date.now() - 2 * 86400000).toISOString();
|
|
1220
|
+
const result = formatRelativeTime(twoDaysAgo);
|
|
1221
|
+
expect(result).toMatch(/2d ago/);
|
|
1222
|
+
});
|
|
1223
|
+
});
|
|
1414
1224
|
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1225
|
+
describe("formatSwarmHistory", () => {
|
|
1226
|
+
test("formats history as beautiful box-drawn table", () => {
|
|
1227
|
+
const records: SwarmHistoryRecord[] = [
|
|
1228
|
+
{
|
|
1229
|
+
epic_id: "epic-1",
|
|
1230
|
+
epic_title: "Add auth flow",
|
|
1231
|
+
strategy: "feature-based",
|
|
1232
|
+
timestamp: new Date(Date.now() - 2 * 3600000).toISOString(),
|
|
1233
|
+
overall_success: true,
|
|
1234
|
+
task_count: 4,
|
|
1235
|
+
completed_count: 4,
|
|
1236
|
+
},
|
|
1237
|
+
{
|
|
1238
|
+
epic_id: "epic-2",
|
|
1239
|
+
epic_title: "Refactor DB layer",
|
|
1240
|
+
strategy: "file-based",
|
|
1241
|
+
timestamp: new Date(Date.now() - 5 * 3600000).toISOString(),
|
|
1242
|
+
overall_success: false,
|
|
1243
|
+
task_count: 5,
|
|
1244
|
+
completed_count: 2,
|
|
1245
|
+
},
|
|
1246
|
+
];
|
|
1419
1247
|
|
|
1420
|
-
|
|
1421
|
-
|
|
1248
|
+
const result = formatSwarmHistory(records);
|
|
1249
|
+
|
|
1250
|
+
expect(result).toContain("┌─────");
|
|
1251
|
+
expect(result).toContain("SWARM HISTORY");
|
|
1252
|
+
expect(result).toContain("✅");
|
|
1253
|
+
expect(result).toContain("❌");
|
|
1254
|
+
expect(result).toContain("Add auth flow");
|
|
1255
|
+
expect(result).toContain("Refactor DB layer");
|
|
1256
|
+
expect(result).toContain("feature-based");
|
|
1257
|
+
expect(result).toContain("file-based");
|
|
1258
|
+
expect(result).toContain("4/4 tasks");
|
|
1259
|
+
expect(result).toContain("2/5 tasks");
|
|
1260
|
+
expect(result).toContain("└─────");
|
|
1261
|
+
});
|
|
1422
1262
|
|
|
1423
|
-
|
|
1424
|
-
|
|
1263
|
+
test("truncates long titles with ellipsis", () => {
|
|
1264
|
+
const records: SwarmHistoryRecord[] = [
|
|
1265
|
+
{
|
|
1266
|
+
epic_id: "epic-1",
|
|
1267
|
+
epic_title: "A".repeat(100),
|
|
1268
|
+
strategy: "feature-based",
|
|
1269
|
+
timestamp: new Date(Date.now() - 1000).toISOString(),
|
|
1270
|
+
overall_success: true,
|
|
1271
|
+
task_count: 1,
|
|
1272
|
+
completed_count: 1,
|
|
1273
|
+
},
|
|
1274
|
+
];
|
|
1425
1275
|
|
|
1426
|
-
|
|
1427
|
-
// Eval Run Tests
|
|
1428
|
-
// ============================================================================
|
|
1276
|
+
const result = formatSwarmHistory(records);
|
|
1429
1277
|
|
|
1430
|
-
|
|
1431
|
-
|
|
1278
|
+
expect(result).toContain("...");
|
|
1279
|
+
expect(result).toMatch(/A{27}\.\.\./);
|
|
1280
|
+
});
|
|
1432
1281
|
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1282
|
+
test("returns 'No swarm history found' for empty array", () => {
|
|
1283
|
+
const result = formatSwarmHistory([]);
|
|
1284
|
+
expect(result).toBe("No swarm history found");
|
|
1285
|
+
});
|
|
1436
1286
|
});
|
|
1437
1287
|
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1288
|
+
describe("filterHistoryByStatus", () => {
|
|
1289
|
+
const records: SwarmHistoryRecord[] = [
|
|
1290
|
+
{
|
|
1291
|
+
epic_id: "epic-1",
|
|
1292
|
+
epic_title: "Success",
|
|
1293
|
+
strategy: "feature-based",
|
|
1294
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1295
|
+
overall_success: true,
|
|
1296
|
+
task_count: 4,
|
|
1297
|
+
completed_count: 4,
|
|
1298
|
+
},
|
|
1299
|
+
{
|
|
1300
|
+
epic_id: "epic-2",
|
|
1301
|
+
epic_title: "Failed",
|
|
1302
|
+
strategy: "file-based",
|
|
1303
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1304
|
+
overall_success: false,
|
|
1305
|
+
task_count: 4,
|
|
1306
|
+
completed_count: 4,
|
|
1307
|
+
},
|
|
1308
|
+
{
|
|
1309
|
+
epic_id: "epic-3",
|
|
1310
|
+
epic_title: "In Progress",
|
|
1311
|
+
strategy: "risk-based",
|
|
1312
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1313
|
+
overall_success: false,
|
|
1314
|
+
task_count: 5,
|
|
1315
|
+
completed_count: 2,
|
|
1316
|
+
},
|
|
1317
|
+
];
|
|
1318
|
+
|
|
1319
|
+
test("filters success only", () => {
|
|
1320
|
+
const result = filterHistoryByStatus(records, "success");
|
|
1321
|
+
expect(result).toHaveLength(1);
|
|
1322
|
+
expect(result[0].epic_title).toBe("Success");
|
|
1323
|
+
});
|
|
1443
1324
|
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
const { ensureHiveDirectory } = await import("../src/hive.js");
|
|
1449
|
-
|
|
1450
|
-
// Set up test data
|
|
1451
|
-
const evalName = "test-eval";
|
|
1452
|
-
const mockScore = 0.85;
|
|
1453
|
-
|
|
1454
|
-
// Ensure directory exists
|
|
1455
|
-
ensureHiveDirectory(testDir);
|
|
1456
|
-
|
|
1457
|
-
// Get history and record run (simulating what eval run does)
|
|
1458
|
-
const history = getScoreHistory(testDir, evalName);
|
|
1459
|
-
recordEvalRun(testDir, {
|
|
1460
|
-
timestamp: new Date().toISOString(),
|
|
1461
|
-
eval_name: evalName,
|
|
1462
|
-
score: mockScore,
|
|
1463
|
-
run_count: history.length + 1,
|
|
1325
|
+
test("filters failed only", () => {
|
|
1326
|
+
const result = filterHistoryByStatus(records, "failed");
|
|
1327
|
+
expect(result).toHaveLength(1);
|
|
1328
|
+
expect(result[0].epic_title).toBe("Failed");
|
|
1464
1329
|
});
|
|
1465
1330
|
|
|
1466
|
-
|
|
1467
|
-
|
|
1331
|
+
test("filters in_progress only", () => {
|
|
1332
|
+
const result = filterHistoryByStatus(records, "in_progress");
|
|
1333
|
+
expect(result).toHaveLength(1);
|
|
1334
|
+
expect(result[0].epic_title).toBe("In Progress");
|
|
1335
|
+
});
|
|
1468
1336
|
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1337
|
+
test("returns all when no status filter", () => {
|
|
1338
|
+
const result = filterHistoryByStatus(records);
|
|
1339
|
+
expect(result).toHaveLength(3);
|
|
1340
|
+
});
|
|
1341
|
+
});
|
|
1473
1342
|
|
|
1474
|
-
|
|
1475
|
-
|
|
1343
|
+
describe("filterHistoryByStrategy", () => {
|
|
1344
|
+
const records: SwarmHistoryRecord[] = [
|
|
1345
|
+
{
|
|
1346
|
+
epic_id: "epic-1",
|
|
1347
|
+
epic_title: "File",
|
|
1348
|
+
strategy: "file-based",
|
|
1349
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1350
|
+
overall_success: true,
|
|
1351
|
+
task_count: 4,
|
|
1352
|
+
completed_count: 4,
|
|
1353
|
+
},
|
|
1354
|
+
{
|
|
1355
|
+
epic_id: "epic-2",
|
|
1356
|
+
epic_title: "Feature",
|
|
1357
|
+
strategy: "feature-based",
|
|
1358
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1359
|
+
overall_success: true,
|
|
1360
|
+
task_count: 4,
|
|
1361
|
+
completed_count: 4,
|
|
1362
|
+
},
|
|
1363
|
+
{
|
|
1364
|
+
epic_id: "epic-3",
|
|
1365
|
+
epic_title: "Risk",
|
|
1366
|
+
strategy: "risk-based",
|
|
1367
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1368
|
+
overall_success: true,
|
|
1369
|
+
task_count: 4,
|
|
1370
|
+
completed_count: 4,
|
|
1371
|
+
},
|
|
1372
|
+
];
|
|
1373
|
+
|
|
1374
|
+
test("filters file-based only", () => {
|
|
1375
|
+
const result = filterHistoryByStrategy(records, "file-based");
|
|
1376
|
+
expect(result).toHaveLength(1);
|
|
1377
|
+
expect(result[0].epic_title).toBe("File");
|
|
1378
|
+
});
|
|
1476
1379
|
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1380
|
+
test("filters feature-based only", () => {
|
|
1381
|
+
const result = filterHistoryByStrategy(records, "feature-based");
|
|
1382
|
+
expect(result).toHaveLength(1);
|
|
1383
|
+
expect(result[0].epic_title).toBe("Feature");
|
|
1384
|
+
});
|
|
1385
|
+
|
|
1386
|
+
test("filters risk-based only", () => {
|
|
1387
|
+
const result = filterHistoryByStrategy(records, "risk-based");
|
|
1388
|
+
expect(result).toHaveLength(1);
|
|
1389
|
+
expect(result[0].epic_title).toBe("Risk");
|
|
1390
|
+
});
|
|
1391
|
+
|
|
1392
|
+
test("returns all when no strategy filter", () => {
|
|
1393
|
+
const result = filterHistoryByStrategy(records);
|
|
1394
|
+
expect(result).toHaveLength(3);
|
|
1483
1395
|
});
|
|
1484
1396
|
});
|
|
1485
1397
|
|
|
1486
|
-
|
|
1487
|
-
|
|
1398
|
+
describe("parseHistoryArgs", () => {
|
|
1399
|
+
test("parses --limit flag", () => {
|
|
1400
|
+
const result = parseHistoryArgs(["--limit", "20"]);
|
|
1401
|
+
expect(result.limit).toBe(20);
|
|
1402
|
+
});
|
|
1488
1403
|
|
|
1489
|
-
|
|
1490
|
-
|
|
1404
|
+
test("parses -n shorthand for limit", () => {
|
|
1405
|
+
const result = parseHistoryArgs(["-n", "5"]);
|
|
1406
|
+
expect(result.limit).toBe(5);
|
|
1407
|
+
});
|
|
1491
1408
|
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1409
|
+
test("parses --status flag", () => {
|
|
1410
|
+
const result = parseHistoryArgs(["--status", "success"]);
|
|
1411
|
+
expect(result.status).toBe("success");
|
|
1412
|
+
});
|
|
1496
1413
|
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
ensureHiveDirectory(testDir);
|
|
1503
|
-
|
|
1504
|
-
// Simulate 60 runs with consistent high scores to reach production phase
|
|
1505
|
-
for (let i = 0; i < 60; i++) {
|
|
1506
|
-
recordEvalRun(testDir, {
|
|
1507
|
-
timestamp: new Date().toISOString(),
|
|
1508
|
-
eval_name: "test-eval",
|
|
1509
|
-
score: 0.9,
|
|
1510
|
-
run_count: i + 1,
|
|
1511
|
-
});
|
|
1512
|
-
}
|
|
1414
|
+
test("parses --strategy flag", () => {
|
|
1415
|
+
const result = parseHistoryArgs(["--strategy", "file-based"]);
|
|
1416
|
+
expect(result.strategy).toBe("file-based");
|
|
1417
|
+
});
|
|
1513
1418
|
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1419
|
+
test("parses --verbose flag", () => {
|
|
1420
|
+
const result = parseHistoryArgs(["--verbose"]);
|
|
1421
|
+
expect(result.verbose).toBe(true);
|
|
1422
|
+
});
|
|
1423
|
+
|
|
1424
|
+
test("parses -v shorthand for verbose", () => {
|
|
1425
|
+
const result = parseHistoryArgs(["-v"]);
|
|
1426
|
+
expect(result.verbose).toBe(true);
|
|
1427
|
+
});
|
|
1428
|
+
|
|
1429
|
+
test("parses multiple flags together", () => {
|
|
1430
|
+
const result = parseHistoryArgs(["--limit", "15", "--status", "failed", "--verbose"]);
|
|
1431
|
+
expect(result.limit).toBe(15);
|
|
1432
|
+
expect(result.status).toBe("failed");
|
|
1433
|
+
expect(result.verbose).toBe(true);
|
|
1434
|
+
});
|
|
1517
1435
|
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1436
|
+
test("uses default limit of 10 when not specified", () => {
|
|
1437
|
+
const result = parseHistoryArgs([]);
|
|
1438
|
+
expect(result.limit).toBe(10);
|
|
1439
|
+
});
|
|
1440
|
+
|
|
1441
|
+
test("ignores invalid status values", () => {
|
|
1442
|
+
const result = parseHistoryArgs(["--status", "invalid"]);
|
|
1443
|
+
expect(result.status).toBeUndefined();
|
|
1444
|
+
});
|
|
1445
|
+
|
|
1446
|
+
test("ignores invalid strategy values", () => {
|
|
1447
|
+
const result = parseHistoryArgs(["--strategy", "invalid"]);
|
|
1448
|
+
expect(result.strategy).toBeUndefined();
|
|
1449
|
+
});
|
|
1521
1450
|
});
|
|
1522
1451
|
});
|