opencode-swarm-plugin 0.39.1 → 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
- package/.hive/analysis/session-data-quality-audit.md +320 -0
- package/.hive/eval-results.json +481 -24
- package/.hive/issues.jsonl +76 -11
- package/.hive/memories.jsonl +159 -1
- package/.opencode/eval-history.jsonl +315 -0
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +207 -0
- package/README.md +2 -0
- package/SCORER-ANALYSIS.md +598 -0
- package/bin/eval-gate.test.ts +158 -0
- package/bin/eval-gate.ts +74 -0
- package/bin/swarm.test.ts +1054 -719
- package/bin/swarm.ts +577 -0
- package/dist/compaction-hook.d.ts +10 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-observability.d.ts +173 -0
- package/dist/compaction-observability.d.ts.map +1 -0
- package/dist/compaction-prompt-scoring.d.ts +1 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -1
- package/dist/eval-capture.d.ts +93 -0
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-runner.d.ts +134 -0
- package/dist/eval-runner.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +65 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +84043 -28070
- package/dist/memory-tools.d.ts +70 -2
- package/dist/memory-tools.d.ts.map +1 -1
- package/dist/memory.d.ts +37 -0
- package/dist/memory.d.ts.map +1 -1
- package/dist/observability-tools.d.ts +64 -0
- package/dist/observability-tools.d.ts.map +1 -1
- package/dist/plugin.js +83570 -27466
- package/dist/schemas/task.d.ts +3 -3
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +32 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
- package/evals/ARCHITECTURE.md +1189 -0
- package/evals/README.md +113 -0
- package/evals/example.eval.ts +3 -4
- package/evals/fixtures/compaction-prompt-cases.ts +6 -0
- package/evals/scorers/coordinator-discipline.evalite-test.ts +163 -0
- package/evals/scorers/coordinator-discipline.ts +82 -2
- package/evals/scorers/index.test.ts +146 -0
- package/evals/scorers/index.ts +104 -0
- package/evals/swarm-decomposition.eval.ts +13 -4
- package/examples/commands/swarm.md +291 -21
- package/package.json +4 -3
- package/src/compaction-hook.ts +258 -110
- package/src/compaction-observability.integration.test.ts +139 -0
- package/src/compaction-observability.test.ts +187 -0
- package/src/compaction-observability.ts +324 -0
- package/src/compaction-prompt-scorers.test.ts +10 -9
- package/src/compaction-prompt-scoring.ts +7 -5
- package/src/eval-capture.test.ts +204 -1
- package/src/eval-capture.ts +194 -2
- package/src/eval-runner.test.ts +223 -0
- package/src/eval-runner.ts +402 -0
- package/src/hive.ts +57 -22
- package/src/index.ts +54 -1
- package/src/memory-tools.test.ts +84 -0
- package/src/memory-tools.ts +68 -3
- package/src/memory.test.ts +2 -2
- package/src/memory.ts +122 -49
- package/src/observability-tools.test.ts +13 -0
- package/src/observability-tools.ts +277 -0
- package/src/swarm-orchestrate.test.ts +162 -0
- package/src/swarm-orchestrate.ts +7 -5
- package/src/swarm-prompts.test.ts +168 -4
- package/src/swarm-prompts.ts +228 -7
- package/.env +0 -2
- package/.turbo/turbo-test.log +0 -481
- package/.turbo/turbo-typecheck.log +0 -1
- package/dist/beads.d.ts +0 -386
- package/dist/beads.d.ts.map +0 -1
- package/dist/schemas/bead-events.d.ts +0 -698
- package/dist/schemas/bead-events.d.ts.map +0 -1
- package/dist/schemas/bead.d.ts +0 -255
- package/dist/schemas/bead.d.ts.map +0 -1
package/bin/swarm.test.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
/**
|
|
3
|
-
* Tests for swarm CLI
|
|
3
|
+
* Tests for swarm CLI helpers
|
|
4
4
|
*
|
|
5
|
-
* These tests verify the
|
|
6
|
-
* - writeFileWithStatus
|
|
7
|
-
* -
|
|
8
|
-
* - rmWithStatus: logs file removal
|
|
5
|
+
* These tests verify the CLI helpers:
|
|
6
|
+
* - File operation helpers (writeFileWithStatus, mkdirWithStatus, rmWithStatus)
|
|
7
|
+
* - Swarm history helpers (formatSwarmHistory, parseHistoryArgs, filterHistoryByStatus)
|
|
9
8
|
*/
|
|
10
9
|
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
|
|
11
10
|
import { mkdirSync, rmSync, writeFileSync, existsSync, readFileSync, readdirSync } from "fs";
|
|
@@ -197,6 +196,412 @@ READ-ONLY research agent. Never modifies code - only gathers intel and stores fi
|
|
|
197
196
|
// Log Command Tests (TDD)
|
|
198
197
|
// ============================================================================
|
|
199
198
|
|
|
199
|
+
// ============================================================================
|
|
200
|
+
// Session Log Tests (TDD)
|
|
201
|
+
// ============================================================================
|
|
202
|
+
|
|
203
|
+
import type { CoordinatorEvent } from "../src/eval-capture";
|
|
204
|
+
|
|
205
|
+
const TEST_SESSIONS_DIR = join(tmpdir(), "swarm-test-sessions");
|
|
206
|
+
|
|
207
|
+
describe("swarm log sessions", () => {
|
|
208
|
+
beforeEach(() => {
|
|
209
|
+
// Create test sessions directory
|
|
210
|
+
if (!existsSync(TEST_SESSIONS_DIR)) {
|
|
211
|
+
mkdirSync(TEST_SESSIONS_DIR, { recursive: true });
|
|
212
|
+
}
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
afterEach(() => {
|
|
216
|
+
// Cleanup test directory
|
|
217
|
+
if (existsSync(TEST_SESSIONS_DIR)) {
|
|
218
|
+
rmSync(TEST_SESSIONS_DIR, { recursive: true, force: true });
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
// ========================================================================
|
|
223
|
+
// Helper Functions (to be implemented in swarm.ts)
|
|
224
|
+
// ========================================================================
|
|
225
|
+
|
|
226
|
+
function createTestSession(
|
|
227
|
+
sessionId: string,
|
|
228
|
+
epicId: string,
|
|
229
|
+
eventCount: number,
|
|
230
|
+
baseTimestamp?: number,
|
|
231
|
+
): void {
|
|
232
|
+
const filePath = join(TEST_SESSIONS_DIR, `${sessionId}.jsonl`);
|
|
233
|
+
const lines: string[] = [];
|
|
234
|
+
const base = baseTimestamp || Date.now();
|
|
235
|
+
|
|
236
|
+
for (let i = 0; i < eventCount; i++) {
|
|
237
|
+
const event: CoordinatorEvent = {
|
|
238
|
+
session_id: sessionId,
|
|
239
|
+
epic_id: epicId,
|
|
240
|
+
timestamp: new Date(base - (eventCount - i) * 1000).toISOString(),
|
|
241
|
+
event_type: "DECISION",
|
|
242
|
+
decision_type: "worker_spawned",
|
|
243
|
+
payload: { worker_id: `worker-${i}` },
|
|
244
|
+
};
|
|
245
|
+
lines.push(JSON.stringify(event));
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
writeFileSync(filePath, lines.join("\n") + "\n");
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Parse a session file and return events
|
|
253
|
+
*/
|
|
254
|
+
function parseSessionFile(filePath: string): CoordinatorEvent[] {
|
|
255
|
+
if (!existsSync(filePath)) {
|
|
256
|
+
throw new Error(`Session file not found: ${filePath}`);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
const content = readFileSync(filePath, "utf-8");
|
|
260
|
+
const lines = content.split("\n").filter((line) => line.trim());
|
|
261
|
+
const events: CoordinatorEvent[] = [];
|
|
262
|
+
|
|
263
|
+
for (const line of lines) {
|
|
264
|
+
try {
|
|
265
|
+
const parsed = JSON.parse(line);
|
|
266
|
+
events.push(parsed);
|
|
267
|
+
} catch {
|
|
268
|
+
// Skip invalid JSON lines
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
return events;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* List all session files in a directory
|
|
277
|
+
*/
|
|
278
|
+
function listSessionFiles(
|
|
279
|
+
dir: string,
|
|
280
|
+
): Array<{
|
|
281
|
+
session_id: string;
|
|
282
|
+
file_path: string;
|
|
283
|
+
event_count: number;
|
|
284
|
+
start_time: string;
|
|
285
|
+
end_time?: string;
|
|
286
|
+
}> {
|
|
287
|
+
if (!existsSync(dir)) return [];
|
|
288
|
+
|
|
289
|
+
const files = readdirSync(dir).filter((f) => f.endsWith(".jsonl"));
|
|
290
|
+
const sessions: Array<{
|
|
291
|
+
session_id: string;
|
|
292
|
+
file_path: string;
|
|
293
|
+
event_count: number;
|
|
294
|
+
start_time: string;
|
|
295
|
+
end_time?: string;
|
|
296
|
+
}> = [];
|
|
297
|
+
|
|
298
|
+
for (const file of files) {
|
|
299
|
+
const filePath = join(dir, file);
|
|
300
|
+
try {
|
|
301
|
+
const events = parseSessionFile(filePath);
|
|
302
|
+
if (events.length === 0) continue;
|
|
303
|
+
|
|
304
|
+
const timestamps = events.map((e) => new Date(e.timestamp).getTime());
|
|
305
|
+
const startTime = new Date(Math.min(...timestamps)).toISOString();
|
|
306
|
+
const endTime =
|
|
307
|
+
timestamps.length > 1
|
|
308
|
+
? new Date(Math.max(...timestamps)).toISOString()
|
|
309
|
+
: undefined;
|
|
310
|
+
|
|
311
|
+
sessions.push({
|
|
312
|
+
session_id: events[0].session_id,
|
|
313
|
+
file_path: filePath,
|
|
314
|
+
event_count: events.length,
|
|
315
|
+
start_time: startTime,
|
|
316
|
+
end_time: endTime,
|
|
317
|
+
});
|
|
318
|
+
} catch {
|
|
319
|
+
// Skip invalid files
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// Sort by start time (newest first)
|
|
324
|
+
return sessions.sort((a, b) =>
|
|
325
|
+
new Date(b.start_time).getTime() - new Date(a.start_time).getTime()
|
|
326
|
+
);
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/**
|
|
330
|
+
* Get the latest session file
|
|
331
|
+
*/
|
|
332
|
+
function getLatestSession(
|
|
333
|
+
dir: string,
|
|
334
|
+
): {
|
|
335
|
+
session_id: string;
|
|
336
|
+
file_path: string;
|
|
337
|
+
event_count: number;
|
|
338
|
+
start_time: string;
|
|
339
|
+
end_time?: string;
|
|
340
|
+
} | null {
|
|
341
|
+
const sessions = listSessionFiles(dir);
|
|
342
|
+
return sessions.length > 0 ? sessions[0] : null;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
/**
|
|
346
|
+
* Filter events by type
|
|
347
|
+
*/
|
|
348
|
+
function filterEventsByType(
|
|
349
|
+
events: CoordinatorEvent[],
|
|
350
|
+
eventType: string,
|
|
351
|
+
): CoordinatorEvent[] {
|
|
352
|
+
if (eventType === "all") return events;
|
|
353
|
+
return events.filter((e) => e.event_type === eventType.toUpperCase());
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
/**
|
|
357
|
+
* Filter events by time
|
|
358
|
+
*/
|
|
359
|
+
function filterEventsSince(
|
|
360
|
+
events: CoordinatorEvent[],
|
|
361
|
+
sinceMs: number,
|
|
362
|
+
): CoordinatorEvent[] {
|
|
363
|
+
const cutoffTime = Date.now() - sinceMs;
|
|
364
|
+
return events.filter((e) =>
|
|
365
|
+
new Date(e.timestamp).getTime() >= cutoffTime
|
|
366
|
+
);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
// ========================================================================
|
|
370
|
+
// Tests
|
|
371
|
+
// ========================================================================
|
|
372
|
+
|
|
373
|
+
describe("listSessionFiles", () => {
|
|
374
|
+
test("returns empty array when directory doesn't exist", () => {
|
|
375
|
+
const result = listSessionFiles("/nonexistent/directory");
|
|
376
|
+
expect(result).toEqual([]);
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
test("returns empty array when directory is empty", () => {
|
|
380
|
+
const result = listSessionFiles(TEST_SESSIONS_DIR);
|
|
381
|
+
expect(result).toEqual([]);
|
|
382
|
+
});
|
|
383
|
+
|
|
384
|
+
test("lists all session files with metadata", () => {
|
|
385
|
+
createTestSession("ses_abc123", "epic-1", 5);
|
|
386
|
+
createTestSession("ses_def456", "epic-2", 3);
|
|
387
|
+
|
|
388
|
+
const result = listSessionFiles(TEST_SESSIONS_DIR);
|
|
389
|
+
|
|
390
|
+
expect(result).toHaveLength(2);
|
|
391
|
+
expect(result[0].session_id).toMatch(/^ses_/);
|
|
392
|
+
expect(result[0].event_count).toBeGreaterThan(0);
|
|
393
|
+
expect(result[0].start_time).toBeTruthy();
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
test("calculates event count correctly", () => {
|
|
397
|
+
createTestSession("ses_test", "epic-1", 10);
|
|
398
|
+
|
|
399
|
+
const result = listSessionFiles(TEST_SESSIONS_DIR);
|
|
400
|
+
|
|
401
|
+
expect(result[0].event_count).toBe(10);
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
test("extracts start and end times from events", () => {
|
|
405
|
+
createTestSession("ses_test", "epic-1", 5);
|
|
406
|
+
|
|
407
|
+
const result = listSessionFiles(TEST_SESSIONS_DIR);
|
|
408
|
+
|
|
409
|
+
expect(result[0].start_time).toBeTruthy();
|
|
410
|
+
expect(new Date(result[0].start_time).getTime()).toBeLessThan(Date.now());
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
test("sorts sessions by start time (newest first)", () => {
|
|
414
|
+
// Create sessions with explicit different timestamps
|
|
415
|
+
const oldTime = Date.now() - 60000; // 1 minute ago
|
|
416
|
+
const newTime = Date.now();
|
|
417
|
+
|
|
418
|
+
createTestSession("ses_old", "epic-1", 2, oldTime);
|
|
419
|
+
createTestSession("ses_new", "epic-2", 2, newTime);
|
|
420
|
+
|
|
421
|
+
const result = listSessionFiles(TEST_SESSIONS_DIR);
|
|
422
|
+
|
|
423
|
+
expect(result[0].session_id).toBe("ses_new");
|
|
424
|
+
expect(result[1].session_id).toBe("ses_old");
|
|
425
|
+
});
|
|
426
|
+
});
|
|
427
|
+
|
|
428
|
+
describe("parseSessionFile", () => {
|
|
429
|
+
test("parses valid JSONL session file", () => {
|
|
430
|
+
createTestSession("ses_parse", "epic-1", 3);
|
|
431
|
+
const filePath = join(TEST_SESSIONS_DIR, "ses_parse.jsonl");
|
|
432
|
+
|
|
433
|
+
const events = parseSessionFile(filePath);
|
|
434
|
+
|
|
435
|
+
expect(events).toHaveLength(3);
|
|
436
|
+
expect(events[0].session_id).toBe("ses_parse");
|
|
437
|
+
expect(events[0].event_type).toBe("DECISION");
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
test("handles file with trailing newlines", () => {
|
|
441
|
+
const filePath = join(TEST_SESSIONS_DIR, "ses_trailing.jsonl");
|
|
442
|
+
writeFileSync(
|
|
443
|
+
filePath,
|
|
444
|
+
'{"session_id":"test","epic_id":"e1","timestamp":"2025-01-01T00:00:00Z","event_type":"DECISION","decision_type":"worker_spawned","payload":{}}\n\n\n',
|
|
445
|
+
);
|
|
446
|
+
|
|
447
|
+
const events = parseSessionFile(filePath);
|
|
448
|
+
|
|
449
|
+
expect(events).toHaveLength(1);
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
test("skips invalid JSON lines", () => {
|
|
453
|
+
const filePath = join(TEST_SESSIONS_DIR, "ses_invalid.jsonl");
|
|
454
|
+
writeFileSync(
|
|
455
|
+
filePath,
|
|
456
|
+
'{"session_id":"test","epic_id":"e1","timestamp":"2025-01-01T00:00:00Z","event_type":"DECISION","decision_type":"worker_spawned","payload":{}}\ninvalid json\n{"session_id":"test","epic_id":"e1","timestamp":"2025-01-01T00:00:00Z","event_type":"OUTCOME","outcome_type":"subtask_success","payload":{}}\n',
|
|
457
|
+
);
|
|
458
|
+
|
|
459
|
+
const events = parseSessionFile(filePath);
|
|
460
|
+
|
|
461
|
+
expect(events).toHaveLength(2);
|
|
462
|
+
});
|
|
463
|
+
|
|
464
|
+
test("throws error for non-existent file", () => {
|
|
465
|
+
expect(() => parseSessionFile("/nonexistent/file.jsonl")).toThrow();
|
|
466
|
+
});
|
|
467
|
+
});
|
|
468
|
+
|
|
469
|
+
describe("getLatestSession", () => {
|
|
470
|
+
test("returns null when directory is empty", () => {
|
|
471
|
+
const result = getLatestSession(TEST_SESSIONS_DIR);
|
|
472
|
+
expect(result).toBeNull();
|
|
473
|
+
});
|
|
474
|
+
|
|
475
|
+
test("returns the most recent session", () => {
|
|
476
|
+
const oldTime = Date.now() - 60000; // 1 minute ago
|
|
477
|
+
const newTime = Date.now();
|
|
478
|
+
|
|
479
|
+
createTestSession("ses_old", "epic-1", 2, oldTime);
|
|
480
|
+
createTestSession("ses_new", "epic-2", 3, newTime);
|
|
481
|
+
|
|
482
|
+
const result = getLatestSession(TEST_SESSIONS_DIR);
|
|
483
|
+
|
|
484
|
+
expect(result).not.toBeNull();
|
|
485
|
+
expect(result!.session_id).toBe("ses_new");
|
|
486
|
+
});
|
|
487
|
+
});
|
|
488
|
+
|
|
489
|
+
describe("filterEventsByType", () => {
|
|
490
|
+
test("filters DECISION events only", () => {
|
|
491
|
+
const events: CoordinatorEvent[] = [
|
|
492
|
+
{
|
|
493
|
+
session_id: "s1",
|
|
494
|
+
epic_id: "e1",
|
|
495
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
496
|
+
event_type: "DECISION",
|
|
497
|
+
decision_type: "worker_spawned",
|
|
498
|
+
payload: {},
|
|
499
|
+
},
|
|
500
|
+
{
|
|
501
|
+
session_id: "s1",
|
|
502
|
+
epic_id: "e1",
|
|
503
|
+
timestamp: "2025-01-01T00:00:01Z",
|
|
504
|
+
event_type: "VIOLATION",
|
|
505
|
+
violation_type: "direct_edit",
|
|
506
|
+
payload: {},
|
|
507
|
+
},
|
|
508
|
+
{
|
|
509
|
+
session_id: "s1",
|
|
510
|
+
epic_id: "e1",
|
|
511
|
+
timestamp: "2025-01-01T00:00:02Z",
|
|
512
|
+
event_type: "DECISION",
|
|
513
|
+
decision_type: "worker_spawned",
|
|
514
|
+
payload: {},
|
|
515
|
+
},
|
|
516
|
+
];
|
|
517
|
+
|
|
518
|
+
const result = filterEventsByType(events, "DECISION");
|
|
519
|
+
|
|
520
|
+
expect(result).toHaveLength(2);
|
|
521
|
+
expect(result.every((e) => e.event_type === "DECISION")).toBe(true);
|
|
522
|
+
});
|
|
523
|
+
|
|
524
|
+
test("returns all events when type is 'all'", () => {
|
|
525
|
+
const events: CoordinatorEvent[] = [
|
|
526
|
+
{
|
|
527
|
+
session_id: "s1",
|
|
528
|
+
epic_id: "e1",
|
|
529
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
530
|
+
event_type: "DECISION",
|
|
531
|
+
decision_type: "worker_spawned",
|
|
532
|
+
payload: {},
|
|
533
|
+
},
|
|
534
|
+
{
|
|
535
|
+
session_id: "s1",
|
|
536
|
+
epic_id: "e1",
|
|
537
|
+
timestamp: "2025-01-01T00:00:01Z",
|
|
538
|
+
event_type: "VIOLATION",
|
|
539
|
+
violation_type: "direct_edit",
|
|
540
|
+
payload: {},
|
|
541
|
+
},
|
|
542
|
+
];
|
|
543
|
+
|
|
544
|
+
const result = filterEventsByType(events, "all");
|
|
545
|
+
|
|
546
|
+
expect(result).toHaveLength(2);
|
|
547
|
+
});
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
describe("filterEventsSince", () => {
|
|
551
|
+
test("filters events within time window", () => {
|
|
552
|
+
const now = Date.now();
|
|
553
|
+
const events: CoordinatorEvent[] = [
|
|
554
|
+
{
|
|
555
|
+
session_id: "s1",
|
|
556
|
+
epic_id: "e1",
|
|
557
|
+
timestamp: new Date(now - 5000).toISOString(), // 5s ago
|
|
558
|
+
event_type: "DECISION",
|
|
559
|
+
decision_type: "worker_spawned",
|
|
560
|
+
payload: {},
|
|
561
|
+
},
|
|
562
|
+
{
|
|
563
|
+
session_id: "s1",
|
|
564
|
+
epic_id: "e1",
|
|
565
|
+
timestamp: new Date(now - 10000).toISOString(), // 10s ago
|
|
566
|
+
event_type: "DECISION",
|
|
567
|
+
decision_type: "worker_spawned",
|
|
568
|
+
payload: {},
|
|
569
|
+
},
|
|
570
|
+
{
|
|
571
|
+
session_id: "s1",
|
|
572
|
+
epic_id: "e1",
|
|
573
|
+
timestamp: new Date(now - 60000).toISOString(), // 1min ago
|
|
574
|
+
event_type: "DECISION",
|
|
575
|
+
decision_type: "worker_spawned",
|
|
576
|
+
payload: {},
|
|
577
|
+
},
|
|
578
|
+
];
|
|
579
|
+
|
|
580
|
+
const result = filterEventsSince(events, 30000); // Last 30s
|
|
581
|
+
|
|
582
|
+
expect(result).toHaveLength(2); // the events from 5s and 10s ago
|
|
583
|
+
});
|
|
584
|
+
|
|
585
|
+
test("returns all events when sinceMs is very large", () => {
|
|
586
|
+
const now = Date.now();
|
|
587
|
+
const events: CoordinatorEvent[] = [
|
|
588
|
+
{
|
|
589
|
+
session_id: "s1",
|
|
590
|
+
epic_id: "e1",
|
|
591
|
+
timestamp: new Date(now - 1000).toISOString(),
|
|
592
|
+
event_type: "DECISION",
|
|
593
|
+
decision_type: "worker_spawned",
|
|
594
|
+
payload: {},
|
|
595
|
+
},
|
|
596
|
+
];
|
|
597
|
+
|
|
598
|
+
const result = filterEventsSince(events, 86400000); // 1 day
|
|
599
|
+
|
|
600
|
+
expect(result).toHaveLength(1);
|
|
601
|
+
});
|
|
602
|
+
});
|
|
603
|
+
});
|
|
604
|
+
|
|
200
605
|
// ============================================================================
|
|
201
606
|
// Cells Command Tests (TDD)
|
|
202
607
|
// ============================================================================
|
|
@@ -276,841 +681,771 @@ describe("Cells command", () => {
|
|
|
276
681
|
},
|
|
277
682
|
];
|
|
278
683
|
|
|
279
|
-
const
|
|
280
|
-
|
|
281
|
-
// Should contain headers
|
|
282
|
-
expect(table).toContain("ID");
|
|
283
|
-
expect(table).toContain("TITLE");
|
|
284
|
-
expect(table).toContain("STATUS");
|
|
285
|
-
expect(table).toContain("PRIORITY");
|
|
286
|
-
|
|
287
|
-
// Should contain cell data
|
|
288
|
-
expect(table).toContain("test-abc123-xyz");
|
|
289
|
-
expect(table).toContain("Fix bug");
|
|
290
|
-
expect(table).toContain("open");
|
|
291
|
-
expect(table).toContain("0");
|
|
292
|
-
|
|
293
|
-
expect(table).toContain("test-def456-abc");
|
|
294
|
-
expect(table).toContain("Add feature");
|
|
295
|
-
expect(table).toContain("in_progress");
|
|
296
|
-
expect(table).toContain("2");
|
|
297
|
-
});
|
|
684
|
+
const result = formatCellsTable(cells);
|
|
298
685
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
expect(
|
|
686
|
+
expect(result).toContain("ID");
|
|
687
|
+
expect(result).toContain("TITLE");
|
|
688
|
+
expect(result).toContain("STATUS");
|
|
689
|
+
expect(result).toContain("PRIORITY");
|
|
690
|
+
expect(result).toContain("Fix bug");
|
|
691
|
+
expect(result).toContain("Add feature");
|
|
692
|
+
expect(result).toContain("open");
|
|
693
|
+
expect(result).toContain("in_progress");
|
|
302
694
|
});
|
|
303
|
-
});
|
|
304
|
-
});
|
|
305
|
-
|
|
306
|
-
describe("Log command helpers", () => {
|
|
307
|
-
let testDir: string;
|
|
308
|
-
|
|
309
|
-
beforeEach(() => {
|
|
310
|
-
testDir = join(tmpdir(), `swarm-log-test-${Date.now()}`);
|
|
311
|
-
mkdirSync(testDir, { recursive: true });
|
|
312
|
-
});
|
|
313
|
-
|
|
314
|
-
afterEach(() => {
|
|
315
|
-
if (existsSync(testDir)) {
|
|
316
|
-
rmSync(testDir, { recursive: true, force: true });
|
|
317
|
-
}
|
|
318
|
-
});
|
|
319
|
-
|
|
320
|
-
describe("parseLogLine", () => {
|
|
321
|
-
function parseLogLine(line: string): { level: number; time: string; module: string; msg: string } | null {
|
|
322
|
-
try {
|
|
323
|
-
const parsed = JSON.parse(line);
|
|
324
|
-
if (typeof parsed.level === "number" && parsed.time && parsed.msg) {
|
|
325
|
-
return {
|
|
326
|
-
level: parsed.level,
|
|
327
|
-
time: parsed.time,
|
|
328
|
-
module: parsed.module || "unknown",
|
|
329
|
-
msg: parsed.msg,
|
|
330
|
-
};
|
|
331
|
-
}
|
|
332
|
-
} catch {
|
|
333
|
-
// Invalid JSON
|
|
334
|
-
}
|
|
335
|
-
return null;
|
|
336
|
-
}
|
|
337
695
|
|
|
338
|
-
test("
|
|
339
|
-
const
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
696
|
+
test("truncates long titles with ellipsis", () => {
|
|
697
|
+
const cells = [
|
|
698
|
+
{
|
|
699
|
+
id: "test-abc",
|
|
700
|
+
title: "A".repeat(100),
|
|
701
|
+
status: "open",
|
|
702
|
+
priority: 0,
|
|
703
|
+
type: "task",
|
|
704
|
+
created_at: 1234567890,
|
|
705
|
+
updated_at: 1234567890,
|
|
706
|
+
},
|
|
707
|
+
];
|
|
347
708
|
|
|
348
|
-
|
|
349
|
-
const line = "not json";
|
|
350
|
-
expect(parseLogLine(line)).toBeNull();
|
|
351
|
-
});
|
|
709
|
+
const result = formatCellsTable(cells);
|
|
352
710
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
const result = parseLogLine(line);
|
|
356
|
-
|
|
357
|
-
expect(result?.module).toBe("unknown");
|
|
711
|
+
expect(result).toContain("...");
|
|
712
|
+
expect(result.split("\n")[2]).toMatch(/A{47}\.\.\./);
|
|
358
713
|
});
|
|
359
|
-
});
|
|
360
714
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
return logs.filter((log) => log.level >= minLevel);
|
|
364
|
-
}
|
|
715
|
+
test("returns 'No cells found' for empty array", () => {
|
|
716
|
+
const result = formatCellsTable([]);
|
|
365
717
|
|
|
366
|
-
|
|
367
|
-
const logs = [
|
|
368
|
-
{ level: 10 }, // trace
|
|
369
|
-
{ level: 30 }, // info
|
|
370
|
-
{ level: 50 }, // error
|
|
371
|
-
];
|
|
372
|
-
|
|
373
|
-
const result = filterLogsByLevel(logs, 30);
|
|
374
|
-
expect(result).toHaveLength(2);
|
|
375
|
-
expect(result[0].level).toBe(30);
|
|
376
|
-
expect(result[1].level).toBe(50);
|
|
718
|
+
expect(result).toBe("No cells found");
|
|
377
719
|
});
|
|
378
720
|
|
|
379
|
-
test("
|
|
380
|
-
const
|
|
381
|
-
{
|
|
382
|
-
|
|
383
|
-
|
|
721
|
+
test("aligns columns correctly", () => {
|
|
722
|
+
const cells = [
|
|
723
|
+
{
|
|
724
|
+
id: "short",
|
|
725
|
+
title: "T",
|
|
726
|
+
status: "open",
|
|
727
|
+
priority: 0,
|
|
728
|
+
type: "task",
|
|
729
|
+
created_at: 1234567890,
|
|
730
|
+
updated_at: 1234567890,
|
|
731
|
+
},
|
|
732
|
+
{
|
|
733
|
+
id: "very-long-id-here",
|
|
734
|
+
title: "Very long title here",
|
|
735
|
+
status: "in_progress",
|
|
736
|
+
priority: 2,
|
|
737
|
+
type: "task",
|
|
738
|
+
created_at: 1234567890,
|
|
739
|
+
updated_at: 1234567890,
|
|
740
|
+
},
|
|
384
741
|
];
|
|
385
|
-
|
|
386
|
-
const result = filterLogsByLevel(logs, 0);
|
|
387
|
-
expect(result).toHaveLength(3);
|
|
388
|
-
});
|
|
389
|
-
});
|
|
390
742
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
return logs.filter((log) => log.module === module);
|
|
394
|
-
}
|
|
743
|
+
const result = formatCellsTable(cells);
|
|
744
|
+
const lines = result.split("\n");
|
|
395
745
|
|
|
396
|
-
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
{ module: "swarm" },
|
|
400
|
-
{ module: "compaction" },
|
|
401
|
-
];
|
|
402
|
-
|
|
403
|
-
const result = filterLogsByModule(logs, "compaction");
|
|
404
|
-
expect(result).toHaveLength(2);
|
|
405
|
-
});
|
|
406
|
-
|
|
407
|
-
test("returns empty array when no match", () => {
|
|
408
|
-
const logs = [
|
|
409
|
-
{ module: "compaction" },
|
|
410
|
-
];
|
|
411
|
-
|
|
412
|
-
const result = filterLogsByModule(logs, "swarm");
|
|
413
|
-
expect(result).toHaveLength(0);
|
|
746
|
+
// All lines should be nearly the same length (aligned): max/min spread under 3 chars
|
|
747
|
+
const lengths = lines.map(l => l.length);
|
|
748
|
+
expect(Math.max(...lengths) - Math.min(...lengths)).toBeLessThan(3);
|
|
414
749
|
});
|
|
415
750
|
});
|
|
751
|
+
});
|
|
416
752
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
if (!match) return null;
|
|
421
|
-
|
|
422
|
-
const [, num, unit] = match;
|
|
423
|
-
const value = parseInt(num, 10);
|
|
424
|
-
|
|
425
|
-
const multipliers: Record<string, number> = {
|
|
426
|
-
s: 1000,
|
|
427
|
-
m: 60 * 1000,
|
|
428
|
-
h: 60 * 60 * 1000,
|
|
429
|
-
d: 24 * 60 * 60 * 1000,
|
|
430
|
-
};
|
|
431
|
-
|
|
432
|
-
return value * multipliers[unit];
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
function filterLogsBySince(logs: Array<{ time: string }>, sinceMs: number): Array<{ time: string }> {
|
|
436
|
-
const cutoffTime = Date.now() - sinceMs;
|
|
437
|
-
return logs.filter((log) => new Date(log.time).getTime() >= cutoffTime);
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
test("parseDuration handles seconds", () => {
|
|
441
|
-
expect(parseDuration("30s")).toBe(30 * 1000);
|
|
442
|
-
});
|
|
753
|
+
// ============================================================================
|
|
754
|
+
// Eval Gate Tests (TDD)
|
|
755
|
+
// ============================================================================
|
|
443
756
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
757
|
+
interface EvalRunRecord {
|
|
758
|
+
timestamp: string;
|
|
759
|
+
eval_name: string;
|
|
760
|
+
score: number;
|
|
761
|
+
run_count: number;
|
|
762
|
+
}
|
|
447
763
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
764
|
+
interface GateResult {
|
|
765
|
+
passed: boolean;
|
|
766
|
+
phase: "bootstrap" | "stabilization" | "production";
|
|
767
|
+
message: string;
|
|
768
|
+
baseline?: number;
|
|
769
|
+
variance?: number;
|
|
770
|
+
}
|
|
451
771
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
772
|
+
/**
|
|
773
|
+
* Calculate variance for phase transitions
|
|
774
|
+
*/
|
|
775
|
+
function calculateVariance(scores: number[]): number {
|
|
776
|
+
if (scores.length <= 1) return 0;
|
|
455
777
|
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
expect(parseDuration("30")).toBeNull();
|
|
460
|
-
});
|
|
778
|
+
const mean = scores.reduce((sum, x) => sum + x, 0) / scores.length;
|
|
779
|
+
const squaredDiffs = scores.map((x) => Math.pow(x - mean, 2));
|
|
780
|
+
const variance = squaredDiffs.reduce((sum, x) => sum + x, 0) / scores.length;
|
|
461
781
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
const logs = [
|
|
465
|
-
{ time: new Date(now - 10000).toISOString() }, // 10s ago
|
|
466
|
-
{ time: new Date(now - 120000).toISOString() }, // 2m ago
|
|
467
|
-
{ time: new Date(now - 1000).toISOString() }, // 1s ago
|
|
468
|
-
];
|
|
469
|
-
|
|
470
|
-
const result = filterLogsBySince(logs, 60000); // Last 1m
|
|
471
|
-
expect(result).toHaveLength(2); // Only logs within last minute
|
|
472
|
-
});
|
|
473
|
-
});
|
|
782
|
+
return variance;
|
|
783
|
+
}
|
|
474
784
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
if (level >= 30) return "INFO ";
|
|
481
|
-
if (level >= 20) return "DEBUG";
|
|
482
|
-
return "TRACE";
|
|
483
|
-
}
|
|
785
|
+
/**
|
|
786
|
+
* Read all eval run records from .hive/eval-history.jsonl
|
|
787
|
+
*/
|
|
788
|
+
function readAllRecords(projectPath: string): EvalRunRecord[] {
|
|
789
|
+
const recordsPath = join(projectPath, ".hive", "eval-history.jsonl");
|
|
484
790
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
const module = log.module.padEnd(12);
|
|
489
|
-
return `${timestamp} ${levelName} ${module} ${log.msg}`;
|
|
490
|
-
}
|
|
791
|
+
if (!existsSync(recordsPath)) {
|
|
792
|
+
return [];
|
|
793
|
+
}
|
|
491
794
|
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
level: 30,
|
|
495
|
-
time: "2024-12-24T16:00:00.000Z",
|
|
496
|
-
module: "compaction",
|
|
497
|
-
msg: "started",
|
|
498
|
-
};
|
|
499
|
-
|
|
500
|
-
const result = formatLogLine(log);
|
|
501
|
-
expect(result).toContain("INFO");
|
|
502
|
-
expect(result).toContain("compaction");
|
|
503
|
-
expect(result).toContain("started");
|
|
504
|
-
});
|
|
795
|
+
const content = readFileSync(recordsPath, "utf-8");
|
|
796
|
+
const lines = content.split("\n").filter((line) => line.trim());
|
|
505
797
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
const log2 = formatLogLine({ level: 30, time: "2024-12-24T16:00:00.000Z", module: "compaction", msg: "test" });
|
|
509
|
-
|
|
510
|
-
// Module names should be padded to 12 chars
|
|
511
|
-
expect(log1).toContain("a test"); // 'a' + 11 spaces
|
|
512
|
-
expect(log2).toContain("compaction test"); // 'compaction' + 3 spaces (10 chars + 2)
|
|
513
|
-
});
|
|
798
|
+
return lines.map((line) => JSON.parse(line) as EvalRunRecord);
|
|
799
|
+
}
|
|
514
800
|
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
801
|
+
/**
 * Record an eval run to .hive/eval-history.jsonl
 *
 * The record is serialized as a single JSON line and appended to the history
 * file, creating the `.hive` directory and the file itself when missing.
 *
 * @param projectPath - Directory that contains (or will contain) `.hive`.
 * @param record - Eval run to persist; written verbatim via `JSON.stringify`.
 */
function recordEvalRun(
  projectPath: string,
  record: EvalRunRecord,
): void {
  const hivePath = join(projectPath, ".hive");
  const recordsPath = join(hivePath, "eval-history.jsonl");

  // `recursive: true` makes mkdirSync a no-op when the directory already
  // exists, so no separate existence check is needed.
  mkdirSync(hivePath, { recursive: true });

  // Open in append mode instead of reading the whole file back and rewriting
  // it: the cost no longer grows with history size and earlier lines are never
  // rewritten. The file is created if it does not exist yet.
  writeFileSync(recordsPath, `${JSON.stringify(record)}\n`, { flag: "a" });
}
|
|
571
826
|
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
// Track file position for incremental reads
|
|
585
|
-
let lastSize = 0;
|
|
586
|
-
|
|
587
|
-
function readNewLines(filePath: string): string[] {
|
|
588
|
-
const content = readFileSync(filePath, "utf-8");
|
|
589
|
-
const newContent = content.slice(lastSize);
|
|
590
|
-
lastSize = content.length;
|
|
591
|
-
return newContent.split("\n").filter((line) => line.trim());
|
|
592
|
-
}
|
|
593
|
-
|
|
594
|
-
// Simulate watch behavior
|
|
595
|
-
const watcher = watch(testDir, (eventType, filename) => {
|
|
596
|
-
if (filename && /\.\d+log$/.test(filename)) {
|
|
597
|
-
const newLines = readNewLines(join(testDir, filename));
|
|
598
|
-
collectedLines.push(...newLines);
|
|
599
|
-
}
|
|
600
|
-
});
|
|
601
|
-
|
|
602
|
-
// Wait for watcher to be ready
|
|
603
|
-
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
604
|
-
|
|
605
|
-
// Append new log line
|
|
606
|
-
appendFileSync(logFile, '{"level":30,"time":"2024-12-24T16:00:01.000Z","msg":"appended"}\n');
|
|
607
|
-
|
|
608
|
-
// Wait for event to fire
|
|
609
|
-
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
610
|
-
|
|
611
|
-
watcher.close();
|
|
612
|
-
|
|
613
|
-
// Should have detected the new line
|
|
614
|
-
expect(collectedLines.some((l) => l.includes("appended"))).toBe(true);
|
|
615
|
-
});
|
|
827
|
+
/**
 * Check eval gate for progressive gating
 *
 * Looks at the recorded runs for `evalName` and decides which phase applies:
 *  - fewer than 10 runs: "bootstrap", always passes;
 *  - fewer than 50 runs: "stabilization", fails when the score dropped more
 *    than 5% below the mean of the last ten scores;
 *  - 50+ runs with variance below 0.1: "production", same 5% rule;
 *  - 50+ runs with higher variance: reported as "stabilization" and passes.
 *
 * @param projectPath - Project whose eval history is read.
 * @param evalName - Only records with this `eval_name` are considered.
 * @param currentScore - Score of the run being gated.
 * @returns Gate verdict; `baseline` and `variance` are set outside bootstrap.
 */
function checkGate(
  projectPath: string,
  evalName: string,
  currentScore: number,
): GateResult {
  const history = readAllRecords(projectPath).filter(
    (entry) => entry.eval_name === evalName,
  );
  const runCount = history.length;

  if (runCount < 10) {
    return {
      passed: true,
      phase: "bootstrap",
      message: `BOOTSTRAP (${runCount}/10 runs): no gates yet`,
    };
  }

  // Baseline is the mean of the ten most recent scores.
  const recentScores = history.slice(-10).map((entry) => entry.score);
  let total = 0;
  for (const score of recentScores) {
    total += score;
  }
  const baseline = total / recentScores.length;
  const variance = calculateVariance(recentScores);

  // Percentage by which the current score sits below the baseline.
  const drop = ((baseline - currentScore) / baseline) * 100;
  const regressed = drop > 5;

  if (runCount < 50) {
    if (regressed) {
      return {
        passed: false,
        phase: "stabilization",
        message: `WARN: Score dropped ${drop.toFixed(1)}% from baseline ${baseline.toFixed(2)}`,
        baseline,
        variance,
      };
    }
    return {
      passed: true,
      phase: "stabilization",
      message: `Stabilization (${runCount}/50 runs): baseline=${baseline.toFixed(2)}`,
      baseline,
      variance,
    };
  }

  // Enough runs, but scores are still too noisy to enforce the gate.
  if (!(variance < 0.1)) {
    return {
      passed: true,
      phase: "stabilization",
      message: `Stabilization: variance too high (${variance.toFixed(3)} > 0.1), need more consistent runs`,
      baseline,
      variance,
    };
  }

  if (regressed) {
    return {
      passed: false,
      phase: "production",
      message: `FAIL: Score dropped ${drop.toFixed(1)}% from baseline ${baseline.toFixed(2)} (variance=${variance.toFixed(3)})`,
      baseline,
      variance,
    };
  }

  return {
    passed: true,
    phase: "production",
    message: `PASS: Production phase (variance=${variance.toFixed(3)}, baseline=${baseline.toFixed(2)})`,
    baseline,
    variance,
  };
}
|
|
698
903
|
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
904
|
+
/**
 * Ensure .hive directory exists
 *
 * Creates `<projectPath>/.hive` (including missing parents) when nothing
 * exists at that path yet; otherwise leaves the filesystem untouched.
 *
 * @param projectPath - Directory under which `.hive` should live.
 */
function ensureHiveDirectory(projectPath: string): void {
  const target = join(projectPath, ".hive");
  if (existsSync(target)) {
    return;
  }
  mkdirSync(target, { recursive: true });
}
|
|
913
|
+
|
|
914
|
+
describe("Eval gate", () => {
  let testDir: string;

  /**
   * Append `count` runs of "test-eval" to the history under `testDir`.
   * `scoreFor` receives the zero-based run index; `run_count` is index + 1.
   */
  const seedRuns = (count: number, scoreFor: (index: number) => number): void => {
    for (let index = 0; index < count; index++) {
      recordEvalRun(testDir, {
        timestamp: new Date().toISOString(),
        eval_name: "test-eval",
        score: scoreFor(index),
        run_count: index + 1,
      });
    }
  };

  beforeEach(() => {
    testDir = join(tmpdir(), `eval-gate-test-${Date.now()}`);
    mkdirSync(testDir, { recursive: true });
  });

  afterEach(() => {
    if (existsSync(testDir)) {
      rmSync(testDir, { recursive: true, force: true });
    }
  });

  describe("Bootstrap phase (<10 runs)", () => {
    test("allows any score", () => {
      ensureHiveDirectory(testDir);

      // Five runs with rising scores: still inside the bootstrap window.
      seedRuns(5, (index) => 0.5 + index * 0.1);

      // Deliberately low score; bootstrap must not gate on it.
      const result = checkGate(testDir, "test-eval", 0.3);

      expect(result.passed).toBe(true);
      expect(result.phase).toBe("bootstrap");
      expect(result.message).toContain("BOOTSTRAP");
    });

    test("counts runs correctly", () => {
      ensureHiveDirectory(testDir);

      seedRuns(7, () => 0.8);

      const result = checkGate(testDir, "test-eval", 0.8);

      expect(result.phase).toBe("bootstrap");
      expect(result.message).toContain("7/10");
    });
  });

  describe("Stabilization phase (10-50 runs)", () => {
    test("warns on >5% regression", () => {
      ensureHiveDirectory(testDir);

      // Twenty identical runs give a 0.9 baseline.
      seedRuns(20, () => 0.9);

      // 0.85 is roughly 5.6% below the 0.9 baseline, past the 5% threshold.
      const regressedScore = 0.85;
      const result = checkGate(testDir, "test-eval", regressedScore);

      expect(result.passed).toBe(false);
      expect(result.phase).toBe("stabilization");
      expect(result.message).toContain("WARN");
      expect(result.baseline).toBeCloseTo(0.9, 2);
    });

    test("passes when score is stable", () => {
      ensureHiveDirectory(testDir);

      seedRuns(25, () => 0.85);

      const result = checkGate(testDir, "test-eval", 0.86);

      expect(result.passed).toBe(true);
      expect(result.phase).toBe("stabilization");
      expect(result.baseline).toBeCloseTo(0.85, 2);
    });
  });

  describe("Production phase (>50 runs, low variance)", () => {
    test("enters production when variance < 0.1", () => {
      ensureHiveDirectory(testDir);

      // Sixty identical scores: plenty of runs and no spread between them.
      seedRuns(60, () => 0.9);

      const result = checkGate(testDir, "test-eval", 0.91);

      expect(result.phase).toBe("production");
      expect(result.variance).toBeLessThan(0.1);
    });

    test("fails on regression in production", () => {
      ensureHiveDirectory(testDir);

      // Sixty consistent high scores put the eval in the production phase.
      seedRuns(60, () => 0.9);

      // 0.8 is roughly 11% below the 0.9 baseline.
      const regressedScore = 0.8;
      const result = checkGate(testDir, "test-eval", regressedScore);

      expect(result.passed).toBe(false);
      expect(result.phase).toBe("production");
      expect(result.message).toContain("FAIL");
    });
  });
});
|
|
865
1055
|
|
|
866
1056
|
// ============================================================================
|
|
867
|
-
//
|
|
1057
|
+
// History Command Tests (TDD)
|
|
868
1058
|
// ============================================================================
|
|
869
1059
|
|
|
1060
|
+
/** One row of swarm history as consumed by the history formatting/filter helpers in this file. */
interface SwarmHistoryRecord {
  // Identifier of the epic; not read by the helpers visible in this file.
  epic_id: string;
  // Human-readable title; formatSwarmHistory shortens it when longer than 30 characters.
  epic_title: string;
  // Decomposition strategy; filterHistoryByStrategy matches it against
  // "file-based" | "feature-based" | "risk-based".
  strategy: string;
  // Passed to `new Date(...)` by formatRelativeTime; the tests here use ISO-8601 strings.
  timestamp: string;
  // Rendered as ✅ (true) or ❌ (false) in the history table.
  overall_success: boolean;
  // Total number of tasks; shown as the denominator of "<completed>/<total> tasks".
  task_count: number;
  // Number of finished tasks; filterHistoryByStatus treats
  // completed_count < task_count as "in_progress".
  completed_count: number;
}
|
|
1069
|
+
|
|
870
1070
|
/**
 * Format relative time (e.g., "2h ago", "1d ago")
 *
 * Ages under an hour are reported in minutes, under a day in hours, and in
 * whole days otherwise (all rounded down).
 *
 * @param timestamp - Date string accepted by `new Date(...)`.
 * @returns "<n>m ago", "<n>h ago" or "<n>d ago"; "unknown" when the timestamp
 *   cannot be parsed. Timestamps in the future are reported as "0m ago".
 */
function formatRelativeTime(timestamp: string): string {
  const MS_PER_MINUTE = 60000;
  const MS_PER_HOUR = 3600000;
  const MS_PER_DAY = 86400000;

  const then = new Date(timestamp).getTime();
  // An unparseable date yields NaN, which would otherwise fall through every
  // comparison below and render as "NaNd ago".
  if (Number.isNaN(then)) {
    return "unknown";
  }

  // Clamp so a timestamp slightly ahead of the local clock does not produce
  // a negative age such as "-1m ago".
  const elapsedMs = Math.max(0, Date.now() - then);

  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(elapsedMs / MS_PER_HOUR);
  if (hours < 24) {
    return `${hours}h ago`;
  }

  return `${Math.floor(elapsedMs / MS_PER_DAY)}d ago`;
}
|
|
1086
|
+
|
|
1087
|
+
/**
 * Format swarm history as beautiful CLI table
 *
 * Each record becomes one row: relative time, ✅/❌ status, title (shortened
 * to 27 characters plus "..." when longer than 30), strategy, and
 * "<completed>/<total> tasks". The surrounding box is sized from the widest
 * row so the right-hand border lines up even when a column overflows its
 * padding (long strategy names, multi-digit task counts).
 *
 * @param records - History rows to render, in display order.
 * @returns The table as a newline-joined string, or "No swarm history found"
 *   when `records` is empty.
 */
function formatSwarmHistory(records: SwarmHistoryRecord[]): string {
  if (records.length === 0) {
    return "No swarm history found";
  }

  const HEADING = "SWARM HISTORY";
  const MAX_TITLE_LENGTH = 30;
  // NOTE(review): the status emoji is one UTF-16 unit but is assumed to render
  // two columns wide, as most terminals do; alignment is therefore best-effort.
  const EMOJI_EXTRA_COLUMNS = 1;

  const rowBodies = records.map((record) => {
    const title =
      record.epic_title.length > MAX_TITLE_LENGTH
        ? `${record.epic_title.slice(0, MAX_TITLE_LENGTH - 3)}...`
        : record.epic_title;

    return [
      formatRelativeTime(record.timestamp).padEnd(8),
      record.overall_success ? "✅" : "❌",
      title.padEnd(32),
      record.strategy.padEnd(13),
      `${record.completed_count}/${record.task_count} tasks`,
    ].join(" ");
  });

  // Every row contains exactly one status emoji, hence the constant offset.
  const displayWidth = (text: string): number => text.length + EMOJI_EXTRA_COLUMNS;

  const innerWidth = rowBodies.reduce(
    (widest, body) => Math.max(widest, displayWidth(body)),
    HEADING.length,
  );

  // +2 for the single space kept on each side of the cell content.
  const rule = "─".repeat(innerWidth + 2);
  const headingIndent = " ".repeat(Math.floor((innerWidth - HEADING.length) / 2));
  const heading = `${headingIndent}${HEADING}`.padEnd(innerWidth);

  return [
    `┌${rule}┐`,
    `│ ${heading} │`,
    `├${rule}┤`,
    ...rowBodies.map(
      (body) => `│ ${body}${" ".repeat(innerWidth - displayWidth(body))} │`,
    ),
    `└${rule}┘`,
  ].join("\n");
}
|
|
894
1123
|
|
|
895
1124
|
/**
 * Filter history by status
 *
 *  - "success":     records whose `overall_success` is true;
 *  - "failed":      unsuccessful records with every task completed;
 *  - "in_progress": records with fewer completed tasks than total tasks.
 *
 * @param records - Rows to filter; never mutated.
 * @param status - Optional filter; when omitted (or not one of the values
 *   above) the input array itself is returned.
 */
function filterHistoryByStatus(
  records: SwarmHistoryRecord[],
  status?: "success" | "failed" | "in_progress",
): SwarmHistoryRecord[] {
  if (!status) {
    return records;
  }

  if (status === "success") {
    return records.filter((entry) => entry.overall_success);
  }

  if (status === "failed") {
    return records.filter(
      (entry) => entry.completed_count === entry.task_count && !entry.overall_success,
    );
  }

  if (status === "in_progress") {
    return records.filter((entry) => entry.completed_count < entry.task_count);
  }

  // Unrecognised value at runtime: no filtering.
  return records;
}
|
|
930
1144
|
|
|
931
|
-
|
|
1145
|
+
/**
 * Filter history by strategy
 *
 * @param records - Rows to filter; never mutated.
 * @param strategy - Optional strategy name; when omitted the input array
 *   itself is returned, otherwise only rows whose `strategy` equals it.
 */
function filterHistoryByStrategy(
  records: SwarmHistoryRecord[],
  strategy?: "file-based" | "feature-based" | "risk-based",
): SwarmHistoryRecord[] {
  return strategy
    ? records.filter((entry) => entry.strategy === strategy)
    : records;
}
|
|
933
1155
|
|
|
934
1156
|
/**
 * Parse history CLI arguments
 *
 * Recognised flags:
 *  - `--limit <n>` / `-n <n>`: positive whole number of rows (default 10);
 *  - `--status <success|failed|in_progress>`;
 *  - `--strategy <file-based|feature-based|risk-based>`;
 *  - `--verbose` / `-v`.
 *
 * A flag whose value is missing or invalid is ignored and the value is not
 * consumed, so it is examined again as an argument in its own right. Unknown
 * arguments are skipped.
 *
 * @param args - Raw argument list (without the command name).
 * @returns Parsed options; `status` and `strategy` are absent unless given.
 */
function parseHistoryArgs(args: string[]): {
  limit: number;
  status?: "success" | "failed" | "in_progress";
  strategy?: "file-based" | "feature-based" | "risk-based";
  verbose: boolean;
} {
  const STATUSES = ["success", "failed", "in_progress"];
  const STRATEGIES = ["file-based", "feature-based", "risk-based"];

  const result: ReturnType<typeof parseHistoryArgs> = {
    limit: 10,
    verbose: false,
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const value = args[i + 1];

    switch (arg) {
      case "--limit":
      case "-n":
        // Digits only: `Number()` alone would also accept "-3", "2.5", " ",
        // "1e3" or "0x10", none of which is a sensible row count.
        if (typeof value === "string" && /^\d+$/.test(value) && Number(value) > 0) {
          result.limit = Number(value);
          i++;
        }
        break;

      case "--status":
        if (value && STATUSES.includes(value)) {
          result.status = value as "success" | "failed" | "in_progress";
          i++;
        }
        break;

      case "--strategy":
        if (value && STRATEGIES.includes(value)) {
          result.strategy = value as "file-based" | "feature-based" | "risk-based";
          i++;
        }
        break;

      case "--verbose":
      case "-v":
        result.verbose = true;
        break;

      default:
        break;
    }
  }

  return result;
}
|
|
982
1203
|
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
baseline?: number;
|
|
991
|
-
currentScore: number;
|
|
992
|
-
regressionPercent?: number;
|
|
993
|
-
}): string {
|
|
994
|
-
const lines: string[] = [];
|
|
995
|
-
|
|
996
|
-
// Pass/fail banner
|
|
997
|
-
const status = result.passed ? "✅ PASS" : "❌ FAIL";
|
|
998
|
-
lines.push(status);
|
|
999
|
-
lines.push("");
|
|
1204
|
+
describe("swarm history", () => {
|
|
1205
|
+
describe("formatRelativeTime", () => {
|
|
1206
|
+
test("formats minutes ago", () => {
|
|
1207
|
+
const fiveMinutesAgo = new Date(Date.now() - 5 * 60000).toISOString();
|
|
1208
|
+
const result = formatRelativeTime(fiveMinutesAgo);
|
|
1209
|
+
expect(result).toMatch(/5m ago/);
|
|
1210
|
+
});
|
|
1000
1211
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1212
|
+
test("formats hours ago", () => {
|
|
1213
|
+
const threeHoursAgo = new Date(Date.now() - 3 * 3600000).toISOString();
|
|
1214
|
+
const result = formatRelativeTime(threeHoursAgo);
|
|
1215
|
+
expect(result).toMatch(/3h ago/);
|
|
1216
|
+
});
|
|
1004
1217
|
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1218
|
+
test("formats days ago", () => {
|
|
1219
|
+
const twoDaysAgo = new Date(Date.now() - 2 * 86400000).toISOString();
|
|
1220
|
+
const result = formatRelativeTime(twoDaysAgo);
|
|
1221
|
+
expect(result).toMatch(/2d ago/);
|
|
1222
|
+
});
|
|
1223
|
+
});
|
|
1008
1224
|
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1225
|
+
describe("formatSwarmHistory", () => {
|
|
1226
|
+
test("formats history as beautiful box-drawn table", () => {
|
|
1227
|
+
const records: SwarmHistoryRecord[] = [
|
|
1228
|
+
{
|
|
1229
|
+
epic_id: "epic-1",
|
|
1230
|
+
epic_title: "Add auth flow",
|
|
1231
|
+
strategy: "feature-based",
|
|
1232
|
+
timestamp: new Date(Date.now() - 2 * 3600000).toISOString(),
|
|
1233
|
+
overall_success: true,
|
|
1234
|
+
task_count: 4,
|
|
1235
|
+
completed_count: 4,
|
|
1236
|
+
},
|
|
1237
|
+
{
|
|
1238
|
+
epic_id: "epic-2",
|
|
1239
|
+
epic_title: "Refactor DB layer",
|
|
1240
|
+
strategy: "file-based",
|
|
1241
|
+
timestamp: new Date(Date.now() - 5 * 3600000).toISOString(),
|
|
1242
|
+
overall_success: false,
|
|
1243
|
+
task_count: 5,
|
|
1244
|
+
completed_count: 2,
|
|
1245
|
+
},
|
|
1246
|
+
];
|
|
1013
1247
|
|
|
1014
|
-
|
|
1015
|
-
|
|
1248
|
+
const result = formatSwarmHistory(records);
|
|
1249
|
+
|
|
1250
|
+
expect(result).toContain("┌─────");
|
|
1251
|
+
expect(result).toContain("SWARM HISTORY");
|
|
1252
|
+
expect(result).toContain("✅");
|
|
1253
|
+
expect(result).toContain("❌");
|
|
1254
|
+
expect(result).toContain("Add auth flow");
|
|
1255
|
+
expect(result).toContain("Refactor DB layer");
|
|
1256
|
+
expect(result).toContain("feature-based");
|
|
1257
|
+
expect(result).toContain("file-based");
|
|
1258
|
+
expect(result).toContain("4/4 tasks");
|
|
1259
|
+
expect(result).toContain("2/5 tasks");
|
|
1260
|
+
expect(result).toContain("└─────");
|
|
1261
|
+
});
|
|
1016
1262
|
|
|
1017
|
-
|
|
1018
|
-
|
|
1263
|
+
test("truncates long titles with ellipsis", () => {
|
|
1264
|
+
const records: SwarmHistoryRecord[] = [
|
|
1265
|
+
{
|
|
1266
|
+
epic_id: "epic-1",
|
|
1267
|
+
epic_title: "A".repeat(100),
|
|
1268
|
+
strategy: "feature-based",
|
|
1269
|
+
timestamp: new Date(Date.now() - 1000).toISOString(),
|
|
1270
|
+
overall_success: true,
|
|
1271
|
+
task_count: 1,
|
|
1272
|
+
completed_count: 1,
|
|
1273
|
+
},
|
|
1274
|
+
];
|
|
1019
1275
|
|
|
1020
|
-
|
|
1021
|
-
// Eval Run Tests
|
|
1022
|
-
// ============================================================================
|
|
1276
|
+
const result = formatSwarmHistory(records);
|
|
1023
1277
|
|
|
1024
|
-
|
|
1025
|
-
|
|
1278
|
+
expect(result).toContain("...");
|
|
1279
|
+
expect(result).toMatch(/A{27}\.\.\./);
|
|
1280
|
+
});
|
|
1026
1281
|
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1282
|
+
test("returns 'No swarm history found' for empty array", () => {
|
|
1283
|
+
const result = formatSwarmHistory([]);
|
|
1284
|
+
expect(result).toBe("No swarm history found");
|
|
1285
|
+
});
|
|
1030
1286
|
});
|
|
1031
1287
|
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1288
|
+
describe("filterHistoryByStatus", () => {
|
|
1289
|
+
const records: SwarmHistoryRecord[] = [
|
|
1290
|
+
{
|
|
1291
|
+
epic_id: "epic-1",
|
|
1292
|
+
epic_title: "Success",
|
|
1293
|
+
strategy: "feature-based",
|
|
1294
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1295
|
+
overall_success: true,
|
|
1296
|
+
task_count: 4,
|
|
1297
|
+
completed_count: 4,
|
|
1298
|
+
},
|
|
1299
|
+
{
|
|
1300
|
+
epic_id: "epic-2",
|
|
1301
|
+
epic_title: "Failed",
|
|
1302
|
+
strategy: "file-based",
|
|
1303
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1304
|
+
overall_success: false,
|
|
1305
|
+
task_count: 4,
|
|
1306
|
+
completed_count: 4,
|
|
1307
|
+
},
|
|
1308
|
+
{
|
|
1309
|
+
epic_id: "epic-3",
|
|
1310
|
+
epic_title: "In Progress",
|
|
1311
|
+
strategy: "risk-based",
|
|
1312
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1313
|
+
overall_success: false,
|
|
1314
|
+
task_count: 5,
|
|
1315
|
+
completed_count: 2,
|
|
1316
|
+
},
|
|
1317
|
+
];
|
|
1318
|
+
|
|
1319
|
+
test("filters success only", () => {
|
|
1320
|
+
const result = filterHistoryByStatus(records, "success");
|
|
1321
|
+
expect(result).toHaveLength(1);
|
|
1322
|
+
expect(result[0].epic_title).toBe("Success");
|
|
1323
|
+
});
|
|
1037
1324
|
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
const { ensureHiveDirectory } = await import("../src/hive.js");
|
|
1043
|
-
|
|
1044
|
-
// Set up test data
|
|
1045
|
-
const evalName = "test-eval";
|
|
1046
|
-
const mockScore = 0.85;
|
|
1047
|
-
|
|
1048
|
-
// Ensure directory exists
|
|
1049
|
-
ensureHiveDirectory(testDir);
|
|
1050
|
-
|
|
1051
|
-
// Get history and record run (simulating what eval run does)
|
|
1052
|
-
const history = getScoreHistory(testDir, evalName);
|
|
1053
|
-
recordEvalRun(testDir, {
|
|
1054
|
-
timestamp: new Date().toISOString(),
|
|
1055
|
-
eval_name: evalName,
|
|
1056
|
-
score: mockScore,
|
|
1057
|
-
run_count: history.length + 1,
|
|
1325
|
+
test("filters failed only", () => {
|
|
1326
|
+
const result = filterHistoryByStatus(records, "failed");
|
|
1327
|
+
expect(result).toHaveLength(1);
|
|
1328
|
+
expect(result[0].epic_title).toBe("Failed");
|
|
1058
1329
|
});
|
|
1059
1330
|
|
|
1060
|
-
|
|
1061
|
-
|
|
1331
|
+
test("filters in_progress only", () => {
|
|
1332
|
+
const result = filterHistoryByStatus(records, "in_progress");
|
|
1333
|
+
expect(result).toHaveLength(1);
|
|
1334
|
+
expect(result[0].epic_title).toBe("In Progress");
|
|
1335
|
+
});
|
|
1062
1336
|
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1337
|
+
test("returns all when no status filter", () => {
|
|
1338
|
+
const result = filterHistoryByStatus(records);
|
|
1339
|
+
expect(result).toHaveLength(3);
|
|
1340
|
+
});
|
|
1341
|
+
});
|
|
1067
1342
|
|
|
1068
|
-
|
|
1069
|
-
|
|
1343
|
+
describe("filterHistoryByStrategy", () => {
|
|
1344
|
+
const records: SwarmHistoryRecord[] = [
|
|
1345
|
+
{
|
|
1346
|
+
epic_id: "epic-1",
|
|
1347
|
+
epic_title: "File",
|
|
1348
|
+
strategy: "file-based",
|
|
1349
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1350
|
+
overall_success: true,
|
|
1351
|
+
task_count: 4,
|
|
1352
|
+
completed_count: 4,
|
|
1353
|
+
},
|
|
1354
|
+
{
|
|
1355
|
+
epic_id: "epic-2",
|
|
1356
|
+
epic_title: "Feature",
|
|
1357
|
+
strategy: "feature-based",
|
|
1358
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1359
|
+
overall_success: true,
|
|
1360
|
+
task_count: 4,
|
|
1361
|
+
completed_count: 4,
|
|
1362
|
+
},
|
|
1363
|
+
{
|
|
1364
|
+
epic_id: "epic-3",
|
|
1365
|
+
epic_title: "Risk",
|
|
1366
|
+
strategy: "risk-based",
|
|
1367
|
+
timestamp: "2025-01-01T00:00:00Z",
|
|
1368
|
+
overall_success: true,
|
|
1369
|
+
task_count: 4,
|
|
1370
|
+
completed_count: 4,
|
|
1371
|
+
},
|
|
1372
|
+
];
|
|
1373
|
+
|
|
1374
|
+
test("filters file-based only", () => {
|
|
1375
|
+
const result = filterHistoryByStrategy(records, "file-based");
|
|
1376
|
+
expect(result).toHaveLength(1);
|
|
1377
|
+
expect(result[0].epic_title).toBe("File");
|
|
1378
|
+
});
|
|
1070
1379
|
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1380
|
+
test("filters feature-based only", () => {
|
|
1381
|
+
const result = filterHistoryByStrategy(records, "feature-based");
|
|
1382
|
+
expect(result).toHaveLength(1);
|
|
1383
|
+
expect(result[0].epic_title).toBe("Feature");
|
|
1384
|
+
});
|
|
1385
|
+
|
|
1386
|
+
test("filters risk-based only", () => {
|
|
1387
|
+
const result = filterHistoryByStrategy(records, "risk-based");
|
|
1388
|
+
expect(result).toHaveLength(1);
|
|
1389
|
+
expect(result[0].epic_title).toBe("Risk");
|
|
1390
|
+
});
|
|
1391
|
+
|
|
1392
|
+
test("returns all when no strategy filter", () => {
|
|
1393
|
+
const result = filterHistoryByStrategy(records);
|
|
1394
|
+
expect(result).toHaveLength(3);
|
|
1077
1395
|
});
|
|
1078
1396
|
});
|
|
1079
1397
|
|
|
1080
|
-
|
|
1081
|
-
|
|
1398
|
+
describe("parseHistoryArgs", () => {
|
|
1399
|
+
test("parses --limit flag", () => {
|
|
1400
|
+
const result = parseHistoryArgs(["--limit", "20"]);
|
|
1401
|
+
expect(result.limit).toBe(20);
|
|
1402
|
+
});
|
|
1082
1403
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1404
|
+
test("parses -n shorthand for limit", () => {
|
|
1405
|
+
const result = parseHistoryArgs(["-n", "5"]);
|
|
1406
|
+
expect(result.limit).toBe(5);
|
|
1407
|
+
});
|
|
1085
1408
|
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1409
|
+
test("parses --status flag", () => {
|
|
1410
|
+
const result = parseHistoryArgs(["--status", "success"]);
|
|
1411
|
+
expect(result.status).toBe("success");
|
|
1412
|
+
});
|
|
1090
1413
|
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1414
|
+
test("parses --strategy flag", () => {
|
|
1415
|
+
const result = parseHistoryArgs(["--strategy", "file-based"]);
|
|
1416
|
+
expect(result.strategy).toBe("file-based");
|
|
1417
|
+
});
|
|
1418
|
+
|
|
1419
|
+
test("parses --verbose flag", () => {
|
|
1420
|
+
const result = parseHistoryArgs(["--verbose"]);
|
|
1421
|
+
expect(result.verbose).toBe(true);
|
|
1422
|
+
});
|
|
1423
|
+
|
|
1424
|
+
test("parses -v shorthand for verbose", () => {
|
|
1425
|
+
const result = parseHistoryArgs(["-v"]);
|
|
1426
|
+
expect(result.verbose).toBe(true);
|
|
1427
|
+
});
|
|
1428
|
+
|
|
1429
|
+
test("parses multiple flags together", () => {
|
|
1430
|
+
const result = parseHistoryArgs(["--limit", "15", "--status", "failed", "--verbose"]);
|
|
1431
|
+
expect(result.limit).toBe(15);
|
|
1432
|
+
expect(result.status).toBe("failed");
|
|
1433
|
+
expect(result.verbose).toBe(true);
|
|
1434
|
+
});
|
|
1107
1435
|
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1436
|
+
test("uses default limit of 10 when not specified", () => {
|
|
1437
|
+
const result = parseHistoryArgs([]);
|
|
1438
|
+
expect(result.limit).toBe(10);
|
|
1439
|
+
});
|
|
1111
1440
|
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1441
|
+
test("ignores invalid status values", () => {
|
|
1442
|
+
const result = parseHistoryArgs(["--status", "invalid"]);
|
|
1443
|
+
expect(result.status).toBeUndefined();
|
|
1444
|
+
});
|
|
1445
|
+
|
|
1446
|
+
test("ignores invalid strategy values", () => {
|
|
1447
|
+
const result = parseHistoryArgs(["--strategy", "invalid"]);
|
|
1448
|
+
expect(result.strategy).toBeUndefined();
|
|
1449
|
+
});
|
|
1115
1450
|
});
|
|
1116
1451
|
});
|