opencode-swarm-plugin 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/issues.jsonl +205 -0
- package/INTEGRATION_EXAMPLE.md +66 -0
- package/README.md +127 -562
- package/dist/index.js +3842 -2917
- package/dist/plugin.js +3824 -2918
- package/docs/analysis/subagent-coordination-patterns.md +2 -0
- package/evals/README.md +116 -0
- package/evals/evalite.config.ts +15 -0
- package/evals/example.eval.ts +32 -0
- package/evals/fixtures/decomposition-cases.ts +105 -0
- package/evals/lib/data-loader.test.ts +288 -0
- package/evals/lib/data-loader.ts +111 -0
- package/evals/lib/llm.ts +115 -0
- package/evals/scorers/index.ts +200 -0
- package/evals/scorers/outcome-scorers.test.ts +27 -0
- package/evals/scorers/outcome-scorers.ts +349 -0
- package/evals/swarm-decomposition.eval.ts +112 -0
- package/package.json +8 -1
- package/src/agent-mail.ts +7 -7
- package/src/beads.ts +49 -0
- package/src/eval-capture.ts +487 -0
- package/src/index.ts +53 -3
- package/src/output-guardrails.test.ts +438 -0
- package/src/output-guardrails.ts +381 -0
- package/src/pattern-maturity.test.ts +1160 -0
- package/src/schemas/index.ts +18 -0
- package/src/schemas/swarm-context.ts +115 -0
- package/src/streams/events.test.ts +296 -0
- package/src/streams/events.ts +115 -0
- package/src/streams/migrations.test.ts +24 -20
- package/src/streams/migrations.ts +51 -0
- package/src/streams/projections.ts +187 -0
- package/src/streams/store.ts +275 -0
- package/src/swarm-mail.ts +7 -7
- package/src/swarm-orchestrate.ts +430 -1
- package/src/swarm-prompts.ts +84 -12
|
@@ -34,11 +34,11 @@ describe("Schema Migrations", () => {
|
|
|
34
34
|
it("should run all migrations on fresh database", async () => {
|
|
35
35
|
const result = await runMigrations(db);
|
|
36
36
|
|
|
37
|
-
expect(result.applied).toEqual([1, 2]);
|
|
38
|
-
expect(result.current).toBe(2);
|
|
37
|
+
expect(result.applied).toEqual([1, 2, 3, 4]);
|
|
38
|
+
expect(result.current).toBe(4);
|
|
39
39
|
|
|
40
40
|
const version = await getCurrentVersion(db);
|
|
41
|
-
expect(version).toBe(2);
|
|
41
|
+
expect(version).toBe(4);
|
|
42
42
|
});
|
|
43
43
|
|
|
44
44
|
it("should create cursors table with correct schema", async () => {
|
|
@@ -105,16 +105,16 @@ describe("Schema Migrations", () => {
|
|
|
105
105
|
it("should be safe to run migrations multiple times", async () => {
|
|
106
106
|
// First run
|
|
107
107
|
const result1 = await runMigrations(db);
|
|
108
|
-
expect(result1.applied).toEqual([1, 2]);
|
|
108
|
+
expect(result1.applied).toEqual([1, 2, 3, 4]);
|
|
109
109
|
|
|
110
110
|
// Second run - should apply nothing
|
|
111
111
|
const result2 = await runMigrations(db);
|
|
112
112
|
expect(result2.applied).toEqual([]);
|
|
113
|
-
expect(result2.current).toBe(2);
|
|
113
|
+
expect(result2.current).toBe(4);
|
|
114
114
|
|
|
115
115
|
// Version should still be 2
|
|
116
116
|
const version = await getCurrentVersion(db);
|
|
117
|
-
expect(version).toBe(2);
|
|
117
|
+
expect(version).toBe(4);
|
|
118
118
|
});
|
|
119
119
|
});
|
|
120
120
|
|
|
@@ -137,8 +137,8 @@ describe("Schema Migrations", () => {
|
|
|
137
137
|
|
|
138
138
|
// Now run migrations - should only apply 2
|
|
139
139
|
const result = await runMigrations(db);
|
|
140
|
-
expect(result.applied).toEqual([2]);
|
|
141
|
-
expect(result.current).toBe(2);
|
|
140
|
+
expect(result.applied).toEqual([2, 3, 4]);
|
|
141
|
+
expect(result.current).toBe(4);
|
|
142
142
|
});
|
|
143
143
|
});
|
|
144
144
|
|
|
@@ -146,11 +146,11 @@ describe("Schema Migrations", () => {
|
|
|
146
146
|
it("should rollback to target version", async () => {
|
|
147
147
|
// Apply all migrations
|
|
148
148
|
await runMigrations(db);
|
|
149
|
-
expect(await getCurrentVersion(db)).toBe(2);
|
|
149
|
+
expect(await getCurrentVersion(db)).toBe(4);
|
|
150
150
|
|
|
151
151
|
// Rollback to version 1
|
|
152
152
|
const result = await rollbackTo(db, 1);
|
|
153
|
-
expect(result.rolledBack).toEqual([2]);
|
|
153
|
+
expect(result.rolledBack).toEqual([4, 3, 2]);
|
|
154
154
|
expect(result.current).toBe(1);
|
|
155
155
|
|
|
156
156
|
// Version should be 1
|
|
@@ -180,7 +180,7 @@ describe("Schema Migrations", () => {
|
|
|
180
180
|
await runMigrations(db);
|
|
181
181
|
|
|
182
182
|
const result = await rollbackTo(db, 0);
|
|
183
|
-
expect(result.rolledBack).toEqual([2, 1]);
|
|
183
|
+
expect(result.rolledBack).toEqual([4, 3, 2, 1]);
|
|
184
184
|
expect(result.current).toBe(0);
|
|
185
185
|
|
|
186
186
|
// All tables should be gone
|
|
@@ -196,9 +196,9 @@ describe("Schema Migrations", () => {
|
|
|
196
196
|
it("should do nothing if target version >= current", async () => {
|
|
197
197
|
await runMigrations(db);
|
|
198
198
|
|
|
199
|
-
const result = await rollbackTo(db, 2);
|
|
199
|
+
const result = await rollbackTo(db, 4);
|
|
200
200
|
expect(result.rolledBack).toEqual([]);
|
|
201
|
-
expect(result.current).toBe(2);
|
|
201
|
+
expect(result.current).toBe(4);
|
|
202
202
|
});
|
|
203
203
|
});
|
|
204
204
|
|
|
@@ -210,12 +210,16 @@ describe("Schema Migrations", () => {
|
|
|
210
210
|
|
|
211
211
|
expect(await isMigrationApplied(db, 1)).toBe(true);
|
|
212
212
|
expect(await isMigrationApplied(db, 2)).toBe(true);
|
|
213
|
+
expect(await isMigrationApplied(db, 3)).toBe(true);
|
|
214
|
+
expect(await isMigrationApplied(db, 4)).toBe(true);
|
|
215
|
+
expect(await isMigrationApplied(db, 3)).toBe(true);
|
|
216
|
+
expect(await isMigrationApplied(db, 4)).toBe(true);
|
|
213
217
|
});
|
|
214
218
|
|
|
215
219
|
it("should list pending migrations", async () => {
|
|
216
220
|
const pending1 = await getPendingMigrations(db);
|
|
217
|
-
expect(pending1).toHaveLength(2);
|
|
218
|
-
expect(pending1.map((m) => m.version)).toEqual([1, 2]);
|
|
221
|
+
expect(pending1).toHaveLength(4);
|
|
222
|
+
expect(pending1.map((m) => m.version)).toEqual([1, 2, 3, 4]);
|
|
219
223
|
|
|
220
224
|
// Apply migration 1
|
|
221
225
|
const migration = migrations[0];
|
|
@@ -236,8 +240,8 @@ describe("Schema Migrations", () => {
|
|
|
236
240
|
);
|
|
237
241
|
|
|
238
242
|
const pending2 = await getPendingMigrations(db);
|
|
239
|
-
expect(pending2).toHaveLength(1);
|
|
240
|
-
expect(pending2.map((m) => m.version)).toEqual([2]);
|
|
243
|
+
expect(pending2).toHaveLength(3);
|
|
244
|
+
expect(pending2.map((m) => m.version)).toEqual([2, 3, 4]);
|
|
241
245
|
});
|
|
242
246
|
|
|
243
247
|
it("should list applied migrations", async () => {
|
|
@@ -247,8 +251,8 @@ describe("Schema Migrations", () => {
|
|
|
247
251
|
await runMigrations(db);
|
|
248
252
|
|
|
249
253
|
const applied2 = await getAppliedMigrations(db);
|
|
250
|
-
expect(applied2).toHaveLength(2);
|
|
251
|
-
expect(applied2.map((m) => m.version)).toEqual([1, 2]);
|
|
254
|
+
expect(applied2).toHaveLength(4);
|
|
255
|
+
expect(applied2.map((m) => m.version)).toEqual([1, 2, 3, 4]);
|
|
252
256
|
expect(applied2[0]?.description).toBe(
|
|
253
257
|
"Add cursors table for DurableCursor",
|
|
254
258
|
);
|
|
@@ -340,7 +344,7 @@ describe("Schema Migrations", () => {
|
|
|
340
344
|
`SELECT version, applied_at, description FROM schema_version ORDER BY version`,
|
|
341
345
|
);
|
|
342
346
|
|
|
343
|
-
expect(result.rows).toHaveLength(2);
|
|
347
|
+
expect(result.rows).toHaveLength(4);
|
|
344
348
|
expect(result.rows[0]?.version).toBe(1);
|
|
345
349
|
expect(result.rows[0]?.description).toBe(
|
|
346
350
|
"Add cursors table for DurableCursor",
|
|
@@ -107,6 +107,57 @@ export const migrations: Migration[] = [
|
|
|
107
107
|
`,
|
|
108
108
|
down: `DROP TABLE IF EXISTS deferred;`,
|
|
109
109
|
},
|
|
110
|
+
{
|
|
111
|
+
version: 3,
|
|
112
|
+
description: "Add eval_records table for learning system",
|
|
113
|
+
up: `
|
|
114
|
+
CREATE TABLE IF NOT EXISTS eval_records (
|
|
115
|
+
id TEXT PRIMARY KEY,
|
|
116
|
+
project_key TEXT NOT NULL,
|
|
117
|
+
task TEXT NOT NULL,
|
|
118
|
+
context TEXT,
|
|
119
|
+
strategy TEXT NOT NULL,
|
|
120
|
+
epic_title TEXT NOT NULL,
|
|
121
|
+
subtasks JSONB NOT NULL,
|
|
122
|
+
outcomes JSONB,
|
|
123
|
+
overall_success BOOLEAN,
|
|
124
|
+
total_duration_ms INTEGER,
|
|
125
|
+
total_errors INTEGER,
|
|
126
|
+
human_accepted BOOLEAN,
|
|
127
|
+
human_modified BOOLEAN,
|
|
128
|
+
human_notes TEXT,
|
|
129
|
+
file_overlap_count INTEGER,
|
|
130
|
+
scope_accuracy REAL,
|
|
131
|
+
time_balance_ratio REAL,
|
|
132
|
+
created_at BIGINT NOT NULL,
|
|
133
|
+
updated_at BIGINT NOT NULL
|
|
134
|
+
);
|
|
135
|
+
CREATE INDEX IF NOT EXISTS idx_eval_records_project ON eval_records(project_key);
|
|
136
|
+
CREATE INDEX IF NOT EXISTS idx_eval_records_strategy ON eval_records(strategy);
|
|
137
|
+
`,
|
|
138
|
+
down: `DROP TABLE IF EXISTS eval_records;`,
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
version: 4,
|
|
142
|
+
description: "Add swarm_contexts table for context recovery",
|
|
143
|
+
up: `
|
|
144
|
+
CREATE TABLE IF NOT EXISTS swarm_contexts (
|
|
145
|
+
id TEXT PRIMARY KEY,
|
|
146
|
+
epic_id TEXT NOT NULL,
|
|
147
|
+
bead_id TEXT NOT NULL,
|
|
148
|
+
strategy TEXT NOT NULL,
|
|
149
|
+
files JSONB NOT NULL,
|
|
150
|
+
dependencies JSONB NOT NULL,
|
|
151
|
+
directives JSONB NOT NULL,
|
|
152
|
+
recovery JSONB NOT NULL,
|
|
153
|
+
created_at BIGINT NOT NULL,
|
|
154
|
+
updated_at BIGINT NOT NULL
|
|
155
|
+
);
|
|
156
|
+
CREATE INDEX IF NOT EXISTS idx_swarm_contexts_epic ON swarm_contexts(epic_id);
|
|
157
|
+
CREATE INDEX IF NOT EXISTS idx_swarm_contexts_bead ON swarm_contexts(bead_id);
|
|
158
|
+
`,
|
|
159
|
+
down: `DROP TABLE IF EXISTS swarm_contexts;`,
|
|
160
|
+
},
|
|
110
161
|
];
|
|
111
162
|
|
|
112
163
|
// ============================================================================
|
|
@@ -315,3 +315,190 @@ function pathMatches(path: string, pattern: string): boolean {
|
|
|
315
315
|
// Glob match using minimatch
|
|
316
316
|
return minimatch(path, pattern);
|
|
317
317
|
}
|
|
318
|
+
|
|
319
|
+
// ============================================================================
|
|
320
|
+
// Eval Records Projections
|
|
321
|
+
// ============================================================================
|
|
322
|
+
|
|
323
|
+
export interface EvalRecord {
|
|
324
|
+
id: string;
|
|
325
|
+
project_key: string;
|
|
326
|
+
task: string;
|
|
327
|
+
context: string | null;
|
|
328
|
+
strategy: string;
|
|
329
|
+
epic_title: string;
|
|
330
|
+
subtasks: Array<{
|
|
331
|
+
title: string;
|
|
332
|
+
files: string[];
|
|
333
|
+
priority?: number;
|
|
334
|
+
}>;
|
|
335
|
+
outcomes?: Array<{
|
|
336
|
+
bead_id: string;
|
|
337
|
+
planned_files: string[];
|
|
338
|
+
actual_files: string[];
|
|
339
|
+
duration_ms: number;
|
|
340
|
+
error_count: number;
|
|
341
|
+
retry_count: number;
|
|
342
|
+
success: boolean;
|
|
343
|
+
}>;
|
|
344
|
+
overall_success: boolean | null;
|
|
345
|
+
total_duration_ms: number | null;
|
|
346
|
+
total_errors: number | null;
|
|
347
|
+
human_accepted: boolean | null;
|
|
348
|
+
human_modified: boolean | null;
|
|
349
|
+
human_notes: string | null;
|
|
350
|
+
file_overlap_count: number | null;
|
|
351
|
+
scope_accuracy: number | null;
|
|
352
|
+
time_balance_ratio: number | null;
|
|
353
|
+
created_at: number;
|
|
354
|
+
updated_at: number;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
export interface EvalStats {
|
|
358
|
+
totalRecords: number;
|
|
359
|
+
successRate: number;
|
|
360
|
+
avgDurationMs: number;
|
|
361
|
+
byStrategy: Record<string, number>;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
/**
|
|
365
|
+
* Get eval records with optional filters
|
|
366
|
+
*/
|
|
367
|
+
export async function getEvalRecords(
|
|
368
|
+
projectKey: string,
|
|
369
|
+
options?: { limit?: number; strategy?: string },
|
|
370
|
+
projectPath?: string,
|
|
371
|
+
): Promise<EvalRecord[]> {
|
|
372
|
+
const db = await getDatabase(projectPath);
|
|
373
|
+
|
|
374
|
+
const conditions = ["project_key = $1"];
|
|
375
|
+
const params: (string | number)[] = [projectKey];
|
|
376
|
+
let paramIndex = 2;
|
|
377
|
+
|
|
378
|
+
if (options?.strategy) {
|
|
379
|
+
conditions.push(`strategy = $${paramIndex++}`);
|
|
380
|
+
params.push(options.strategy);
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
const whereClause = conditions.join(" AND ");
|
|
384
|
+
let query = `
|
|
385
|
+
SELECT id, project_key, task, context, strategy, epic_title, subtasks,
|
|
386
|
+
outcomes, overall_success, total_duration_ms, total_errors,
|
|
387
|
+
human_accepted, human_modified, human_notes,
|
|
388
|
+
file_overlap_count, scope_accuracy, time_balance_ratio,
|
|
389
|
+
created_at, updated_at
|
|
390
|
+
FROM eval_records
|
|
391
|
+
WHERE ${whereClause}
|
|
392
|
+
ORDER BY created_at DESC
|
|
393
|
+
`;
|
|
394
|
+
|
|
395
|
+
if (options?.limit) {
|
|
396
|
+
query += ` LIMIT $${paramIndex}`;
|
|
397
|
+
params.push(options.limit);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
const result = await db.query<{
|
|
401
|
+
id: string;
|
|
402
|
+
project_key: string;
|
|
403
|
+
task: string;
|
|
404
|
+
context: string | null;
|
|
405
|
+
strategy: string;
|
|
406
|
+
epic_title: string;
|
|
407
|
+
subtasks: string;
|
|
408
|
+
outcomes: string | null;
|
|
409
|
+
overall_success: boolean | null;
|
|
410
|
+
total_duration_ms: number | null;
|
|
411
|
+
total_errors: number | null;
|
|
412
|
+
human_accepted: boolean | null;
|
|
413
|
+
human_modified: boolean | null;
|
|
414
|
+
human_notes: string | null;
|
|
415
|
+
file_overlap_count: number | null;
|
|
416
|
+
scope_accuracy: number | null;
|
|
417
|
+
time_balance_ratio: number | null;
|
|
418
|
+
created_at: string;
|
|
419
|
+
updated_at: string;
|
|
420
|
+
}>(query, params);
|
|
421
|
+
|
|
422
|
+
return result.rows.map((row) => ({
|
|
423
|
+
id: row.id,
|
|
424
|
+
project_key: row.project_key,
|
|
425
|
+
task: row.task,
|
|
426
|
+
context: row.context,
|
|
427
|
+
strategy: row.strategy,
|
|
428
|
+
epic_title: row.epic_title,
|
|
429
|
+
// PGlite returns JSONB columns as already-parsed objects
|
|
430
|
+
subtasks:
|
|
431
|
+
typeof row.subtasks === "string"
|
|
432
|
+
? JSON.parse(row.subtasks)
|
|
433
|
+
: row.subtasks,
|
|
434
|
+
outcomes: row.outcomes
|
|
435
|
+
? typeof row.outcomes === "string"
|
|
436
|
+
? JSON.parse(row.outcomes)
|
|
437
|
+
: row.outcomes
|
|
438
|
+
: undefined,
|
|
439
|
+
overall_success: row.overall_success,
|
|
440
|
+
total_duration_ms: row.total_duration_ms,
|
|
441
|
+
total_errors: row.total_errors,
|
|
442
|
+
human_accepted: row.human_accepted,
|
|
443
|
+
human_modified: row.human_modified,
|
|
444
|
+
human_notes: row.human_notes,
|
|
445
|
+
file_overlap_count: row.file_overlap_count,
|
|
446
|
+
scope_accuracy: row.scope_accuracy,
|
|
447
|
+
time_balance_ratio: row.time_balance_ratio,
|
|
448
|
+
created_at: parseInt(row.created_at as string),
|
|
449
|
+
updated_at: parseInt(row.updated_at as string),
|
|
450
|
+
}));
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
/**
|
|
454
|
+
* Get eval statistics for a project
|
|
455
|
+
*/
|
|
456
|
+
export async function getEvalStats(
|
|
457
|
+
projectKey: string,
|
|
458
|
+
projectPath?: string,
|
|
459
|
+
): Promise<EvalStats> {
|
|
460
|
+
const db = await getDatabase(projectPath);
|
|
461
|
+
|
|
462
|
+
// Get overall stats
|
|
463
|
+
const overallResult = await db.query<{
|
|
464
|
+
total_records: string;
|
|
465
|
+
success_count: string;
|
|
466
|
+
avg_duration: string;
|
|
467
|
+
}>(
|
|
468
|
+
`SELECT
|
|
469
|
+
COUNT(*) as total_records,
|
|
470
|
+
COUNT(*) FILTER (WHERE overall_success = true) as success_count,
|
|
471
|
+
AVG(total_duration_ms) as avg_duration
|
|
472
|
+
FROM eval_records
|
|
473
|
+
WHERE project_key = $1`,
|
|
474
|
+
[projectKey],
|
|
475
|
+
);
|
|
476
|
+
|
|
477
|
+
const totalRecords = parseInt(overallResult.rows[0]?.total_records || "0");
|
|
478
|
+
const successCount = parseInt(overallResult.rows[0]?.success_count || "0");
|
|
479
|
+
const avgDurationMs = parseFloat(overallResult.rows[0]?.avg_duration || "0");
|
|
480
|
+
|
|
481
|
+
// Get by-strategy breakdown
|
|
482
|
+
const strategyResult = await db.query<{
|
|
483
|
+
strategy: string;
|
|
484
|
+
count: string;
|
|
485
|
+
}>(
|
|
486
|
+
`SELECT strategy, COUNT(*) as count
|
|
487
|
+
FROM eval_records
|
|
488
|
+
WHERE project_key = $1
|
|
489
|
+
GROUP BY strategy`,
|
|
490
|
+
[projectKey],
|
|
491
|
+
);
|
|
492
|
+
|
|
493
|
+
const byStrategy: Record<string, number> = {};
|
|
494
|
+
for (const row of strategyResult.rows) {
|
|
495
|
+
byStrategy[row.strategy] = parseInt(row.count);
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
return {
|
|
499
|
+
totalRecords,
|
|
500
|
+
successRate: totalRecords > 0 ? successCount / totalRecords : 0,
|
|
501
|
+
avgDurationMs,
|
|
502
|
+
byStrategy,
|
|
503
|
+
};
|
|
504
|
+
}
|
package/src/streams/store.ts
CHANGED
|
@@ -531,6 +531,28 @@ async function updateMaterializedViews(
|
|
|
531
531
|
case "task_blocked":
|
|
532
532
|
// No-op for now - could add task tracking table later
|
|
533
533
|
break;
|
|
534
|
+
|
|
535
|
+
// Eval capture events - update eval_records projection
|
|
536
|
+
case "decomposition_generated":
|
|
537
|
+
await handleDecompositionGenerated(db, event);
|
|
538
|
+
break;
|
|
539
|
+
|
|
540
|
+
case "subtask_outcome":
|
|
541
|
+
await handleSubtaskOutcome(db, event);
|
|
542
|
+
break;
|
|
543
|
+
|
|
544
|
+
case "human_feedback":
|
|
545
|
+
await handleHumanFeedback(db, event);
|
|
546
|
+
break;
|
|
547
|
+
|
|
548
|
+
// Swarm checkpoint events - update swarm_contexts table
|
|
549
|
+
case "swarm_checkpointed":
|
|
550
|
+
await handleSwarmCheckpointed(db, event);
|
|
551
|
+
break;
|
|
552
|
+
|
|
553
|
+
case "swarm_recovered":
|
|
554
|
+
await handleSwarmRecovered(db, event);
|
|
555
|
+
break;
|
|
534
556
|
}
|
|
535
557
|
} catch (error) {
|
|
536
558
|
console.error("[SwarmMail] Failed to update materialized views", {
|
|
@@ -707,6 +729,259 @@ async function handleFileReleased(
|
|
|
707
729
|
}
|
|
708
730
|
}
|
|
709
731
|
|
|
732
|
+
async function handleDecompositionGenerated(
|
|
733
|
+
db: Awaited<ReturnType<typeof getDatabase>>,
|
|
734
|
+
event: AgentEvent & { id: number; sequence: number },
|
|
735
|
+
): Promise<void> {
|
|
736
|
+
if (event.type !== "decomposition_generated") return;
|
|
737
|
+
|
|
738
|
+
await db.query(
|
|
739
|
+
`INSERT INTO eval_records (
|
|
740
|
+
id, project_key, task, context, strategy, epic_title, subtasks,
|
|
741
|
+
created_at, updated_at
|
|
742
|
+
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $8)
|
|
743
|
+
ON CONFLICT (id) DO NOTHING`,
|
|
744
|
+
[
|
|
745
|
+
event.epic_id,
|
|
746
|
+
event.project_key,
|
|
747
|
+
event.task,
|
|
748
|
+
event.context || null,
|
|
749
|
+
event.strategy,
|
|
750
|
+
event.epic_title,
|
|
751
|
+
JSON.stringify(event.subtasks),
|
|
752
|
+
event.timestamp,
|
|
753
|
+
],
|
|
754
|
+
);
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
async function handleSubtaskOutcome(
|
|
758
|
+
db: Awaited<ReturnType<typeof getDatabase>>,
|
|
759
|
+
event: AgentEvent & { id: number; sequence: number },
|
|
760
|
+
): Promise<void> {
|
|
761
|
+
if (event.type !== "subtask_outcome") return;
|
|
762
|
+
|
|
763
|
+
// Fetch current record to compute metrics
|
|
764
|
+
const result = await db.query<{
|
|
765
|
+
outcomes: string | null;
|
|
766
|
+
subtasks: string;
|
|
767
|
+
}>(`SELECT outcomes, subtasks FROM eval_records WHERE id = $1`, [
|
|
768
|
+
event.epic_id,
|
|
769
|
+
]);
|
|
770
|
+
|
|
771
|
+
if (!result.rows[0]) {
|
|
772
|
+
console.warn(
|
|
773
|
+
`[SwarmMail] No eval_record found for epic_id ${event.epic_id}`,
|
|
774
|
+
);
|
|
775
|
+
return;
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
const row = result.rows[0];
|
|
779
|
+
// PGlite returns JSONB columns as already-parsed objects
|
|
780
|
+
const subtasks = (
|
|
781
|
+
typeof row.subtasks === "string" ? JSON.parse(row.subtasks) : row.subtasks
|
|
782
|
+
) as Array<{
|
|
783
|
+
title: string;
|
|
784
|
+
files: string[];
|
|
785
|
+
}>;
|
|
786
|
+
const outcomes = row.outcomes
|
|
787
|
+
? ((typeof row.outcomes === "string"
|
|
788
|
+
? JSON.parse(row.outcomes)
|
|
789
|
+
: row.outcomes) as Array<{
|
|
790
|
+
bead_id: string;
|
|
791
|
+
planned_files: string[];
|
|
792
|
+
actual_files: string[];
|
|
793
|
+
duration_ms: number;
|
|
794
|
+
error_count: number;
|
|
795
|
+
retry_count: number;
|
|
796
|
+
success: boolean;
|
|
797
|
+
}>)
|
|
798
|
+
: [];
|
|
799
|
+
|
|
800
|
+
// Create new outcome
|
|
801
|
+
const newOutcome = {
|
|
802
|
+
bead_id: event.bead_id,
|
|
803
|
+
planned_files: event.planned_files,
|
|
804
|
+
actual_files: event.actual_files,
|
|
805
|
+
duration_ms: event.duration_ms,
|
|
806
|
+
error_count: event.error_count,
|
|
807
|
+
retry_count: event.retry_count,
|
|
808
|
+
success: event.success,
|
|
809
|
+
};
|
|
810
|
+
|
|
811
|
+
// Append to outcomes array
|
|
812
|
+
const updatedOutcomes = [...outcomes, newOutcome];
|
|
813
|
+
|
|
814
|
+
// Compute metrics
|
|
815
|
+
const fileOverlapCount = computeFileOverlap(subtasks);
|
|
816
|
+
const scopeAccuracy = computeScopeAccuracy(
|
|
817
|
+
event.planned_files,
|
|
818
|
+
event.actual_files,
|
|
819
|
+
);
|
|
820
|
+
const timeBalanceRatio = computeTimeBalanceRatio(updatedOutcomes);
|
|
821
|
+
const overallSuccess = updatedOutcomes.every((o) => o.success);
|
|
822
|
+
const totalDurationMs = updatedOutcomes.reduce(
|
|
823
|
+
(sum, o) => sum + o.duration_ms,
|
|
824
|
+
0,
|
|
825
|
+
);
|
|
826
|
+
const totalErrors = updatedOutcomes.reduce(
|
|
827
|
+
(sum, o) => sum + o.error_count,
|
|
828
|
+
0,
|
|
829
|
+
);
|
|
830
|
+
|
|
831
|
+
// Update record
|
|
832
|
+
await db.query(
|
|
833
|
+
`UPDATE eval_records SET
|
|
834
|
+
outcomes = $1,
|
|
835
|
+
file_overlap_count = $2,
|
|
836
|
+
scope_accuracy = $3,
|
|
837
|
+
time_balance_ratio = $4,
|
|
838
|
+
overall_success = $5,
|
|
839
|
+
total_duration_ms = $6,
|
|
840
|
+
total_errors = $7,
|
|
841
|
+
updated_at = $8
|
|
842
|
+
WHERE id = $9`,
|
|
843
|
+
[
|
|
844
|
+
JSON.stringify(updatedOutcomes),
|
|
845
|
+
fileOverlapCount,
|
|
846
|
+
scopeAccuracy,
|
|
847
|
+
timeBalanceRatio,
|
|
848
|
+
overallSuccess,
|
|
849
|
+
totalDurationMs,
|
|
850
|
+
totalErrors,
|
|
851
|
+
event.timestamp,
|
|
852
|
+
event.epic_id,
|
|
853
|
+
],
|
|
854
|
+
);
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
async function handleHumanFeedback(
|
|
858
|
+
db: Awaited<ReturnType<typeof getDatabase>>,
|
|
859
|
+
event: AgentEvent & { id: number; sequence: number },
|
|
860
|
+
): Promise<void> {
|
|
861
|
+
if (event.type !== "human_feedback") return;
|
|
862
|
+
|
|
863
|
+
await db.query(
|
|
864
|
+
`UPDATE eval_records SET
|
|
865
|
+
human_accepted = $1,
|
|
866
|
+
human_modified = $2,
|
|
867
|
+
human_notes = $3,
|
|
868
|
+
updated_at = $4
|
|
869
|
+
WHERE id = $5`,
|
|
870
|
+
[
|
|
871
|
+
event.accepted,
|
|
872
|
+
event.modified,
|
|
873
|
+
event.notes || null,
|
|
874
|
+
event.timestamp,
|
|
875
|
+
event.epic_id,
|
|
876
|
+
],
|
|
877
|
+
);
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
async function handleSwarmCheckpointed(
|
|
881
|
+
db: Awaited<ReturnType<typeof getDatabase>>,
|
|
882
|
+
event: AgentEvent & { id: number; sequence: number },
|
|
883
|
+
): Promise<void> {
|
|
884
|
+
if (event.type !== "swarm_checkpointed") return;
|
|
885
|
+
|
|
886
|
+
await db.query(
|
|
887
|
+
`INSERT INTO swarm_contexts (
|
|
888
|
+
project_key, epic_id, bead_id, strategy, files, dependencies,
|
|
889
|
+
directives, recovery, checkpointed_at, updated_at
|
|
890
|
+
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $9)
|
|
891
|
+
ON CONFLICT (project_key, epic_id, bead_id) DO UPDATE SET
|
|
892
|
+
strategy = EXCLUDED.strategy,
|
|
893
|
+
files = EXCLUDED.files,
|
|
894
|
+
dependencies = EXCLUDED.dependencies,
|
|
895
|
+
directives = EXCLUDED.directives,
|
|
896
|
+
recovery = EXCLUDED.recovery,
|
|
897
|
+
checkpointed_at = EXCLUDED.checkpointed_at,
|
|
898
|
+
updated_at = EXCLUDED.updated_at`,
|
|
899
|
+
[
|
|
900
|
+
event.project_key,
|
|
901
|
+
event.epic_id,
|
|
902
|
+
event.bead_id,
|
|
903
|
+
event.strategy,
|
|
904
|
+
JSON.stringify(event.files),
|
|
905
|
+
JSON.stringify(event.dependencies),
|
|
906
|
+
JSON.stringify(event.directives),
|
|
907
|
+
JSON.stringify(event.recovery),
|
|
908
|
+
event.timestamp,
|
|
909
|
+
],
|
|
910
|
+
);
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
async function handleSwarmRecovered(
|
|
914
|
+
db: Awaited<ReturnType<typeof getDatabase>>,
|
|
915
|
+
event: AgentEvent & { id: number; sequence: number },
|
|
916
|
+
): Promise<void> {
|
|
917
|
+
if (event.type !== "swarm_recovered") return;
|
|
918
|
+
|
|
919
|
+
// Update swarm_contexts to mark as recovered
|
|
920
|
+
await db.query(
|
|
921
|
+
`UPDATE swarm_contexts SET
|
|
922
|
+
recovered_at = $1,
|
|
923
|
+
recovered_from_checkpoint = $2,
|
|
924
|
+
updated_at = $1
|
|
925
|
+
WHERE project_key = $3 AND epic_id = $4 AND bead_id = $5`,
|
|
926
|
+
[
|
|
927
|
+
event.timestamp,
|
|
928
|
+
event.recovered_from_checkpoint,
|
|
929
|
+
event.project_key,
|
|
930
|
+
event.epic_id,
|
|
931
|
+
event.bead_id,
|
|
932
|
+
],
|
|
933
|
+
);
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
// ============================================================================
|
|
937
|
+
// Metric Computation Helpers
|
|
938
|
+
// ============================================================================
|
|
939
|
+
|
|
940
|
+
/**
|
|
941
|
+
* Count files that appear in multiple subtasks
|
|
942
|
+
*/
|
|
943
|
+
function computeFileOverlap(subtasks: Array<{ files: string[] }>): number {
|
|
944
|
+
const fileCount = new Map<string, number>();
|
|
945
|
+
|
|
946
|
+
for (const subtask of subtasks) {
|
|
947
|
+
for (const file of subtask.files) {
|
|
948
|
+
fileCount.set(file, (fileCount.get(file) || 0) + 1);
|
|
949
|
+
}
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
return Array.from(fileCount.values()).filter((count) => count > 1).length;
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
/**
|
|
956
|
+
* Compute scope accuracy: intersection(actual, planned) / planned.length
|
|
957
|
+
*/
|
|
958
|
+
function computeScopeAccuracy(planned: string[], actual: string[]): number {
|
|
959
|
+
if (planned.length === 0) return 1.0;
|
|
960
|
+
|
|
961
|
+
const plannedSet = new Set(planned);
|
|
962
|
+
const intersection = actual.filter((file) => plannedSet.has(file));
|
|
963
|
+
|
|
964
|
+
return intersection.length / planned.length;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
/**
|
|
968
|
+
* Compute time balance ratio: max(duration) / min(duration)
|
|
969
|
+
* Lower is better (more balanced)
|
|
970
|
+
*/
|
|
971
|
+
function computeTimeBalanceRatio(
|
|
972
|
+
outcomes: Array<{ duration_ms: number }>,
|
|
973
|
+
): number | null {
|
|
974
|
+
if (outcomes.length === 0) return null;
|
|
975
|
+
|
|
976
|
+
const durations = outcomes.map((o) => o.duration_ms);
|
|
977
|
+
const max = Math.max(...durations);
|
|
978
|
+
const min = Math.min(...durations);
|
|
979
|
+
|
|
980
|
+
if (min === 0) return null;
|
|
981
|
+
|
|
982
|
+
return max / min;
|
|
983
|
+
}
|
|
984
|
+
|
|
710
985
|
// ============================================================================
|
|
711
986
|
// Convenience Functions
|
|
712
987
|
// ============================================================================
|
package/src/swarm-mail.ts
CHANGED
|
@@ -28,6 +28,7 @@ import {
|
|
|
28
28
|
checkSwarmHealth,
|
|
29
29
|
} from "./streams/swarm-mail";
|
|
30
30
|
import { getActiveReservations } from "./streams/projections";
|
|
31
|
+
import type { MailSessionState } from "./streams/events";
|
|
31
32
|
import {
|
|
32
33
|
existsSync,
|
|
33
34
|
mkdirSync,
|
|
@@ -47,13 +48,12 @@ interface ToolContext {
|
|
|
47
48
|
sessionID: string;
|
|
48
49
|
}
|
|
49
50
|
|
|
50
|
-
/**
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
}
|
|
51
|
+
/**
|
|
52
|
+
* Swarm Mail session state
|
|
53
|
+
* @deprecated Use MailSessionState from streams/events.ts instead
|
|
54
|
+
* This is kept for backward compatibility and re-exported as an alias
|
|
55
|
+
*/
|
|
56
|
+
export type SwarmMailState = MailSessionState;
|
|
57
57
|
|
|
58
58
|
/** Init tool arguments */
|
|
59
59
|
interface InitArgs {
|