opencode-swarm-plugin 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -315,3 +315,190 @@ function pathMatches(path: string, pattern: string): boolean {
315
315
  // Glob match using minimatch
316
316
  return minimatch(path, pattern);
317
317
  }
318
+
319
+ // ============================================================================
320
+ // Eval Records Projections
321
+ // ============================================================================
322
+
323
/**
 * One row of the `eval_records` projection in the shape returned by
 * `getEvalRecords`.
 *
 * In the handlers of this module the row is inserted by a
 * `decomposition_generated` event and later updated by `subtask_outcome`
 * and `human_feedback` events, which is why the outcome, feedback and
 * metric fields are typed as nullable/optional.
 */
export interface EvalRecord {
  /** Record identifier; the insert handler stores the event's `epic_id` here. */
  id: string;
  /** Project the record belongs to; used as the filter key by the queries. */
  project_key: string;
  /** Task text copied from the decomposition event. */
  task: string;
  /** Optional context copied from the decomposition event (`null` when absent). */
  context: string | null;
  /** Decomposition strategy name; `getEvalStats` groups counts by this value. */
  strategy: string;
  epic_title: string;
  /** Planned subtasks as serialized by the decomposition event. */
  subtasks: {
    title: string;
    files: string[];
    priority?: number;
  }[];
  /** Outcomes appended one per `subtask_outcome` event; absent until the first one. */
  outcomes?: {
    bead_id: string;
    planned_files: string[];
    actual_files: string[];
    duration_ms: number;
    error_count: number;
    retry_count: number;
    success: boolean;
  }[];
  /** `true` only when every recorded outcome succeeded. */
  overall_success: boolean | null;
  /** Sum of `duration_ms` over the recorded outcomes. */
  total_duration_ms: number | null;
  /** Sum of `error_count` over the recorded outcomes. */
  total_errors: number | null;
  human_accepted: boolean | null;
  human_modified: boolean | null;
  human_notes: string | null;
  /** Number of files that the planned subtasks share. */
  file_overlap_count: number | null;
  /** Share of planned files that were actually touched, as last written by the outcome handler. */
  scope_accuracy: number | null;
  /** Longest outcome duration divided by the shortest one. */
  time_balance_ratio: number | null;
  /** Timestamp of the event that created the row. */
  created_at: number;
  /** Timestamp of the most recent event that wrote to the row. */
  updated_at: number;
}
356
+
357
/**
 * Aggregate view over a project's eval records, as produced by
 * `getEvalStats`.
 */
export interface EvalStats {
  /** Number of eval records stored for the project. */
  totalRecords: number;
  /** Successful records divided by `totalRecords`; `0` when there are no records. */
  successRate: number;
  /** Average of `total_duration_ms` across the project's records; `0` when unavailable. */
  avgDurationMs: number;
  /** Record count keyed by strategy name. */
  byStrategy: Record<string, number>;
}
363
+
364
/**
 * Load the eval records of a project, newest first.
 *
 * @param projectKey - Only rows whose `project_key` equals this value are returned.
 * @param options - Optional filters. `strategy` adds an equality filter on the
 *   `strategy` column and `limit` caps the number of rows. Both are tested for
 *   truthiness, so an empty `strategy` or a `limit` of `0` behaves exactly as
 *   if the option had been omitted.
 * @param projectPath - Forwarded unchanged to `getDatabase`.
 * @returns The matching rows ordered by `created_at` descending, with the JSON
 *   columns decoded and the timestamps converted to numbers.
 */
export async function getEvalRecords(
  projectKey: string,
  options?: { limit?: number; strategy?: string },
  projectPath?: string,
): Promise<EvalRecord[]> {
  // Raw row shape: the same as EvalRecord except that the JSON columns and the
  // timestamps are declared as strings until they are decoded below.
  type EvalRecordRow = Omit<
    EvalRecord,
    "subtasks" | "outcomes" | "created_at" | "updated_at"
  > & {
    subtasks: string;
    outcomes: string | null;
    created_at: string;
    updated_at: string;
  };

  // PGlite returns JSONB columns as already-parsed objects, so only values
  // that really arrive as strings are run through JSON.parse.
  const decodeJson = (value: unknown): unknown =>
    typeof value === "string" ? JSON.parse(value) : value;

  const db = await getDatabase(projectPath);

  // Placeholders are numbered by the position of the value just pushed.
  const params: (string | number)[] = [projectKey];
  const filters = ["project_key = $1"];

  if (options?.strategy) {
    params.push(options.strategy);
    filters.push(`strategy = $${params.length}`);
  }

  let sql = `
    SELECT id, project_key, task, context, strategy, epic_title, subtasks,
      outcomes, overall_success, total_duration_ms, total_errors,
      human_accepted, human_modified, human_notes,
      file_overlap_count, scope_accuracy, time_balance_ratio,
      created_at, updated_at
    FROM eval_records
    WHERE ${filters.join(" AND ")}
    ORDER BY created_at DESC
  `;

  if (options?.limit) {
    params.push(options.limit);
    sql += ` LIMIT $${params.length}`;
  }

  const { rows } = await db.query<EvalRecordRow>(sql, params);

  return rows.map((row) => ({
    id: row.id,
    project_key: row.project_key,
    task: row.task,
    context: row.context,
    strategy: row.strategy,
    epic_title: row.epic_title,
    subtasks: decodeJson(row.subtasks) as EvalRecord["subtasks"],
    // A missing outcomes column is surfaced as `undefined`, not `null`.
    outcomes: row.outcomes
      ? (decodeJson(row.outcomes) as EvalRecord["outcomes"])
      : undefined,
    overall_success: row.overall_success,
    total_duration_ms: row.total_duration_ms,
    total_errors: row.total_errors,
    human_accepted: row.human_accepted,
    human_modified: row.human_modified,
    human_notes: row.human_notes,
    file_overlap_count: row.file_overlap_count,
    scope_accuracy: row.scope_accuracy,
    time_balance_ratio: row.time_balance_ratio,
    created_at: parseInt(row.created_at),
    updated_at: parseInt(row.updated_at),
  }));
}
452
+
453
/**
 * Compute aggregate eval statistics for a project.
 *
 * Runs two queries against `eval_records`: one for the overall totals and
 * one for the per-strategy record counts.
 *
 * @param projectKey - Only rows whose `project_key` equals this value are counted.
 * @param projectPath - Forwarded unchanged to `getDatabase`.
 * @returns Totals, success rate (`0` when there are no records), average
 *   duration (`0` when the aggregate is missing) and counts keyed by strategy.
 */
export async function getEvalStats(
  projectKey: string,
  projectPath?: string,
): Promise<EvalStats> {
  const db = await getDatabase(projectPath);

  const totalsSql = `SELECT
      COUNT(*) as total_records,
      COUNT(*) FILTER (WHERE overall_success = true) as success_count,
      AVG(total_duration_ms) as avg_duration
    FROM eval_records
    WHERE project_key = $1`;

  const perStrategySql = `SELECT strategy, COUNT(*) as count
    FROM eval_records
    WHERE project_key = $1
    GROUP BY strategy`;

  // Overall totals: the aggregates are declared as strings and parsed here.
  const totals = await db.query<{
    total_records: string;
    success_count: string;
    avg_duration: string;
  }>(totalsSql, [projectKey]);

  const summary = totals.rows[0];
  const totalRecords = parseInt(summary?.total_records || "0");
  const successCount = parseInt(summary?.success_count || "0");
  const avgDurationMs = parseFloat(summary?.avg_duration || "0");

  // Per-strategy breakdown.
  const perStrategy = await db.query<{
    strategy: string;
    count: string;
  }>(perStrategySql, [projectKey]);

  const byStrategy: Record<string, number> = {};
  perStrategy.rows.forEach(({ strategy, count }) => {
    byStrategy[strategy] = parseInt(count);
  });

  // Guard the division so an empty project reports a rate of 0 instead of NaN.
  const successRate = totalRecords > 0 ? successCount / totalRecords : 0;

  return { totalRecords, successRate, avgDurationMs, byStrategy };
}
@@ -531,6 +531,28 @@ async function updateMaterializedViews(
531
531
  case "task_blocked":
532
532
  // No-op for now - could add task tracking table later
533
533
  break;
534
+
535
+ // Eval capture events - update eval_records projection
536
+ case "decomposition_generated":
537
+ await handleDecompositionGenerated(db, event);
538
+ break;
539
+
540
+ case "subtask_outcome":
541
+ await handleSubtaskOutcome(db, event);
542
+ break;
543
+
544
+ case "human_feedback":
545
+ await handleHumanFeedback(db, event);
546
+ break;
547
+
548
+ // Swarm checkpoint events - update swarm_contexts table
549
+ case "swarm_checkpointed":
550
+ await handleSwarmCheckpointed(db, event);
551
+ break;
552
+
553
+ case "swarm_recovered":
554
+ await handleSwarmRecovered(db, event);
555
+ break;
534
556
  }
535
557
  } catch (error) {
536
558
  console.error("[SwarmMail] Failed to update materialized views", {
@@ -707,6 +729,259 @@ async function handleFileReleased(
707
729
  }
708
730
  }
709
731
 
732
/**
 * Projection handler for `decomposition_generated` events.
 *
 * Inserts a new `eval_records` row whose id is the event's `epic_id`. The
 * statement uses `ON CONFLICT (id) DO NOTHING`, so a row that already exists
 * for that id is left untouched. `created_at` and `updated_at` are both set
 * to the event timestamp.
 *
 * @param db - Database handle as returned by `getDatabase`.
 * @param event - Event to project; any other event type is ignored.
 */
async function handleDecompositionGenerated(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "decomposition_generated") return;

  const insertSql = `INSERT INTO eval_records (
      id, project_key, task, context, strategy, epic_title, subtasks,
      created_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $8)
    ON CONFLICT (id) DO NOTHING`;

  // Subtasks are stored as a JSON document; a falsy context is stored as NULL.
  const serializedSubtasks = JSON.stringify(event.subtasks);
  const context = event.context || null;

  await db.query(insertSql, [
    event.epic_id,
    event.project_key,
    event.task,
    context,
    event.strategy,
    event.epic_title,
    serializedSubtasks,
    event.timestamp,
  ]);
}
756
+
757
/**
 * Projection handler for `subtask_outcome` events.
 *
 * Reads the eval record for the event's `epic_id`, appends the reported
 * outcome to its `outcomes` list, recomputes the derived metrics and writes
 * everything back in a single UPDATE. When no record exists a warning is
 * logged and nothing is written.
 *
 * NOTE(review): `scope_accuracy` is derived from this event's planned/actual
 * files only, so each new outcome overwrites the previous value instead of
 * aggregating across outcomes — confirm this is intended.
 *
 * @param db - Database handle as returned by `getDatabase`.
 * @param event - Event to project; any other event type is ignored.
 */
async function handleSubtaskOutcome(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "subtask_outcome") return;

  type PlannedSubtask = {
    title: string;
    files: string[];
  };
  type RecordedOutcome = {
    bead_id: string;
    planned_files: string[];
    actual_files: string[];
    duration_ms: number;
    error_count: number;
    retry_count: number;
    success: boolean;
  };

  // PGlite returns JSONB columns as already-parsed objects, so only values
  // that really arrive as strings are run through JSON.parse.
  const decodeJson = (value: unknown): unknown =>
    typeof value === "string" ? JSON.parse(value) : value;

  // Load the current state of the record; the metrics depend on it.
  const lookup = await db.query<{
    outcomes: string | null;
    subtasks: string;
  }>(`SELECT outcomes, subtasks FROM eval_records WHERE id = $1`, [
    event.epic_id,
  ]);

  const existing = lookup.rows[0];
  if (!existing) {
    console.warn(
      `[SwarmMail] No eval_record found for epic_id ${event.epic_id}`,
    );
    return;
  }

  const subtasks = decodeJson(existing.subtasks) as PlannedSubtask[];
  const previousOutcomes = existing.outcomes
    ? (decodeJson(existing.outcomes) as RecordedOutcome[])
    : [];

  // Outcome reported by this event, appended after the ones already stored.
  const newOutcome = {
    bead_id: event.bead_id,
    planned_files: event.planned_files,
    actual_files: event.actual_files,
    duration_ms: event.duration_ms,
    error_count: event.error_count,
    retry_count: event.retry_count,
    success: event.success,
  };
  const updatedOutcomes = [...previousOutcomes, newOutcome];

  // Derived metrics.
  const fileOverlapCount = computeFileOverlap(subtasks);
  const scopeAccuracy = computeScopeAccuracy(
    event.planned_files,
    event.actual_files,
  );
  const timeBalanceRatio = computeTimeBalanceRatio(updatedOutcomes);
  const overallSuccess = updatedOutcomes.every((outcome) => outcome.success);

  let totalDurationMs = 0;
  let totalErrors = 0;
  for (const outcome of updatedOutcomes) {
    totalDurationMs += outcome.duration_ms;
    totalErrors += outcome.error_count;
  }

  const updateSql = `UPDATE eval_records SET
      outcomes = $1,
      file_overlap_count = $2,
      scope_accuracy = $3,
      time_balance_ratio = $4,
      overall_success = $5,
      total_duration_ms = $6,
      total_errors = $7,
      updated_at = $8
    WHERE id = $9`;

  await db.query(updateSql, [
    JSON.stringify(updatedOutcomes),
    fileOverlapCount,
    scopeAccuracy,
    timeBalanceRatio,
    overallSuccess,
    totalDurationMs,
    totalErrors,
    event.timestamp,
    event.epic_id,
  ]);
}
856
+
857
/**
 * Projection handler for `human_feedback` events.
 *
 * Writes the reviewer's verdict (`accepted`, `modified`, `notes`) onto the
 * eval record whose id equals the event's `epic_id` and bumps `updated_at`
 * to the event timestamp. Falsy notes are stored as NULL.
 *
 * @param db - Database handle as returned by `getDatabase`.
 * @param event - Event to project; any other event type is ignored.
 */
async function handleHumanFeedback(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "human_feedback") return;

  const updateSql = `UPDATE eval_records SET
      human_accepted = $1,
      human_modified = $2,
      human_notes = $3,
      updated_at = $4
    WHERE id = $5`;

  const notes = event.notes || null;

  await db.query(updateSql, [
    event.accepted,
    event.modified,
    notes,
    event.timestamp,
    event.epic_id,
  ]);
}
879
+
880
/**
 * Projection handler for `swarm_checkpointed` events.
 *
 * Upserts a `swarm_contexts` row keyed by (`project_key`, `epic_id`,
 * `bead_id`): a new row is inserted, or the existing row's strategy, files,
 * dependencies, directives, recovery data and timestamps are replaced by the
 * values from this event. `checkpointed_at` and `updated_at` both receive the
 * event timestamp.
 *
 * @param db - Database handle as returned by `getDatabase`.
 * @param event - Event to project; any other event type is ignored.
 */
async function handleSwarmCheckpointed(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "swarm_checkpointed") return;

  const upsertSql = `INSERT INTO swarm_contexts (
      project_key, epic_id, bead_id, strategy, files, dependencies,
      directives, recovery, checkpointed_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $9)
    ON CONFLICT (project_key, epic_id, bead_id) DO UPDATE SET
      strategy = EXCLUDED.strategy,
      files = EXCLUDED.files,
      dependencies = EXCLUDED.dependencies,
      directives = EXCLUDED.directives,
      recovery = EXCLUDED.recovery,
      checkpointed_at = EXCLUDED.checkpointed_at,
      updated_at = EXCLUDED.updated_at`;

  // The structured fields are persisted as JSON documents.
  const files = JSON.stringify(event.files);
  const dependencies = JSON.stringify(event.dependencies);
  const directives = JSON.stringify(event.directives);
  const recovery = JSON.stringify(event.recovery);

  await db.query(upsertSql, [
    event.project_key,
    event.epic_id,
    event.bead_id,
    event.strategy,
    files,
    dependencies,
    directives,
    recovery,
    event.timestamp,
  ]);
}
912
+
913
/**
 * Projection handler for `swarm_recovered` events.
 *
 * Marks the matching `swarm_contexts` row (same `project_key`, `epic_id` and
 * `bead_id`) as recovered: `recovered_at` and `updated_at` are set to the
 * event timestamp and `recovered_from_checkpoint` is copied from the event.
 *
 * @param db - Database handle as returned by `getDatabase`.
 * @param event - Event to project; any other event type is ignored.
 */
async function handleSwarmRecovered(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "swarm_recovered") return;

  // $1 is used for both timestamp columns.
  const updateSql = `UPDATE swarm_contexts SET
      recovered_at = $1,
      recovered_from_checkpoint = $2,
      updated_at = $1
    WHERE project_key = $3 AND epic_id = $4 AND bead_id = $5`;

  await db.query(updateSql, [
    event.timestamp,
    event.recovered_from_checkpoint,
    event.project_key,
    event.epic_id,
    event.bead_id,
  ]);
}
935
+
936
+ // ============================================================================
937
+ // Metric Computation Helpers
938
+ // ============================================================================
939
+
940
/**
 * Count the files that are claimed by more than one subtask.
 *
 * Each subtask contributes a file at most once, so a path that is merely
 * listed twice inside the same subtask is not reported as an overlap.
 *
 * @param subtasks - Planned subtasks with the files each one intends to touch.
 *   A subtask without a `files` list is treated as touching no files.
 * @returns The number of distinct files that appear in two or more subtasks.
 */
function computeFileOverlap(subtasks: Array<{ files: string[] }>): number {
  const subtasksPerFile = new Map<string, number>();

  for (const subtask of subtasks) {
    // De-duplicate within the subtask so repeats do not look like sharing.
    for (const file of new Set(subtask.files ?? [])) {
      subtasksPerFile.set(file, (subtasksPerFile.get(file) ?? 0) + 1);
    }
  }

  let overlapping = 0;
  for (const owners of subtasksPerFile.values()) {
    if (owners > 1) overlapping += 1;
  }
  return overlapping;
}
954
+
955
/**
 * Compute scope accuracy: the share of planned files that were actually
 * touched, i.e. |planned ∩ actual| / |planned| over distinct file paths.
 *
 * Both lists are compared as sets, so repeated paths in either list cannot
 * push the result above 1.0 or dilute it.
 *
 * @param planned - Files the subtask was expected to touch.
 * @param actual - Files the subtask really touched.
 * @returns A value in [0, 1]; `1.0` when nothing was planned.
 */
function computeScopeAccuracy(planned: string[], actual: string[]): number {
  const plannedFiles = new Set(planned);
  // Nothing planned means there was nothing to miss.
  if (plannedFiles.size === 0) return 1.0;

  const touchedFiles = new Set(actual);
  let hits = 0;
  for (const file of plannedFiles) {
    if (touchedFiles.has(file)) hits += 1;
  }

  return hits / plannedFiles.size;
}
966
+
967
/**
 * Compute the time balance ratio: max(duration) / min(duration).
 * Lower is better (more balanced); `1` means every outcome took equally long.
 *
 * The extremes are found in a single pass rather than by spreading the list
 * into `Math.max`/`Math.min`, which can exceed the engine's argument limit
 * for very long lists.
 *
 * @param outcomes - Recorded outcomes carrying their duration in milliseconds.
 * @returns The ratio, or `null` when it is undefined: no outcomes, or any
 *   duration that is zero, negative or not a finite number.
 */
function computeTimeBalanceRatio(
  outcomes: Array<{ duration_ms: number }>,
): number | null {
  if (outcomes.length === 0) return null;

  let shortest = Infinity;
  let longest = -Infinity;

  for (const { duration_ms: duration } of outcomes) {
    // A non-positive or non-finite duration would yield a meaningless ratio
    // (division by zero, negative value or NaN), so report "no ratio".
    if (!Number.isFinite(duration) || duration <= 0) return null;
    if (duration < shortest) shortest = duration;
    if (duration > longest) longest = duration;
  }

  return longest / shortest;
}
984
+
710
985
  // ============================================================================
711
986
  // Convenience Functions
712
987
  // ============================================================================