opencode-swarm-plugin 0.40.0 → 0.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
  2. package/.hive/analysis/session-data-quality-audit.md +320 -0
  3. package/.hive/eval-results.json +481 -24
  4. package/.hive/issues.jsonl +67 -16
  5. package/.hive/memories.jsonl +159 -1
  6. package/.opencode/eval-history.jsonl +315 -0
  7. package/.turbo/turbo-build.log +5 -5
  8. package/CHANGELOG.md +165 -0
  9. package/README.md +2 -0
  10. package/SCORER-ANALYSIS.md +598 -0
  11. package/bin/eval-gate.test.ts +158 -0
  12. package/bin/eval-gate.ts +74 -0
  13. package/bin/swarm.serve.test.ts +46 -0
  14. package/bin/swarm.test.ts +661 -732
  15. package/bin/swarm.ts +335 -0
  16. package/dist/compaction-hook.d.ts +7 -5
  17. package/dist/compaction-hook.d.ts.map +1 -1
  18. package/dist/compaction-prompt-scoring.d.ts +1 -0
  19. package/dist/compaction-prompt-scoring.d.ts.map +1 -1
  20. package/dist/eval-runner.d.ts +134 -0
  21. package/dist/eval-runner.d.ts.map +1 -0
  22. package/dist/hive.d.ts.map +1 -1
  23. package/dist/index.d.ts +29 -0
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +99741 -58858
  26. package/dist/memory-tools.d.ts +70 -2
  27. package/dist/memory-tools.d.ts.map +1 -1
  28. package/dist/memory.d.ts +37 -0
  29. package/dist/memory.d.ts.map +1 -1
  30. package/dist/observability-tools.d.ts +64 -0
  31. package/dist/observability-tools.d.ts.map +1 -1
  32. package/dist/plugin.js +99356 -58318
  33. package/dist/swarm-orchestrate.d.ts.map +1 -1
  34. package/dist/swarm-prompts.d.ts +32 -1
  35. package/dist/swarm-prompts.d.ts.map +1 -1
  36. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
  37. package/evals/ARCHITECTURE.md +1189 -0
  38. package/evals/example.eval.ts +3 -4
  39. package/evals/fixtures/compaction-prompt-cases.ts +6 -0
  40. package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
  41. package/evals/scorers/coordinator-discipline.ts +0 -323
  42. package/evals/swarm-decomposition.eval.ts +4 -2
  43. package/package.json +4 -3
  44. package/src/compaction-prompt-scorers.test.ts +185 -9
  45. package/src/compaction-prompt-scoring.ts +7 -5
  46. package/src/eval-runner.test.ts +128 -1
  47. package/src/eval-runner.ts +46 -0
  48. package/src/hive.ts +43 -42
  49. package/src/memory-tools.test.ts +84 -0
  50. package/src/memory-tools.ts +68 -3
  51. package/src/memory.test.ts +2 -112
  52. package/src/memory.ts +88 -49
  53. package/src/observability-tools.test.ts +13 -0
  54. package/src/observability-tools.ts +277 -0
  55. package/src/swarm-orchestrate.test.ts +162 -0
  56. package/src/swarm-orchestrate.ts +7 -5
  57. package/src/swarm-prompts.test.ts +168 -4
  58. package/src/swarm-prompts.ts +228 -7
  59. package/.env +0 -2
  60. package/.turbo/turbo-test.log +0 -481
  61. package/.turbo/turbo-typecheck.log +0 -1
@@ -14,19 +14,18 @@ evalite("Example: Basic scorer test", {
14
14
  data: async () => {
15
15
  return [
16
16
  {
17
- input: "Test task",
18
- output: JSON.stringify({
17
+ input: {
19
18
  epic: { title: "Test Epic", description: "Test" },
20
19
  subtasks: [
21
20
  { title: "Subtask 1", files: ["a.ts"], estimated_complexity: 1 },
22
21
  { title: "Subtask 2", files: ["b.ts"], estimated_complexity: 1 },
23
22
  ],
24
- }),
23
+ },
25
24
  },
26
25
  ];
27
26
  },
28
27
  task: async (input) => {
29
- return input; // passthrough for testing
28
+ return JSON.stringify(input);
30
29
  },
31
30
  scorers: [subtaskIndependence],
32
31
  });
@@ -78,6 +78,8 @@ Coordinators do NOT edit code directly. These tools are FORBIDDEN:
78
78
  - edit
79
79
  - write
80
80
  - bash (for file modifications)
81
+ - swarmmail_reserve (only workers reserve)
82
+ - git commit (workers commit)
81
83
 
82
84
  Use swarm_spawn_subtask to delegate work to workers.
83
85
 
@@ -249,6 +251,8 @@ You are the COORDINATOR of epic mjkweh7q9n4.
249
251
  - edit
250
252
  - write
251
253
  - bash (for file mods)
254
+ - swarmmail_reserve (only workers)
255
+ - git commit (workers only)
252
256
 
253
257
  NEVER edit files yourself.
254
258
  ALWAYS delegate to workers.
@@ -289,6 +293,8 @@ You are coordinating epics:
289
293
  - edit
290
294
  - write
291
295
  - bash
296
+ - swarmmail_reserve
297
+ - git commit
292
298
 
293
299
  ALWAYS check status first.
294
300
  NEVER edit files directly.
@@ -5,7 +5,7 @@ import { describe, expect, it } from "bun:test";
5
5
  import type { CoordinatorSession } from "../../src/eval-capture.js";
6
6
  import {
7
7
  overallDiscipline,
8
- reviewEfficiency,
8
+
9
9
  reviewThoroughness,
10
10
  spawnEfficiency,
11
11
  timeToFirstSpawn,
@@ -537,164 +537,3 @@ describe("overallDiscipline", () => {
537
537
  });
538
538
  });
539
539
 
540
- describe("reviewEfficiency", () => {
541
- it("scores 1.0 for ideal 1:1 ratio (one review per spawn)", async () => {
542
- const session: CoordinatorSession = {
543
- session_id: "test-session",
544
- epic_id: "test-epic",
545
- start_time: "2025-01-01T00:00:00Z",
546
- events: [
547
- {
548
- session_id: "test-session",
549
- epic_id: "test-epic",
550
- timestamp: "2025-01-01T00:00:10Z",
551
- event_type: "DECISION",
552
- decision_type: "worker_spawned",
553
- payload: { bead_id: "bd-1" },
554
- },
555
- {
556
- session_id: "test-session",
557
- epic_id: "test-epic",
558
- timestamp: "2025-01-01T00:00:20Z",
559
- event_type: "DECISION",
560
- decision_type: "worker_spawned",
561
- payload: { bead_id: "bd-2" },
562
- },
563
- {
564
- session_id: "test-session",
565
- epic_id: "test-epic",
566
- timestamp: "2025-01-01T00:10:00Z",
567
- event_type: "DECISION",
568
- decision_type: "review_completed",
569
- payload: { bead_id: "bd-1" },
570
- },
571
- {
572
- session_id: "test-session",
573
- epic_id: "test-epic",
574
- timestamp: "2025-01-01T00:10:10Z",
575
- event_type: "DECISION",
576
- decision_type: "review_completed",
577
- payload: { bead_id: "bd-2" },
578
- },
579
- ],
580
- };
581
-
582
- const result = await reviewEfficiency({
583
- output: JSON.stringify(session),
584
- expected: {},
585
- input: undefined,
586
- });
587
-
588
- expect(result.score).toBe(1.0);
589
- expect(result.message).toContain("2 reviews / 2 spawns");
590
- });
591
-
592
- it("penalizes over-reviewing (>2:1 ratio)", async () => {
593
- // 6 reviews for 2 spawns = 3:1 ratio (over-reviewing)
594
- const session: CoordinatorSession = {
595
- session_id: "test-session",
596
- epic_id: "test-epic",
597
- start_time: "2025-01-01T00:00:00Z",
598
- events: [
599
- {
600
- session_id: "test-session",
601
- epic_id: "test-epic",
602
- timestamp: "2025-01-01T00:00:10Z",
603
- event_type: "DECISION",
604
- decision_type: "worker_spawned",
605
- payload: { bead_id: "bd-1" },
606
- },
607
- {
608
- session_id: "test-session",
609
- epic_id: "test-epic",
610
- timestamp: "2025-01-01T00:00:20Z",
611
- event_type: "DECISION",
612
- decision_type: "worker_spawned",
613
- payload: { bead_id: "bd-2" },
614
- },
615
- ...Array.from({ length: 6 }, (_, i) => ({
616
- session_id: "test-session",
617
- epic_id: "test-epic",
618
- timestamp: `2025-01-01T00:10:${String(i * 10).padStart(2, "0")}Z`,
619
- event_type: "DECISION" as const,
620
- decision_type: "review_completed" as const,
621
- payload: { bead_id: `bd-${(i % 2) + 1}` },
622
- })),
623
- ],
624
- };
625
-
626
- const result = await reviewEfficiency({
627
- output: JSON.stringify(session),
628
- expected: {},
629
- input: undefined,
630
- });
631
-
632
- // 3:1 ratio should be penalized (score < 0.5)
633
- expect(result.score).toBeLessThan(0.5);
634
- expect(result.message).toContain("6 reviews / 2 spawns");
635
- });
636
-
637
- it("handles no spawns gracefully", async () => {
638
- const session: CoordinatorSession = {
639
- session_id: "test-session",
640
- epic_id: "test-epic",
641
- start_time: "2025-01-01T00:00:00Z",
642
- events: [
643
- {
644
- session_id: "test-session",
645
- epic_id: "test-epic",
646
- timestamp: "2025-01-01T00:00:00Z",
647
- event_type: "DECISION",
648
- decision_type: "strategy_selected",
649
- payload: { strategy: "file-based" },
650
- },
651
- ],
652
- };
653
-
654
- const result = await reviewEfficiency({
655
- output: JSON.stringify(session),
656
- expected: {},
657
- input: undefined,
658
- });
659
-
660
- expect(result.score).toBe(1.0);
661
- expect(result.message).toContain("No workers spawned");
662
- });
663
-
664
- it("handles no reviews gracefully (0:N ratio)", async () => {
665
- const session: CoordinatorSession = {
666
- session_id: "test-session",
667
- epic_id: "test-epic",
668
- start_time: "2025-01-01T00:00:00Z",
669
- events: [
670
- {
671
- session_id: "test-session",
672
- epic_id: "test-epic",
673
- timestamp: "2025-01-01T00:00:10Z",
674
- event_type: "DECISION",
675
- decision_type: "worker_spawned",
676
- payload: { bead_id: "bd-1" },
677
- },
678
- {
679
- session_id: "test-session",
680
- epic_id: "test-epic",
681
- timestamp: "2025-01-01T00:00:20Z",
682
- event_type: "DECISION",
683
- decision_type: "worker_spawned",
684
- payload: { bead_id: "bd-2" },
685
- },
686
- ],
687
- };
688
-
689
- const result = await reviewEfficiency({
690
- output: JSON.stringify(session),
691
- expected: {},
692
- input: undefined,
693
- });
694
-
695
- // No reviews is bad (should use reviewThoroughness for this)
696
- // But this scorer focuses on over-reviewing, so no reviews = 1.0 (not over-reviewing)
697
- expect(result.score).toBe(1.0);
698
- expect(result.message).toContain("0 reviews / 2 spawns");
699
- });
700
- });
@@ -132,76 +132,6 @@ export const spawnEfficiency = createScorer({
132
132
  },
133
133
  });
134
134
 
135
- /**
136
- * Review Efficiency Scorer
137
- *
138
- * Measures review-to-spawn ratio to detect over-reviewing.
139
- * Ideal ratio is 1:1 (one review per spawned worker).
140
- * Penalizes >2:1 ratio (over-reviewing wastes context).
141
- *
142
- * Scoring:
143
- * - 0:N or 1:1 ratio = 1.0 (perfect)
144
- * - 2:1 ratio = 0.5 (threshold)
145
- * - >2:1 ratio = linear penalty toward 0.0
146
- *
147
- * Score: normalized to 0-1 (lower ratio is better)
148
- */
149
- export const reviewEfficiency = createScorer({
150
- name: "Review Efficiency",
151
- description: "Review-to-spawn ratio (penalize over-reviewing >2:1)",
152
- scorer: ({ output }) => {
153
- try {
154
- const session = JSON.parse(String(output)) as CoordinatorSession;
155
-
156
- // Count worker_spawned events
157
- const spawned = session.events.filter(
158
- (e) =>
159
- e.event_type === "DECISION" && e.decision_type === "worker_spawned"
160
- ).length;
161
-
162
- if (spawned === 0) {
163
- return {
164
- score: 1.0,
165
- message: "No workers spawned",
166
- };
167
- }
168
-
169
- // Count review_completed events
170
- const reviewed = session.events.filter(
171
- (e) =>
172
- e.event_type === "DECISION" && e.decision_type === "review_completed"
173
- ).length;
174
-
175
- const ratio = reviewed / spawned;
176
-
177
- // Scoring:
178
- // - ratio <= 1.0: perfect (1.0)
179
- // - ratio <= 2.0: linear decay from 1.0 to 0.5
180
- // - ratio > 2.0: linear penalty from 0.5 toward 0.0
181
- let score: number;
182
- if (ratio <= 1.0) {
183
- score = 1.0;
184
- } else if (ratio <= 2.0) {
185
- // Linear decay: 1.0 at ratio=1.0, 0.5 at ratio=2.0
186
- score = 1.0 - (ratio - 1.0) * 0.5;
187
- } else {
188
- // Penalty for extreme over-reviewing: 0.5 at ratio=2.0, 0.0 at ratio=4.0
189
- score = Math.max(0, 0.5 - (ratio - 2.0) * 0.25);
190
- }
191
-
192
- return {
193
- score,
194
- message: `${reviewed} reviews / ${spawned} spawns (${ratio.toFixed(1)}:1 ratio)`,
195
- };
196
- } catch (error) {
197
- return {
198
- score: 0,
199
- message: `Failed to parse CoordinatorSession: ${error}`,
200
- };
201
- }
202
- },
203
- });
204
-
205
135
  /**
206
136
  * Review Thoroughness Scorer
207
137
  *
@@ -334,259 +264,6 @@ export const timeToFirstSpawn = createScorer({
334
264
  },
335
265
  });
336
266
 
337
- /**
338
- * Researcher Spawn Rate Scorer
339
- *
340
- * Measures whether coordinator spawns researchers for unfamiliar technology.
341
- * Coordinators should delegate research instead of calling pdf-brain/context7 directly.
342
- *
343
- * Score: 1.0 if researcher_spawned events exist, 0.0 otherwise
344
- */
345
- export const researcherSpawnRate = createScorer({
346
- name: "Researcher Spawn Rate",
347
- description: "Coordinator spawned researchers for unfamiliar tech",
348
- scorer: ({ output }) => {
349
- try {
350
- const session = JSON.parse(String(output)) as CoordinatorSession;
351
-
352
- // Count researcher_spawned events
353
- const researchers = session.events.filter(
354
- (e) =>
355
- e.event_type === "DECISION" && e.decision_type === "researcher_spawned"
356
- );
357
-
358
- const count = researchers.length;
359
-
360
- if (count === 0) {
361
- return {
362
- score: 0.0,
363
- message: "No researchers spawned (may indicate coordinator queried docs directly)",
364
- };
365
- }
366
-
367
- return {
368
- score: 1.0,
369
- message: `${count} researcher(s) spawned`,
370
- };
371
- } catch (error) {
372
- return {
373
- score: 0,
374
- message: `Failed to parse CoordinatorSession: ${error}`,
375
- };
376
- }
377
- },
378
- });
379
-
380
- /**
381
- * Skill Loading Rate Scorer
382
- *
383
- * Measures whether coordinator loads relevant skills via skills_use().
384
- * Shows knowledge-seeking behavior.
385
- *
386
- * Score: 1.0 if skill_loaded events exist, 0.5 otherwise (not critical, but helpful)
387
- */
388
- export const skillLoadingRate = createScorer({
389
- name: "Skill Loading Rate",
390
- description: "Coordinator loaded relevant skills for domain knowledge",
391
- scorer: ({ output }) => {
392
- try {
393
- const session = JSON.parse(String(output)) as CoordinatorSession;
394
-
395
- // Count skill_loaded events
396
- const skills = session.events.filter(
397
- (e) =>
398
- e.event_type === "DECISION" && e.decision_type === "skill_loaded"
399
- );
400
-
401
- const count = skills.length;
402
-
403
- if (count === 0) {
404
- return {
405
- score: 0.5,
406
- message: "No skills loaded (not critical, but helpful)",
407
- };
408
- }
409
-
410
- return {
411
- score: 1.0,
412
- message: `${count} skill(s) loaded`,
413
- };
414
- } catch (error) {
415
- return {
416
- score: 0,
417
- message: `Failed to parse CoordinatorSession: ${error}`,
418
- };
419
- }
420
- },
421
- });
422
-
423
- /**
424
- * Inbox Monitoring Rate Scorer
425
- *
426
- * Measures how frequently coordinator checks inbox for worker messages.
427
- * Regular monitoring (every ~15min or when workers finish) shows good coordination.
428
- *
429
- * Score based on inbox_checked events relative to worker activity:
430
- * - 0 checks = 0.0 (coordinator not monitoring)
431
- * - 1+ checks = 1.0 (coordinator is responsive)
432
- */
433
- export const inboxMonitoringRate = createScorer({
434
- name: "Inbox Monitoring Rate",
435
- description: "Coordinator checked inbox regularly for worker messages",
436
- scorer: ({ output }) => {
437
- try {
438
- const session = JSON.parse(String(output)) as CoordinatorSession;
439
-
440
- // Count inbox_checked events
441
- const checks = session.events.filter(
442
- (e) =>
443
- e.event_type === "DECISION" && e.decision_type === "inbox_checked"
444
- );
445
-
446
- // Count worker activity (spawns + outcomes)
447
- const workerActivity = session.events.filter(
448
- (e) =>
449
- (e.event_type === "DECISION" && e.decision_type === "worker_spawned") ||
450
- (e.event_type === "OUTCOME" &&
451
- ["subtask_success", "subtask_failed", "blocker_detected"].includes(
452
- e.outcome_type
453
- ))
454
- );
455
-
456
- const checkCount = checks.length;
457
- const activityCount = workerActivity.length;
458
-
459
- if (activityCount === 0) {
460
- return {
461
- score: 1.0,
462
- message: "No worker activity to monitor",
463
- };
464
- }
465
-
466
- if (checkCount === 0) {
467
- return {
468
- score: 0.0,
469
- message: `${activityCount} worker events, 0 inbox checks (not monitoring)`,
470
- };
471
- }
472
-
473
- return {
474
- score: 1.0,
475
- message: `${checkCount} inbox check(s) for ${activityCount} worker events`,
476
- };
477
- } catch (error) {
478
- return {
479
- score: 0,
480
- message: `Failed to parse CoordinatorSession: ${error}`,
481
- };
482
- }
483
- },
484
- });
485
-
486
- /**
487
- * Blocker Response Time Scorer
488
- *
489
- * Measures how quickly coordinator responds to blocked workers.
490
- * Time between blocker_detected (OUTCOME) and blocker_resolved (DECISION).
491
- *
492
- * Normalization:
493
- * - < 5min: 1.0 (excellent)
494
- * - 5-15min: linear decay to 0.5
495
- * - > 15min: 0.0 (too slow, worker is idle)
496
- *
497
- * Score: Average response time across all blockers
498
- */
499
- export const blockerResponseTime = createScorer({
500
- name: "Blocker Response Time",
501
- description: "Coordinator unblocked workers quickly",
502
- scorer: ({ output }) => {
503
- try {
504
- const session = JSON.parse(String(output)) as CoordinatorSession;
505
-
506
- // Find blocker_detected events
507
- const blockers = session.events.filter(
508
- (e) =>
509
- e.event_type === "OUTCOME" && e.outcome_type === "blocker_detected"
510
- );
511
-
512
- if (blockers.length === 0) {
513
- return {
514
- score: 1.0,
515
- message: "No blockers detected",
516
- };
517
- }
518
-
519
- // Find blocker_resolved events
520
- const resolutions = session.events.filter(
521
- (e) =>
522
- e.event_type === "DECISION" && e.decision_type === "blocker_resolved"
523
- );
524
-
525
- if (resolutions.length === 0) {
526
- return {
527
- score: 0.0,
528
- message: `${blockers.length} blocker(s) detected, 0 resolved (workers still blocked)`,
529
- };
530
- }
531
-
532
- // Match blockers to resolutions by subtask_id and calculate response times
533
- const responseTimes: number[] = [];
534
- for (const blocker of blockers) {
535
- const subtaskId = (blocker.payload as any).subtask_id;
536
- const blockerTime = new Date(blocker.timestamp).getTime();
537
-
538
- // Find resolution for this subtask
539
- const resolution = resolutions.find(
540
- (r) => (r.payload as any).subtask_id === subtaskId
541
- );
542
-
543
- if (resolution) {
544
- const resolutionTime = new Date(resolution.timestamp).getTime();
545
- const deltaMs = resolutionTime - blockerTime;
546
- responseTimes.push(deltaMs);
547
- }
548
- }
549
-
550
- if (responseTimes.length === 0) {
551
- return {
552
- score: 0.5,
553
- message: `${blockers.length} blocker(s) detected, ${resolutions.length} resolution(s), but no matches by subtask_id`,
554
- };
555
- }
556
-
557
- // Calculate average response time
558
- const avgResponseMs =
559
- responseTimes.reduce((sum, t) => sum + t, 0) / responseTimes.length;
560
-
561
- // Normalize: < 5min = 1.0, > 15min = 0.0, linear in between
562
- const EXCELLENT_MS = 5 * 60 * 1000; // 5 min
563
- const POOR_MS = 15 * 60 * 1000; // 15 min
564
-
565
- let score: number;
566
- if (avgResponseMs < EXCELLENT_MS) {
567
- score = 1.0;
568
- } else if (avgResponseMs > POOR_MS) {
569
- score = 0.0;
570
- } else {
571
- // Linear decay from 1.0 to 0.0
572
- score = 1.0 - (avgResponseMs - EXCELLENT_MS) / (POOR_MS - EXCELLENT_MS);
573
- }
574
-
575
- const avgMinutes = Math.round(avgResponseMs / 1000 / 60);
576
-
577
- return {
578
- score,
579
- message: `Avg response time: ${avgMinutes}min (${responseTimes.length}/${blockers.length} blockers resolved)`,
580
- };
581
- } catch (error) {
582
- return {
583
- score: 0,
584
- message: `Failed to parse CoordinatorSession: ${error}`,
585
- };
586
- }
587
- },
588
- });
589
-
590
267
  /**
591
268
  * Overall Discipline Scorer
592
269
  *
@@ -34,7 +34,9 @@ import {
34
34
  } from "./lib/data-loader.js";
35
35
 
36
36
  // Determine project key from current directory
37
- const PROJECT_KEY = "opencode-swarm-plugin";
37
+ // NOTE: project_key in eval_records is the full path (from getHiveWorkingDirectory),
38
+ // not a short name. Use process.cwd() to match.
39
+ const PROJECT_KEY = process.cwd();
38
40
  const PROJECT_PATH = process.cwd();
39
41
 
40
42
  // Check if we have enough real data to use instead of fixtures
@@ -42,7 +44,7 @@ const useRealData = await hasRealEvalData(PROJECT_KEY, 5, PROJECT_PATH);
42
44
 
43
45
  // Load data based on availability
44
46
  const evalCases = useRealData
45
- ? await loadEvalCases(PROJECT_KEY, { limit: 20, projectPath: PROJECT_PATH })
47
+ ? await loadEvalCases(PROJECT_KEY, { limit: 20, projectPath: PROJECT_PATH }) // PROJECT_KEY is now process.cwd()
46
48
  : decompositionCases.map((testCase) => ({
47
49
  input: testCase.input,
48
50
  expected: testCase.expected,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-swarm-plugin",
3
- "version": "0.40.0",
3
+ "version": "0.42.1",
4
4
  "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -23,7 +23,7 @@
23
23
  "registry": "https://registry.npmjs.org/"
24
24
  },
25
25
  "scripts": {
26
- "build": "bun build ./src/index.ts --outdir ./dist --target node --external @electric-sql/pglite --external swarm-mail && bun build ./src/plugin.ts --outfile ./dist/plugin.js --target node --external @electric-sql/pglite --external swarm-mail && tsc",
26
+ "build": "bun build ./src/index.ts --outdir ./dist --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && bun build ./src/plugin.ts --outfile ./dist/plugin.js --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && tsc",
27
27
  "dev": "bun --watch src/index.ts",
28
28
  "test": "bun test --timeout 10000 src/anti-patterns.test.ts src/mandate-promotion.test.ts src/mandate-storage.test.ts src/output-guardrails.test.ts src/pattern-maturity.test.ts src/skills.test.ts src/structured.test.ts src/schemas/",
29
29
  "test:integration": "bun test --timeout 60000 src/*.integration.test.ts",
@@ -34,6 +34,7 @@
34
34
  "eval:decomposition": "bun --env-file=.env run bunx evalite run evals/swarm-decomposition.eval.ts",
35
35
  "eval:coordinator": "bun --env-file=.env run bunx evalite run evals/coordinator-session.eval.ts",
36
36
  "eval:compaction": "bun --env-file=.env run bunx evalite run evals/compaction-prompt.eval.ts",
37
+ "eval:gate": "bun run bin/eval-gate.ts",
37
38
  "migrate:sessions": "bun run scripts/migrate-unknown-sessions.ts",
38
39
  "postinstall": "node -e \"console.log('\\n\\x1b[33m Run \\x1b[36mswarm setup\\x1b[33m to configure OpenCode integration\\x1b[0m\\n')\""
39
40
  },
@@ -46,7 +47,7 @@
46
47
  "minimatch": "^10.1.1",
47
48
  "pino": "^9.6.0",
48
49
  "pino-roll": "^1.3.0",
49
- "swarm-mail": "1.5.2",
50
+ "swarm-mail": "1.5.4",
50
51
  "yaml": "^2.8.2",
51
52
  "zod": "4.1.8"
52
53
  },