@bradtaylorsf/alpha-loop 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/cli.js +80 -1
  2. package/dist/cli.js.map +1 -1
  3. package/dist/commands/eval.d.ts +53 -0
  4. package/dist/commands/eval.js +538 -0
  5. package/dist/commands/eval.js.map +1 -0
  6. package/dist/commands/evolve.d.ts +25 -0
  7. package/dist/commands/evolve.js +270 -0
  8. package/dist/commands/evolve.js.map +1 -0
  9. package/dist/commands/history.d.ts +1 -1
  10. package/dist/commands/history.js +4 -4
  11. package/dist/commands/history.js.map +1 -1
  12. package/dist/commands/run.js +139 -0
  13. package/dist/commands/run.js.map +1 -1
  14. package/dist/lib/agent.d.ts +8 -0
  15. package/dist/lib/agent.js +28 -3
  16. package/dist/lib/agent.js.map +1 -1
  17. package/dist/lib/config.d.ts +22 -0
  18. package/dist/lib/config.js +63 -0
  19. package/dist/lib/config.js.map +1 -1
  20. package/dist/lib/eval-checks.d.ts +91 -0
  21. package/dist/lib/eval-checks.js +254 -0
  22. package/dist/lib/eval-checks.js.map +1 -0
  23. package/dist/lib/eval-runner.d.ts +29 -0
  24. package/dist/lib/eval-runner.js +439 -0
  25. package/dist/lib/eval-runner.js.map +1 -0
  26. package/dist/lib/eval.d.ts +170 -0
  27. package/dist/lib/eval.js +507 -0
  28. package/dist/lib/eval.js.map +1 -0
  29. package/dist/lib/pipeline.d.ts +9 -0
  30. package/dist/lib/pipeline.js +186 -10
  31. package/dist/lib/pipeline.js.map +1 -1
  32. package/dist/lib/prompts.d.ts +18 -0
  33. package/dist/lib/prompts.js +48 -0
  34. package/dist/lib/prompts.js.map +1 -1
  35. package/dist/lib/score.d.ts +80 -0
  36. package/dist/lib/score.js +172 -0
  37. package/dist/lib/score.js.map +1 -0
  38. package/dist/lib/session.d.ts +2 -1
  39. package/dist/lib/session.js +16 -0
  40. package/dist/lib/session.js.map +1 -1
  41. package/dist/lib/traces.d.ts +173 -0
  42. package/dist/lib/traces.js +272 -0
  43. package/dist/lib/traces.js.map +1 -0
  44. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -8,7 +8,7 @@ import { syncCommand } from './commands/sync.js';
8
8
  program
9
9
  .name('alpha-loop')
10
10
  .description('Agent-agnostic automated development loop')
11
- .version('1.3.0');
11
+ .version('1.4.0');
12
12
  program
13
13
  .command('init')
14
14
  .description('Full project onboarding: config, templates, vision, scan, sync')
@@ -76,5 +76,84 @@ program
76
76
  const { reviewCommand } = await import('./commands/review.js');
77
77
  await reviewCommand(options);
78
78
  });
79
+ // Eval subcommands
80
+ const evalCmd = program
81
+ .command('eval')
82
+ .description('Run eval suite, capture failures, list cases, view scores');
83
+ evalCmd
84
+ .command('run', { isDefault: true })
85
+ .description('Run the eval suite and compute composite score')
86
+ .option('--tags <tags>', 'Filter by tags (comma-separated)')
87
+ .option('--suite <suite>', 'Run only a suite: step (fast) or e2e (slow)')
88
+ .option('--case <id>', 'Run a single eval case by ID prefix')
89
+ .option('--type <type>', 'Filter by type: full or step')
90
+ .option('--step <step>', 'Filter by pipeline step (plan, implement, test, review, verify)')
91
+ .option('--verbose', 'Show detailed output')
92
+ .action(async (options) => {
93
+ const { evalRunCommand } = await import('./commands/eval.js');
94
+ await evalRunCommand(options);
95
+ });
96
+ evalCmd
97
+ .command('capture [issue]')
98
+ .description('Capture failures as eval cases (interactive)')
99
+ .action(async (issue) => {
100
+ const { evalCaptureCommand } = await import('./commands/eval.js');
101
+ await evalCaptureCommand({ issue });
102
+ });
103
+ evalCmd
104
+ .command('list')
105
+ .description('Show eval cases and recent scores')
106
+ .action(async () => {
107
+ const { evalListCommand } = await import('./commands/eval.js');
108
+ evalListCommand();
109
+ });
110
+ evalCmd
111
+ .command('scores')
112
+ .description('Show score history over time')
113
+ .action(async () => {
114
+ const { evalScoresCommand } = await import('./commands/eval.js');
115
+ evalScoresCommand();
116
+ });
117
+ evalCmd
118
+ .command('search')
119
+ .description('Greedy search over model/agent configurations')
120
+ .option('--models <models>', 'Models to test (comma-separated)')
121
+ .option('--agents <agents>', 'Agents to test (comma-separated)')
122
+ .option('--max-runs <n>', 'Maximum number of eval runs')
123
+ .action(async (options) => {
124
+ const { evalSearchCommand } = await import('./commands/eval.js');
125
+ await evalSearchCommand(options);
126
+ });
127
+ evalCmd
128
+ .command('pareto')
129
+ .description('Show score/cost Pareto frontier')
130
+ .action(async () => {
131
+ const { evalParetoCommand } = await import('./commands/eval.js');
132
+ evalParetoCommand();
133
+ });
134
+ evalCmd
135
+ .command('compare <run1> <run2>')
136
+ .description('Compare two eval runs showing per-case changes')
137
+ .action(async (run1, run2) => {
138
+ const { evalCompareCommand } = await import('./commands/eval.js');
139
+ evalCompareCommand(run1, run2);
140
+ });
141
+ evalCmd
142
+ .command('import-swebench')
143
+ .description('Import eval cases from SWE-bench dataset')
144
+ .action(async () => {
145
+ const { evalImportSwebenchCommand } = await import('./commands/eval.js');
146
+ await evalImportSwebenchCommand();
147
+ });
148
+ program
149
+ .command('evolve')
150
+ .description('Meta-Harness-style automated optimization loop')
151
+ .option('--max-iterations <n>', 'Maximum optimization iterations (default: 5)')
152
+ .option('--dry-run', 'Preview without making changes')
153
+ .option('--verbose', 'Show detailed agent output')
154
+ .action(async (options) => {
155
+ const { evolveCommand } = await import('./commands/evolve.js');
156
+ await evolveCommand(options);
157
+ });
79
158
  program.parse();
80
159
  //# sourceMappingURL=cli.js.map
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,2CAA2C,CAAC;KACxD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,gEAAgE,CAAC;KAC7E,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC3D,MAAM,WAAW,EAAE,CAAC;AACtB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,2EAA2E,CAAC;KACxF,MAAM,CAAC,WAAW,EAAE,yBAAyB,CAAC;KAC9C,MAAM,CAAC,iBAAiB,EAAE,iBAAiB,CAAC;KAC5C,MAAM,CAAC,cAAc,EAAE,qBAAqB,CAAC;KAC7C,MAAM,CAAC,eAAe,EAAE,kBAAkB,CAAC;KAC3C,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,oBAAoB,EAAE,uCAAuC,CAAC;KACrE,MAAM,CAAC,cAAc,EAAE,kCAAkC,CAAC;KAC1D,MAAM,CAAC,qBAAqB,EAAE,wDAAwD,CAAC;KACvF,MAAM,CAAC,QAAQ,EAAE,4BAA4B,CAAC;KAC9C,MAAM,CAAC,WAAW,EAAE,sCAAsC,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;IACzD,IAAI,OAAO,CAAC,IAAI;QAAE,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxC,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,mBAAmB,CAAC;KAC5B,WAAW,CAAC,sBAAsB,CAAC;KACnC,MAAM,CAAC,MAAM,EAAE,+BAA+B,CAAC;KAC/C,MAAM,CAAC,SAAS,EAAE,yBAAyB,CAAC;KAC5C,MAAM,CAAC,cAAc,CAAC,CAAC;AAE1B,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,aAAa,CAAC,CAAC;AAEzB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,yDAAyD,CAAC;KACtE,MAAM,CAAC,SAAS,EAAE,iEAAiE,CAAC;KACpF,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,4DAA4D,CAAC;KACzE,MAAM,CAAC,eAAe,EAAE,
qCAAqC,CAAC;KAC9D,MAAM,CAAC,kBAAkB,EAAE,0CAA0C,CAAC;KACtE,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/D,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;AAC/B,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,2FAA2F,CAAC;KACxG,MAAM,CAAC,SAAS,EAAE,4CAA4C,CAAC;KAC/D,MAAM,CAAC,kBAAkB,EAAE,gDAAgD,CAAC;KAC5E,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/D,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;AAC/B,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,2CAA2C,CAAC;KACxD,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,gEAAgE,CAAC;KAC7E,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC3D,MAAM,WAAW,EAAE,CAAC;AACtB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,2EAA2E,CAAC;KACxF,MAAM,CAAC,WAAW,EAAE,yBAAyB,CAAC;KAC9C,MAAM,CAAC,iBAAiB,EAAE,iBAAiB,CAAC;KAC5C,MAAM,CAAC,cAAc,EAAE,qBAAqB,CAAC;KAC7C,MAAM,CAAC,eAAe,EAAE,kBAAkB,CAAC;KAC3C,MAAM,CAAC,cAAc,EAAE,0BAA0B,CAAC;KAClD,MAAM,CAAC,oBAAoB,EAAE,uCAAuC,CAAC;KACrE,MAAM,CAAC,cAAc,EAAE,kCAAkC,CAAC;KAC1D,MAAM,CAAC,qBAAqB,EAAE,wDAAwD,CAAC;KACvF,MAAM,CAAC,QAAQ,EAAE,4BAA4B,CAAC;KAC9C,MAAM,CAAC,WAAW,EAAE,sCAAsC,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;IACzD,IAAI,OAAO,CAAC,IAAI;QAAE,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IACxC,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,mBAAmB,CAAC;KAC5B,WAAW,CAAC,sBAAsB,CAAC;KACnC,MAAM,CAAC,MAAM,EAAE,+BAA+B,CAAC;KAC/C,MAAM,CAAC,SAAS,EAAE,yBAAyB,CAAC;KAC5C,MAAM,CAAC,cAAc,CAAC,CAAC;AAE1B,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,aAAa,CAAC,CAAC;AAEzB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,kCAAkC,CAAC;KAC/C,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,yDAAyD,CAAC;KACtE,MAAM,CAAC,SAAS,EAAE,iEAAiE,CAAC;KACpF,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,4DAA4D,CAAC;KACzE,MAAM,CAAC,eAAe,EAAE,
qCAAqC,CAAC;KAC9D,MAAM,CAAC,kBAAkB,EAAE,0CAA0C,CAAC;KACtE,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/D,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;AAC/B,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,2FAA2F,CAAC;KACxG,MAAM,CAAC,SAAS,EAAE,4CAA4C,CAAC;KAC/D,MAAM,CAAC,kBAAkB,EAAE,gDAAgD,CAAC;KAC5E,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/D,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;AAC/B,CAAC,CAAC,CAAC;AAEL,mBAAmB;AACnB,MAAM,OAAO,GAAG,OAAO;KACpB,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,2DAA2D,CAAC,CAAC;AAE5E,OAAO;KACJ,OAAO,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;KACnC,WAAW,CAAC,gDAAgD,CAAC;KAC7D,MAAM,CAAC,eAAe,EAAE,kCAAkC,CAAC;KAC3D,MAAM,CAAC,iBAAiB,EAAE,6CAA6C,CAAC;KACxE,MAAM,CAAC,aAAa,EAAE,qCAAqC,CAAC;KAC5D,MAAM,CAAC,eAAe,EAAE,8BAA8B,CAAC;KACvD,MAAM,CAAC,eAAe,EAAE,iEAAiE,CAAC;KAC1F,MAAM,CAAC,WAAW,EAAE,sBAAsB,CAAC;KAC3C,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC9D,MAAM,cAAc,CAAC,OAAO,CAAC,CAAC;AAChC,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,iBAAiB,CAAC;KAC1B,WAAW,CAAC,8CAA8C,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;IACtB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAClE,MAAM,kBAAkB,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,mCAAmC,CAAC;KAChD,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC/D,eAAe,EAAE,CAAC;AACpB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,8BAA8B,CAAC;KAC3C,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IACjE,iBAAiB,EAAE,CAAC;AACtB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,+CAA+C,CAAC;KAC5D,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,CAAC;KAC/D,MAAM,CAAC,mBAAmB,EAAE,kCAAkC,CAAC;KAC/D,MAAM,CAAC,gBAAgB,EAAE,6BAA6B,CAAC;KACvD,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,M
AAM,CAAC,oBAAoB,CAAC,CAAC;IACjE,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,iCAAiC,CAAC;KAC9C,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IACjE,iBAAiB,EAAE,CAAC;AACtB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,uBAAuB,CAAC;KAChC,WAAW,CAAC,gDAAgD,CAAC;KAC7D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAAY,EAAE,EAAE;IAC3C,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAClE,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;AACjC,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,iBAAiB,CAAC;KAC1B,WAAW,CAAC,0CAA0C,CAAC;KACvD,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,MAAM,EAAE,yBAAyB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;IACzE,MAAM,yBAAyB,EAAE,CAAC;AACpC,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,gDAAgD,CAAC;KAC7D,MAAM,CAAC,sBAAsB,EAAE,8CAA8C,CAAC;KAC9E,MAAM,CAAC,WAAW,EAAE,gCAAgC,CAAC;KACrD,MAAM,CAAC,WAAW,EAAE,4BAA4B,CAAC;KACjD,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC/D,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;AAC/B,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
package/dist/commands/eval.d.ts ADDED
@@ -0,0 +1,53 @@
1
+ export type EvalOptions = {
2
+ tags?: string;
3
+ suite?: string;
4
+ case?: string;
5
+ type?: 'full' | 'step';
6
+ step?: string;
7
+ verbose?: boolean;
8
+ };
9
+ export type EvalCaptureOptions = {
10
+ issue?: string;
11
+ };
12
+ export type EvalSearchOptions = {
13
+ models?: string;
14
+ agents?: string;
15
+ maxRuns?: string;
16
+ };
17
+ /**
18
+ * Run the eval suite.
19
+ */
20
+ export declare function evalRunCommand(options: EvalOptions): Promise<void>;
21
+ /**
22
+ * Compare two eval runs.
23
+ */
24
+ export declare function evalCompareCommand(run1: string, run2: string): void;
25
+ /**
26
+ * Capture a failure as an eval case — interactive walkthrough.
27
+ *
28
+ * Flow:
29
+ * 1. Show unannotated (auto-captured) skeleton cases first, prompt to annotate
30
+ * 2. Show recent session failures grouped by session
31
+ * 3. For each failure: show step, test/verify status, prompt for diagnosis
32
+ */
33
+ export declare function evalCaptureCommand(options: EvalCaptureOptions): Promise<void>;
34
+ /**
35
+ * List eval cases and recent scores.
36
+ */
37
+ export declare function evalListCommand(): void;
38
+ /**
39
+ * Show score history.
40
+ */
41
+ export declare function evalScoresCommand(): void;
42
+ /**
43
+ * Show score/cost Pareto frontier.
44
+ */
45
+ export declare function evalParetoCommand(): void;
46
+ /**
47
+ * Greedy search over model/agent configs.
48
+ */
49
+ export declare function evalSearchCommand(options: EvalSearchOptions): Promise<void>;
50
+ /**
51
+ * Import SWE-bench cases (placeholder — requires HuggingFace download).
52
+ */
53
+ export declare function evalImportSwebenchCommand(): Promise<void>;