snapeval 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/bin/snapeval.ts +30 -1
  2. package/dist/bin/snapeval.js +25 -0
  3. package/dist/bin/snapeval.js.map +1 -1
  4. package/dist/src/adapters/copilot-sdk-client.js +1 -1
  5. package/dist/src/adapters/copilot-sdk-client.js.map +1 -1
  6. package/dist/src/adapters/harness/copilot-sdk.d.ts +11 -0
  7. package/dist/src/adapters/harness/copilot-sdk.js +101 -0
  8. package/dist/src/adapters/harness/copilot-sdk.js.map +1 -0
  9. package/dist/src/adapters/harness/resolve.js +10 -2
  10. package/dist/src/adapters/harness/resolve.js.map +1 -1
  11. package/dist/src/adapters/inference/copilot-sdk.js +4 -1
  12. package/dist/src/adapters/inference/copilot-sdk.js.map +1 -1
  13. package/dist/src/adapters/report/terminal.js +89 -9
  14. package/dist/src/adapters/report/terminal.js.map +1 -1
  15. package/dist/src/commands/eval.d.ts +3 -0
  16. package/dist/src/commands/eval.js +106 -17
  17. package/dist/src/commands/eval.js.map +1 -1
  18. package/dist/src/commands/review.d.ts +1 -0
  19. package/dist/src/commands/review.js.map +1 -1
  20. package/dist/src/config.js +2 -1
  21. package/dist/src/config.js.map +1 -1
  22. package/dist/src/engine/grader.js +67 -9
  23. package/dist/src/engine/grader.js.map +1 -1
  24. package/dist/src/engine/runner.js +14 -12
  25. package/dist/src/engine/runner.js.map +1 -1
  26. package/dist/src/errors.d.ts +6 -0
  27. package/dist/src/errors.js +21 -3
  28. package/dist/src/errors.js.map +1 -1
  29. package/dist/src/types.d.ts +1 -0
  30. package/package.json +4 -1
  31. package/plugin.json +1 -1
  32. package/skills/snapeval/SKILL.md +33 -18
  33. package/src/adapters/copilot-sdk-client.ts +1 -1
  34. package/src/adapters/harness/copilot-sdk.ts +126 -0
  35. package/src/adapters/harness/resolve.ts +13 -2
  36. package/src/adapters/inference/copilot-sdk.ts +5 -1
  37. package/src/adapters/report/terminal.ts +100 -10
  38. package/src/commands/eval.ts +133 -31
  39. package/src/commands/review.ts +1 -1
  40. package/src/config.ts +2 -1
  41. package/src/engine/grader.ts +59 -8
  42. package/src/engine/runner.ts +14 -13
  43. package/src/errors.ts +24 -3
  44. package/src/types.ts +1 -0
package/bin/snapeval.ts CHANGED
@@ -7,6 +7,7 @@ import { evalCommand } from '../src/commands/eval.js';
7
7
  import { reviewCommand } from '../src/commands/review.js';
8
8
  import { TerminalReporter } from '../src/adapters/report/terminal.js';
9
9
  import { SnapevalError } from '../src/errors.js';
10
+ import { stopClient } from '../src/adapters/copilot-sdk-client.js';
10
11
  import * as path from 'node:path';
11
12
 
12
13
  const program = new Command();
@@ -24,6 +25,9 @@ program
24
25
  .option('--inference <inference>', 'Inference adapter to use')
25
26
  .option('--workspace <path>', 'Workspace directory')
26
27
  .option('--runs <n>', 'Runs per eval for statistical significance', '1')
28
+ .option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
29
+ .option('--only <ids>', 'Run only specific eval IDs (comma-separated, e.g. --only 1,3,5)')
30
+ .option('--threshold <rate>', 'Minimum pass rate (0-1) for exit code 0. Below threshold exits with code 1.')
27
31
  .option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
28
32
  .option('--verbose', 'Verbose output')
29
33
  .argument('[skill-dir]', 'Path to skill directory', process.cwd())
@@ -36,15 +40,26 @@ program
36
40
  inference: opts.inference as string,
37
41
  workspace: opts.workspace as string,
38
42
  runs: opts.runs ? parseInt(opts.runs as string, 10) : undefined,
43
+ concurrency: opts.concurrency ? parseInt(opts.concurrency as string, 10) : undefined,
39
44
  },
40
45
  process.cwd(), skillPath
41
46
  );
42
47
  const harness = resolveHarness(config.harness);
43
48
  const inference = resolveInference(config.inference);
44
49
 
50
+ const only = opts.only
51
+ ? (opts.only as string).split(',').map((s) => parseInt(s.trim(), 10))
52
+ : undefined;
53
+ const threshold = opts.threshold
54
+ ? parseFloat(opts.threshold as string)
55
+ : undefined;
56
+
45
57
  const results = await evalCommand(skillPath, harness, inference, {
46
58
  workspace: config.workspace,
47
59
  runs: config.runs,
60
+ concurrency: config.concurrency,
61
+ only,
62
+ threshold,
48
63
  oldSkill: opts.oldSkill as string | undefined,
49
64
  });
50
65
 
@@ -52,7 +67,15 @@ program
52
67
  await terminal.report(results);
53
68
  console.log(`Results at ${results.iterationDir}`);
54
69
  process.exit(0);
55
- } catch (err) { handleError(err); }
70
+ } catch (err: any) {
71
+ // ThresholdError has results attached — show them before failing
72
+ if (err.results) {
73
+ const terminal = new TerminalReporter();
74
+ await terminal.report(err.results);
75
+ console.log(`Results at ${err.results.iterationDir}`);
76
+ }
77
+ handleError(err);
78
+ }
56
79
  });
57
80
 
58
81
  // --- review ---
@@ -63,6 +86,7 @@ program
63
86
  .option('--inference <inference>', 'Inference adapter to use')
64
87
  .option('--workspace <path>', 'Workspace directory')
65
88
  .option('--runs <n>', 'Runs per eval for statistical significance', '1')
89
+ .option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
66
90
  .option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
67
91
  .option('--no-open', 'Do not open browser')
68
92
  .option('--verbose', 'Verbose output')
@@ -76,6 +100,7 @@ program
76
100
  inference: opts.inference as string,
77
101
  workspace: opts.workspace as string,
78
102
  runs: opts.runs ? parseInt(opts.runs as string, 10) : undefined,
103
+ concurrency: opts.concurrency ? parseInt(opts.concurrency as string, 10) : undefined,
79
104
  },
80
105
  process.cwd(), skillPath
81
106
  );
@@ -85,6 +110,7 @@ program
85
110
  await reviewCommand(skillPath, harness, inference, {
86
111
  workspace: config.workspace,
87
112
  runs: config.runs,
113
+ concurrency: config.concurrency,
88
114
  oldSkill: opts.oldSkill as string | undefined,
89
115
  noOpen: opts.open === false,
90
116
  });
@@ -92,6 +118,9 @@ program
92
118
  } catch (err) { handleError(err); }
93
119
  });
94
120
 
121
+ // Clean up SDK client on exit (no-op if never started)
122
+ process.on('exit', () => { stopClient().catch(() => {}); });
123
+
95
124
  function handleError(err: unknown): never {
96
125
  if (err instanceof SnapevalError) {
97
126
  console.error(`Error: ${err.message}`);
@@ -7,6 +7,7 @@ import { evalCommand } from '../src/commands/eval.js';
7
7
  import { reviewCommand } from '../src/commands/review.js';
8
8
  import { TerminalReporter } from '../src/adapters/report/terminal.js';
9
9
  import { SnapevalError } from '../src/errors.js';
10
+ import { stopClient } from '../src/adapters/copilot-sdk-client.js';
10
11
  import * as path from 'node:path';
11
12
  const program = new Command();
12
13
  program
@@ -21,6 +22,9 @@ program
21
22
  .option('--inference <inference>', 'Inference adapter to use')
22
23
  .option('--workspace <path>', 'Workspace directory')
23
24
  .option('--runs <n>', 'Runs per eval for statistical significance', '1')
25
+ .option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
26
+ .option('--only <ids>', 'Run only specific eval IDs (comma-separated, e.g. --only 1,3,5)')
27
+ .option('--threshold <rate>', 'Minimum pass rate (0-1) for exit code 0. Below threshold exits with code 1.')
24
28
  .option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
25
29
  .option('--verbose', 'Verbose output')
26
30
  .argument('[skill-dir]', 'Path to skill directory', process.cwd())
@@ -32,12 +36,22 @@ program
32
36
  inference: opts.inference,
33
37
  workspace: opts.workspace,
34
38
  runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
39
+ concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
35
40
  }, process.cwd(), skillPath);
36
41
  const harness = resolveHarness(config.harness);
37
42
  const inference = resolveInference(config.inference);
43
+ const only = opts.only
44
+ ? opts.only.split(',').map((s) => parseInt(s.trim(), 10))
45
+ : undefined;
46
+ const threshold = opts.threshold
47
+ ? parseFloat(opts.threshold)
48
+ : undefined;
38
49
  const results = await evalCommand(skillPath, harness, inference, {
39
50
  workspace: config.workspace,
40
51
  runs: config.runs,
52
+ concurrency: config.concurrency,
53
+ only,
54
+ threshold,
41
55
  oldSkill: opts.oldSkill,
42
56
  });
43
57
  const terminal = new TerminalReporter();
@@ -46,6 +60,12 @@ program
46
60
  process.exit(0);
47
61
  }
48
62
  catch (err) {
63
+ // ThresholdError has results attached — show them before failing
64
+ if (err.results) {
65
+ const terminal = new TerminalReporter();
66
+ await terminal.report(err.results);
67
+ console.log(`Results at ${err.results.iterationDir}`);
68
+ }
49
69
  handleError(err);
50
70
  }
51
71
  });
@@ -57,6 +77,7 @@ program
57
77
  .option('--inference <inference>', 'Inference adapter to use')
58
78
  .option('--workspace <path>', 'Workspace directory')
59
79
  .option('--runs <n>', 'Runs per eval for statistical significance', '1')
80
+ .option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
60
81
  .option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
61
82
  .option('--no-open', 'Do not open browser')
62
83
  .option('--verbose', 'Verbose output')
@@ -69,12 +90,14 @@ program
69
90
  inference: opts.inference,
70
91
  workspace: opts.workspace,
71
92
  runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
93
+ concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
72
94
  }, process.cwd(), skillPath);
73
95
  const harness = resolveHarness(config.harness);
74
96
  const inference = resolveInference(config.inference);
75
97
  await reviewCommand(skillPath, harness, inference, {
76
98
  workspace: config.workspace,
77
99
  runs: config.runs,
100
+ concurrency: config.concurrency,
78
101
  oldSkill: opts.oldSkill,
79
102
  noOpen: opts.open === false,
80
103
  });
@@ -84,6 +107,8 @@ program
84
107
  handleError(err);
85
108
  }
86
109
  });
110
+ // Clean up SDK client on exit (no-op if never started)
111
+ process.on('exit', () => { stopClient().catch(() => { }); });
87
112
  function handleError(err) {
88
113
  if (err instanceof SnapevalError) {
89
114
  console.error(`Error: ${err.message}`);
@@ -1 +1 @@
1
- {"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SAChE,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
1
+ {"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,uCAAuC,CAAC;AACnE,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,EAAE,GAAG,CAAC;KAClF,MAAM,CAAC,cAAc,EAAE,iEAAiE,CAAC;KACzF,MAAM,CAAC,oBAAoB,EAAE,6EAA6E,CAAC;KAC3G,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAqB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SACrF,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI;YACpB,CAAC,CAAE,IAAI,CAAC,IAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;YACrE,CAAC,CAAC,SAAS,CAAC;QACd,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS;YAC9B,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,SAAmB,CAAC;YACtC,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,IAAI;YACJ,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,iEAAiE;QACjE,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;YAChB,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;YACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,WAAW,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,EAAE,GAAG,CAAC;KAClF,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAqB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SACrF,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,uDAAuD;AACvD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,UAAU,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAE5D,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -25,7 +25,7 @@ export async function getClient() {
25
25
  if (!CopilotClient) {
26
26
  throw new Error('Could not find CopilotClient export in @github/copilot-sdk. The package may have changed its API.');
27
27
  }
28
- clientInstance = new CopilotClient();
28
+ clientInstance = new CopilotClient({ logLevel: 'none' });
29
29
  await clientInstance.start();
30
30
  clientStarted = true;
31
31
  return clientInstance;
@@ -1 +1 @@
1
- {"version":3,"file":"copilot-sdk-client.js","sourceRoot":"","sources":["../../../src/adapters/copilot-sdk-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,iEAAiE;AACjE,4DAA4D;AAC5D,IAAI,cAAc,GAAQ,IAAI,CAAC;AAC/B,IAAI,aAAa,GAAG,KAAK,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,IAAI,cAAc,IAAI,aAAa;QAAE,OAAO,cAAc,CAAC;IAE3D,IAAI,GAAQ,CAAC;IACb,IAAI,CAAC;QACH,+DAA+D;QAC/D,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,MAAM,aAAa,GAAG,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC;IACtE,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,cAAc,GAAG,IAAI,aAAa,EAAE,CAAC;IACrC,MAAM,cAAc,CAAC,KAAK,EAAE,CAAC;IAC7B,aAAa,GAAG,IAAI,CAAC;IACrB,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,IAAI,cAAc,IAAI,aAAa,EAAE,CAAC;QACpC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QAC5B,aAAa,GAAG,KAAK,CAAC;QACtB,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,iEAAiE;IACjE,mEAAmE;IACnE,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IACxB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,EAAE,SAAS,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;QAC3F,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO,IAAI,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM;QAC1B,GAAG,GAAG,MAAM,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
1
+ {"version":3,"file":"copilot-sdk-client.js","sourceRoot":"","sources":["../../../src/adapters/copilot-sdk-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,iEAAiE;AACjE,4DAA4D;AAC5D,IAAI,cAAc,GAAQ,IAAI,CAAC;AAC/B,IAAI,aAAa,GAAG,KAAK,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,IAAI,cAAc,IAAI,aAAa;QAAE,OAAO,cAAc,CAAC;IAE3D,IAAI,GAAQ,CAAC;IACb,IAAI,CAAC;QACH,+DAA+D;QAC/D,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,MAAM,aAAa,GAAG,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC;IACtE,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,cAAc,GAAG,IAAI,aAAa,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,MAAM,cAAc,CAAC,KAAK,EAAE,CAAC;IAC7B,aAAa,GAAG,IAAI,CAAC;IACrB,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,IAAI,cAAc,IAAI,aAAa,EAAE,CAAC;QACpC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QAC5B,aAAa,GAAG,KAAK,CAAC;QACtB,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,iEAAiE;IACjE,mEAAmE;IACnE,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IACxB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,EAAE,SAAS,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;QAC3F,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO,IAAI,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM;QAC1B,GAAG,GAAG,MAAM,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { Harness, HarnessRunResult } from '../../types.js';
2
+ export declare class CopilotSDKHarness implements Harness {
3
+ readonly name = "copilot-sdk";
4
+ run(options: {
5
+ skillPath?: string;
6
+ prompt: string;
7
+ files?: string[];
8
+ outputDir: string;
9
+ }): Promise<HarnessRunResult>;
10
+ isAvailable(): Promise<boolean>;
11
+ }
@@ -0,0 +1,101 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ import { getClient, isSDKInstalled } from '../copilot-sdk-client.js';
4
+ export class CopilotSDKHarness {
5
+ name = 'copilot-sdk';
6
+ async run(options) {
7
+ const startMs = Date.now();
8
+ const client = await getClient();
9
+ fs.mkdirSync(options.outputDir, { recursive: true });
10
+ // Dynamically import SDK for approveAll
11
+ // @ts-ignore — module may not be installed (optional dep)
12
+ const { approveAll } = await import('@github/copilot-sdk');
13
+ // Build session config
14
+ const sessionConfig = {
15
+ model: 'gpt-4.1',
16
+ onPermissionRequest: approveAll,
17
+ workingDirectory: options.outputDir,
18
+ infiniteSessions: { enabled: false },
19
+ };
20
+ // Native skill loading: point skillDirectories at the skill's parent
21
+ if (options.skillPath) {
22
+ sessionConfig.skillDirectories = [options.skillPath];
23
+ }
24
+ const session = await client.createSession(sessionConfig);
25
+ try {
26
+ // Attach input files if provided
27
+ const attachments = [];
28
+ if (options.files) {
29
+ for (const file of options.files) {
30
+ // Copy to outputDir for script assertions, and attach for the model
31
+ const dest = path.join(options.outputDir, path.basename(file));
32
+ fs.copyFileSync(file, dest);
33
+ attachments.push({ type: 'file', path: dest, displayName: path.basename(file) });
34
+ }
35
+ }
36
+ const response = await session.sendAndWait({
37
+ prompt: options.prompt,
38
+ ...(attachments.length > 0 ? { attachments } : {}),
39
+ }, 300_000);
40
+ const raw = response?.data?.content ?? '';
41
+ // Collect full transcript from session events
42
+ const events = await session.getMessages();
43
+ const transcript = buildTranscript(events);
44
+ // Extract token count from events if available
45
+ const totalTokens = extractTokenCount(events);
46
+ const durationMs = Date.now() - startMs;
47
+ return {
48
+ raw: raw.trim(),
49
+ transcript,
50
+ files: [],
51
+ total_tokens: totalTokens,
52
+ duration_ms: durationMs,
53
+ };
54
+ }
55
+ finally {
56
+ await session.disconnect();
57
+ }
58
+ }
59
+ async isAvailable() {
60
+ return isSDKInstalled();
61
+ }
62
+ }
63
+ function buildTranscript(events) {
64
+ const lines = [];
65
+ for (const event of events) {
66
+ switch (event.type) {
67
+ case 'user.message':
68
+ lines.push(`[user] ${event.data?.content ?? ''}`);
69
+ break;
70
+ case 'assistant.message':
71
+ lines.push(`[assistant] ${event.data?.content ?? ''}`);
72
+ break;
73
+ case 'tool.execution_start':
74
+ lines.push(`[tool:start] ${event.data?.toolName ?? 'unknown'}(${JSON.stringify(event.data?.arguments ?? {})})`);
75
+ break;
76
+ case 'tool.execution_complete':
77
+ lines.push(`[tool:done] ${event.data?.toolName ?? 'unknown'} → ${truncate(event.data?.result ?? '', 200)}`);
78
+ break;
79
+ case 'skill.invoked':
80
+ lines.push(`[skill] ${event.data?.name ?? 'unknown'} (${event.data?.path ?? ''})`);
81
+ break;
82
+ case 'session.error':
83
+ lines.push(`[error] ${event.data?.message ?? ''}`);
84
+ break;
85
+ }
86
+ }
87
+ return lines.join('\n');
88
+ }
89
+ function extractTokenCount(events) {
90
+ let total = 0;
91
+ for (const event of events) {
92
+ if (event.type === 'assistant.usage') {
93
+ total += (event.data?.inputTokens ?? 0) + (event.data?.outputTokens ?? 0);
94
+ }
95
+ }
96
+ return total;
97
+ }
98
+ function truncate(str, max) {
99
+ return str.length > max ? str.slice(0, max) + '...' : str;
100
+ }
101
+ //# sourceMappingURL=copilot-sdk.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"copilot-sdk.js","sourceRoot":"","sources":["../../../../src/adapters/harness/copilot-sdk.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAErE,MAAM,OAAO,iBAAiB;IACnB,IAAI,GAAG,aAAa,CAAC;IAE9B,KAAK,CAAC,GAAG,CAAC,OAKT;QACC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;QAEjC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAErD,wCAAwC;QACxC,0DAA0D;QAC1D,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;QAE3D,uBAAuB;QACvB,MAAM,aAAa,GAA4B;YAC7C,KAAK,EAAE,SAAS;YAChB,mBAAmB,EAAE,UAAU;YAC/B,gBAAgB,EAAE,OAAO,CAAC,SAAS;YACnC,gBAAgB,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;SACrC,CAAC;QAEF,qEAAqE;QACrE,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,aAAa,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;QAE1D,IAAI,CAAC;YACH,iCAAiC;YACjC,MAAM,WAAW,GAAgE,EAAE,CAAC;YACpF,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClB,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBACjC,oEAAoE;oBACpE,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC/D,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;oBAC5B,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACnF,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,CACxC;gBACE,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACnD,EACD,OAAO,CACR,CAAC;YAEF,MAAM,GAAG,GAAG,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;YAE1C,8CAA8C;YAC9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YAE3C,+CAA+C;YAC/C,MAAM,WAAW,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;YAE9C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;YAExC,OAAO;gBACL,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE;gBACf,UAAU;gBACV,KAAK,EAAE,EAAE;gBACT,YAAY,EAAE,WAAW;gBACzB,WAAW,EAAE,UAAU;aACxB,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,cAAc,EAAE,CAAC;IAC1B,CAAC;CACF;AAED,SAAS,eAAe,CAAC,MAAa;IACpC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBAClD,MAAM;YACR,KAAK,mBAAmB;gBACtB,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBACvD,MAAM;YACR,KAAK,sBAAsB;gBACzB,KAAK,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,IAAI,EAAE,QAAQ,IAAI,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;gBAChH,MAAM;YACR,KAAK,yBAAyB;gBAC5B,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,EAAE,QAAQ,IAAI,SAAS,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,IAAI,EAAE,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC5G,MAAM;YACR,KAAK,eAAe;gBAClB,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,EAAE,GAAG,CAAC,CAAC;gBACnF,MAAM;YACR,KAAK,eAAe;gBAClB,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBACnD,MAAM;QACV,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CAAC,MAAa;IACtC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;YACrC,KAAK,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,GAAW,EAAE,GAAW;IACxC,OAAO,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC;AAC5D,CAAC"}
@@ -1,9 +1,17 @@
1
1
  import { CopilotCLIHarness } from './copilot-cli.js';
2
- import { SnapevalError } from '../../errors.js';
2
+ import { CopilotSDKHarness } from './copilot-sdk.js';
3
+ import { AdapterNotAvailableError, SnapevalError } from '../../errors.js';
4
+ import { isSDKInstalled } from '../copilot-sdk-client.js';
3
5
  export function resolveHarness(name) {
6
+ if (name === 'copilot-sdk') {
7
+ if (!isSDKInstalled()) {
8
+ throw new AdapterNotAvailableError('copilot-sdk', '@github/copilot-sdk is not installed. Install with: npm install @github/copilot-sdk');
9
+ }
10
+ return new CopilotSDKHarness();
11
+ }
4
12
  if (name === 'copilot-cli') {
5
13
  return new CopilotCLIHarness();
6
14
  }
7
- throw new SnapevalError(`Unknown harness "${name}". Built-in options: copilot-cli.`);
15
+ throw new SnapevalError(`Unknown harness "${name}". Built-in options: copilot-sdk, copilot-cli.`);
8
16
  }
9
17
  //# sourceMappingURL=resolve.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../../../src/adapters/harness/resolve.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QAC3B,OAAO,IAAI,iBAAiB,EAAE,CAAC;IACjC,CAAC;IACD,MAAM,IAAI,aAAa,CAAC,oBAAoB,IAAI,mCAAmC,CAAC,CAAC;AACvF,CAAC"}
1
+ {"version":3,"file":"resolve.js","sourceRoot":"","sources":["../../../../src/adapters/harness/resolve.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,wBAAwB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE1D,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QAC3B,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;YACtB,MAAM,IAAI,wBAAwB,CAChC,aAAa,EACb,qFAAqF,CACtF,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,iBAAiB,EAAE,CAAC;IACjC,CAAC;IACD,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QAC3B,OAAO,IAAI,iBAAiB,EAAE,CAAC;IACjC,CAAC;IACD,MAAM,IAAI,aAAa,CAAC,oBAAoB,IAAI,gDAAgD,CAAC,CAAC;AACpG,CAAC"}
@@ -3,6 +3,8 @@ export class CopilotSDKInference {
3
3
  name = 'copilot-sdk';
4
4
  async chat(messages, _options) {
5
5
  const client = await getClient();
6
+ // @ts-ignore — module may not be installed (optional dep)
7
+ const { approveAll } = await import('@github/copilot-sdk');
6
8
  const systemMessages = messages.filter((m) => m.role === 'system');
7
9
  const nonSystemMessages = messages.filter((m) => m.role !== 'system');
8
10
  const systemContent = systemMessages.map((m) => m.content).join('\n');
@@ -12,7 +14,8 @@ export class CopilotSDKInference {
12
14
  ...(systemContent
13
15
  ? { systemMessage: { content: systemContent } }
14
16
  : {}),
15
- onPermissionRequest: async () => ({ kind: 'approved' }),
17
+ onPermissionRequest: approveAll,
18
+ infiniteSessions: { enabled: false },
16
19
  });
17
20
  try {
18
21
  const response = await session.sendAndWait({ prompt: userPrompt });
@@ -1 +1 @@
1
- {"version":3,"file":"copilot-sdk.js","sourceRoot":"","sources":["../../../../src/adapters/inference/copilot-sdk.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,MAAM,OAAO,mBAAmB;IACrB,IAAI,GAAG,aAAa,CAAC;IAE9B,KAAK,CAAC,IAAI,CAAC,QAAmB,EAAE,QAAsB;QACpD,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;QAEjC,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACnE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACtE,MAAM,aAAa,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtE,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEtE,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC;YACzC,KAAK,EAAE,SAAS;YAChB,GAAG,CAAC,aAAa;gBACf,CAAC,CAAC,EAAE,aAAa,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;gBAC/C,CAAC,CAAC,EAAE,CAAC;YACP,mBAAmB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;SACxD,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;YACnE,OAAO,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7B,CAAC;IACH,CAAC;CACF"}
1
+ {"version":3,"file":"copilot-sdk.js","sourceRoot":"","sources":["../../../../src/adapters/inference/copilot-sdk.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,MAAM,OAAO,mBAAmB;IACrB,IAAI,GAAG,aAAa,CAAC;IAE9B,KAAK,CAAC,IAAI,CAAC,QAAmB,EAAE,QAAsB;QACpD,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;QAEjC,0DAA0D;QAC1D,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;QAE3D,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACnE,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QACtE,MAAM,aAAa,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtE,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEtE,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC;YACzC,KAAK,EAAE,SAAS;YAChB,GAAG,CAAC,aAAa;gBACf,CAAC,CAAC,EAAE,aAAa,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;gBAC/C,CAAC,CAAC,EAAE,CAAC;YACP,mBAAmB,EAAE,UAAU;YAC/B,gBAAgB,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;YACnE,OAAO,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7B,CAAC;IACH,CAAC;CACF"}
@@ -1,25 +1,105 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
1
3
  import chalk from 'chalk';
4
+ function loadPreviousIteration(iterationDir) {
5
+ const workspaceDir = path.dirname(iterationDir);
6
+ const currentName = path.basename(iterationDir);
7
+ const currentNum = parseInt(currentName.replace('iteration-', ''), 10);
8
+ if (isNaN(currentNum) || currentNum <= 1)
9
+ return null;
10
+ const prevDir = path.join(workspaceDir, `iteration-${currentNum - 1}`);
11
+ const prevBenchmarkPath = path.join(prevDir, 'benchmark.json');
12
+ if (!fs.existsSync(prevBenchmarkPath))
13
+ return null;
14
+ try {
15
+ const benchmark = JSON.parse(fs.readFileSync(prevBenchmarkPath, 'utf-8'));
16
+ const gradings = new Map();
17
+ const evalDirs = fs.readdirSync(prevDir).filter(d => d.startsWith('eval-'));
18
+ for (const evalDir of evalDirs) {
19
+ const wsPath = path.join(prevDir, evalDir, 'with_skill', 'grading.json');
20
+ const wosPath = path.join(prevDir, evalDir, 'without_skill', 'grading.json');
21
+ const ws = fs.existsSync(wsPath) ? JSON.parse(fs.readFileSync(wsPath, 'utf-8')) : undefined;
22
+ const wos = fs.existsSync(wosPath) ? JSON.parse(fs.readFileSync(wosPath, 'utf-8')) : undefined;
23
+ gradings.set(evalDir, { withSkill: ws, withoutSkill: wos });
24
+ }
25
+ return { benchmark, gradings };
26
+ }
27
+ catch {
28
+ return null;
29
+ }
30
+ }
31
+ function evalLabel(run) {
32
+ // Use expected_output or slug as a readable label instead of truncated prompt
33
+ if (run.slug && run.slug !== `${run.evalId}`)
34
+ return run.slug;
35
+ // Truncate prompt but show first meaningful line
36
+ const firstLine = run.prompt.split('\n')[0].slice(0, 60);
37
+ return firstLine;
38
+ }
2
39
  export class TerminalReporter {
3
40
  name = 'terminal';
4
41
  async report(results) {
5
42
  const { skillName, evalRuns, benchmark } = results;
6
43
  console.log(chalk.bold(`\nsnapeval — ${skillName}`));
7
- console.log(chalk.dim('─'.repeat(50)));
44
+ console.log(chalk.dim(`Baseline = without SKILL.md (raw AI response)`));
45
+ console.log(chalk.dim('─'.repeat(60)));
46
+ const prev = loadPreviousIteration(results.iterationDir);
8
47
  for (const run of evalRuns) {
9
- const wsRate = run.withSkill.grading?.summary.pass_rate;
48
+ const wsGrading = run.withSkill.grading;
49
+ const wsRate = wsGrading?.summary.pass_rate;
10
50
  const wosRate = run.withoutSkill.grading?.summary.pass_rate;
11
51
  const wsLabel = wsRate !== undefined ? `${(wsRate * 100).toFixed(0)}%` : 'n/a';
12
52
  const wosLabel = wosRate !== undefined ? `${(wosRate * 100).toFixed(0)}%` : 'n/a';
13
- const tokens = run.withSkill.output.total_tokens;
14
- const durationS = (run.withSkill.output.duration_ms / 1000).toFixed(2);
15
- console.log(` ${chalk.cyan(`#${run.evalId}`)} ${run.prompt.slice(0, 60)}`);
16
- console.log(` with_skill: ${wsLabel} | without_skill: ${wosLabel} | ${tokens} tokens, ${durationS}s`);
53
+ const wsColor = wsRate === 1 ? chalk.green : wsRate === 0 ? chalk.red : chalk.yellow;
54
+ const durationS = (run.withSkill.output.duration_ms / 1000).toFixed(1);
55
+ // Show per-eval delta from previous iteration
56
+ let perEvalDelta = '';
57
+ if (prev) {
58
+ const prevGrading = prev.gradings.get(`eval-${run.slug}`);
59
+ const prevRate = prevGrading?.withSkill?.summary.pass_rate;
60
+ if (prevRate !== undefined && wsRate !== undefined) {
61
+ const change = wsRate - prevRate;
62
+ if (change !== 0) {
63
+ const arrow = change > 0 ? chalk.green('↑') : chalk.red('↓');
64
+ perEvalDelta = ` ${arrow} was ${(prevRate * 100).toFixed(0)}%`;
65
+ }
66
+ }
67
+ }
68
+ console.log(` ${chalk.cyan(`#${run.evalId}`)} ${evalLabel(run)}`);
69
+ console.log(` Skill: ${wsColor(wsLabel)}${perEvalDelta} | Baseline: ${wosLabel} | ${durationS}s`);
70
+ // Show failed assertions inline
71
+ if (wsGrading) {
72
+ const failed = wsGrading.assertion_results.filter((a) => !a.passed);
73
+ for (const f of failed) {
74
+ console.log(chalk.red(` FAIL: ${f.text}`));
75
+ if (f.evidence) {
76
+ console.log(chalk.dim(` ${f.evidence.slice(0, 100)}`));
77
+ }
78
+ }
79
+ }
17
80
  }
18
- console.log(chalk.dim('─'.repeat(50)));
81
+ console.log(chalk.dim('─'.repeat(60)));
82
+ const ws = benchmark.run_summary.with_skill;
83
+ const wos = benchmark.run_summary.without_skill;
19
84
  const delta = benchmark.run_summary.delta;
20
85
  const deltaColor = delta.pass_rate > 0 ? chalk.green : delta.pass_rate < 0 ? chalk.red : chalk.dim;
21
- console.log(`Delta: ${deltaColor(`${(delta.pass_rate * 100).toFixed(1)}% pass rate`)} | ${delta.time_seconds.toFixed(1)}s time | ${delta.tokens.toFixed(0)} tokens`);
22
- console.log(chalk.dim(`with_skill avg: ${(benchmark.run_summary.with_skill.pass_rate.mean * 100).toFixed(1)}% | without_skill avg: ${(benchmark.run_summary.without_skill.pass_rate.mean * 100).toFixed(1)}%`));
86
+ console.log(chalk.bold('Summary:'));
87
+ console.log(` Skill pass rate: ${(ws.pass_rate.mean * 100).toFixed(1)}%`);
88
+ console.log(` Baseline pass rate: ${(wos.pass_rate.mean * 100).toFixed(1)}%`);
89
+ console.log(` Improvement: ${deltaColor(`${delta.pass_rate > 0 ? '+' : ''}${(delta.pass_rate * 100).toFixed(1)}%`)}`);
90
+ if (prev) {
91
+ const prevRate = prev.benchmark.run_summary.with_skill.pass_rate.mean;
92
+ const currRate = ws.pass_rate.mean;
93
+ const change = currRate - prevRate;
94
+ const changeColor = change > 0 ? chalk.green : change < 0 ? chalk.red : chalk.dim;
95
+ console.log(` vs previous: ${changeColor(`${change > 0 ? '+' : ''}${(change * 100).toFixed(1)}%`)} (was ${(prevRate * 100).toFixed(1)}%)`);
96
+ // Note if eval set size changed
97
+ const prevEvalCount = prev.gradings.size;
98
+ const currEvalCount = evalRuns.length;
99
+ if (prevEvalCount !== currEvalCount) {
100
+ console.log(chalk.dim(` Note: eval set changed (${prevEvalCount} → ${currEvalCount} evals)`));
101
+ }
102
+ }
23
103
  }
24
104
  }
25
105
  //# sourceMappingURL=terminal.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"terminal.js","sourceRoot":"","sources":["../../../../src/adapters/report/terminal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,UAAU,CAAC;IAE3B,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC/B,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC;QAEnD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,gBAAgB,SAAS,EAAE,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC;YACxD,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC;YAC5D,MAAM,OAAO,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;YAC/E,MAAM,QAAQ,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;YAClF,MAAM,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,YAAY,CAAC;YACjD,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACvE,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;YAC5E,OAAO,CAAC,GAAG,CAAC,mBAAmB,OAAO,qBAAqB,QAAQ,MAAM,MAAM,YAAY,SAAS,GAAG,CAAC,CAAC;QAC3G,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,KAAK,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;QACnG,OAAO,CAAC,GAAG,CAAC,UAAU,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACrK,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,mBAAmB,CAAC,SAAS,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC,SAAS,CAAC,WAAW,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAClN,CAAC;CACF"}
1
+ {"version":3,"file":"terminal.js","sourceRoot":"","sources":["../../../../src/adapters/report/terminal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,MAAM,OAAO,CAAC;AAQ1B,SAAS,qBAAqB,CAAC,YAAoB;IACjD,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;IAChD,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;IACvE,IAAI,KAAK,CAAC,UAAU,CAAC,IAAI,UAAU,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACtD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,aAAa,UAAU,GAAG,CAAC,EAAE,CAAC,CAAC;IACvE,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IAC/D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,iBAAiB,CAAC;QAAE,OAAO,IAAI,CAAC;IACnD,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC,CAAC;QAC1E,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAuE,CAAC;QAChG,MAAM,QAAQ,GAAG,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5E,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,cAAc,CAAC,CAAC;YACzE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,cAAc,CAAC,CAAC;YAC7E,MAAM,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5F,MAAM,GAAG,GAAG,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC/F,QAAQ,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9D,CAAC;QACD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,GAAqD;IACtE,8EAA8E;IAC9E,IAAI,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,KAAK,GAAG,GAAG,CAAC,MAAM,EAAE;QAAE,OAAO,GAAG,CAAC,IAAI,CAAC;IAC9D,iDAAiD;IACjD,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACzD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,UAAU,CAAC;IAE3B,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC/B,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC;QAEnD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,gBAAgB,SAAS,EAAE,CAAC,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC,CAAC;QACxE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC;YACxC,MAAM,MAAM,GAAG,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC;YAC5C,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC;YAC5D,MAAM,OAAO,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;YAC/E,MAAM,QAAQ,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;YAClF,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;YACrF,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEvE,8CAA8C;YAC9C,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC1D,MAAM,QAAQ,GAAG,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC;gBAC3D,IAAI,QAAQ,KAAK,SAAS,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;oBACnD,MAAM,MAAM,GAAG,MAAM,GAAG,QAAQ,CAAC;oBACjC,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;wBACjB,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;wBAC7D,YAAY,GAAG,IAAI,KAAK,QAAQ,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;oBACjE,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACnE,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,OAAO,CAAC,GAAG,YAAY,gBAAgB,QAAQ,MAAM,SAAS,GAAG,CAAC,CAAC;YAErG,gCAAgC;YAChC,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,SAAS,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBACpE,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;oBACvB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;oBAC9C,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;wBACf,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;oBAClE,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEvC,MAAM,EAAE,GAAG,SAAS,CAAC,WAAW,CAAC,UAAU,CAAC;QAC5C,MAAM,GAAG,GAAG,SAAS,CAAC,WAAW,CAAC,aAAa,CAAC;QAChD,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,KAAK,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;QAEnG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC9E,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/E,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,CAAC,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAE9H,IAAI,IAAI,EAAE,CAAC;YACT,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC;YACtE,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC;YACnC,MAAM,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;YACnC,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;YAClF,OAAO,CAAC,GAAG,CAAC,yBAAyB,WAAW,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAEnJ,gCAAgC;YAChC,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;YACzC,MAAM,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAC;YACtC,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;gBACpC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,aAAa,MAAM,aAAa,SAAS,CAAC,CAAC,CAAC;YACjG,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
@@ -3,4 +3,7 @@ export declare function evalCommand(skillPath: string, harness: Harness, inferen
3
3
  workspace?: string;
4
4
  runs?: number;
5
5
  oldSkill?: string;
6
+ concurrency?: number;
7
+ only?: number[];
8
+ threshold?: number;
6
9
  }): Promise<EvalResults>;