snapeval 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +2 -13
  2. package/bin/snapeval.ts +7 -44
  3. package/dist/bin/snapeval.d.ts +1 -1
  4. package/dist/bin/snapeval.js +6 -42
  5. package/dist/bin/snapeval.js.map +1 -1
  6. package/dist/src/adapters/copilot-sdk-client.d.ts +0 -4
  7. package/dist/src/adapters/copilot-sdk-client.js +2 -23
  8. package/dist/src/adapters/copilot-sdk-client.js.map +1 -1
  9. package/dist/src/adapters/harness/copilot-cli.js +1 -0
  10. package/dist/src/adapters/harness/copilot-cli.js.map +1 -1
  11. package/dist/src/adapters/harness/copilot-sdk.js +6 -16
  12. package/dist/src/adapters/harness/copilot-sdk.js.map +1 -1
  13. package/dist/src/adapters/harness/resolve.js +1 -5
  14. package/dist/src/adapters/harness/resolve.js.map +1 -1
  15. package/dist/src/adapters/inference/copilot-sdk.d.ts +1 -1
  16. package/dist/src/adapters/inference/copilot-sdk.js +4 -2
  17. package/dist/src/adapters/inference/copilot-sdk.js.map +1 -1
  18. package/dist/src/adapters/inference/github-models.js +3 -0
  19. package/dist/src/adapters/inference/github-models.js.map +1 -1
  20. package/dist/src/adapters/inference/resolve.js +6 -32
  21. package/dist/src/adapters/inference/resolve.js.map +1 -1
  22. package/dist/src/commands/eval.d.ts +1 -0
  23. package/dist/src/commands/eval.js +8 -0
  24. package/dist/src/commands/eval.js.map +1 -1
  25. package/dist/src/errors.d.ts +0 -6
  26. package/dist/src/errors.js +1 -13
  27. package/dist/src/errors.js.map +1 -1
  28. package/package.json +8 -11
  29. package/plugin.json +4 -2
  30. package/skills/create-evals/SKILL.md +152 -0
  31. package/skills/run-evals/SKILL.md +132 -0
  32. package/src/adapters/copilot-sdk-client.ts +2 -22
  33. package/src/adapters/harness/copilot-cli.ts +1 -0
  34. package/src/adapters/harness/copilot-sdk.ts +6 -17
  35. package/src/adapters/harness/resolve.ts +1 -8
  36. package/src/adapters/inference/copilot-sdk.ts +4 -2
  37. package/src/adapters/inference/github-models.ts +3 -0
  38. package/src/adapters/inference/resolve.ts +8 -43
  39. package/src/commands/eval.ts +14 -1
  40. package/src/errors.ts +1 -15
  41. package/assets/ideation-viewer.html +0 -469
  42. package/dist/src/adapters/inference/copilot.d.ts +0 -5
  43. package/dist/src/adapters/inference/copilot.js +0 -10
  44. package/dist/src/adapters/inference/copilot.js.map +0 -1
  45. package/dist/src/commands/review.d.ts +0 -8
  46. package/dist/src/commands/review.js +0 -32
  47. package/dist/src/commands/review.js.map +0 -1
  48. package/src/adapters/inference/copilot.ts +0 -12
  49. package/src/commands/review.ts +0 -46
@@ -1,19 +1,7 @@
1
- import { execFileSync } from 'node:child_process';
2
1
  import type { InferenceAdapter } from '../../types.js';
3
2
  import { AdapterNotAvailableError } from '../../errors.js';
4
3
  import { GitHubModelsInference } from './github-models.js';
5
- import { CopilotInference } from './copilot.js';
6
4
  import { CopilotSDKInference } from './copilot-sdk.js';
7
- import { isSDKInstalled } from '../copilot-sdk-client.js';
8
-
9
- function isCopilotAvailable(): boolean {
10
- try {
11
- execFileSync('copilot', ['--version'], { encoding: 'utf-8', stdio: 'pipe' });
12
- return true;
13
- } catch {
14
- return false;
15
- }
16
- }
17
5
 
18
6
  function isGitHubTokenAvailable(): boolean {
19
7
  return Boolean(process.env.GITHUB_TOKEN);
@@ -21,31 +9,18 @@ function isGitHubTokenAvailable(): boolean {
21
9
 
22
10
  export function resolveInference(preference: string): InferenceAdapter {
23
11
  if (preference === 'auto') {
24
- const copilotAvailable = isCopilotAvailable();
25
- const tokenAvailable = isGitHubTokenAvailable();
26
-
27
- if (copilotAvailable) {
28
- return new CopilotInference();
29
- }
30
-
31
- if (tokenAvailable) {
32
- return new GitHubModelsInference();
33
- }
12
+ return new CopilotSDKInference();
13
+ }
34
14
 
15
+ if (preference === 'copilot') {
35
16
  throw new AdapterNotAvailableError(
36
- 'inference',
37
- 'No inference adapter available. Install GitHub Copilot CLI (`npm install -g @github/copilot`) or set GITHUB_TOKEN.'
17
+ 'copilot',
18
+ 'The copilot CLI inference adapter has been removed. Use --inference copilot-sdk instead.'
38
19
  );
39
20
  }
40
21
 
41
- if (preference === 'copilot') {
42
- if (!isCopilotAvailable()) {
43
- throw new AdapterNotAvailableError(
44
- 'copilot',
45
- 'GitHub Copilot CLI is not available. Install with: npm install -g @github/copilot'
46
- );
47
- }
48
- return new CopilotInference();
22
+ if (preference === 'copilot-sdk') {
23
+ return new CopilotSDKInference();
49
24
  }
50
25
 
51
26
  if (preference === 'github-models') {
@@ -58,18 +33,8 @@ export function resolveInference(preference: string): InferenceAdapter {
58
33
  return new GitHubModelsInference();
59
34
  }
60
35
 
61
- if (preference === 'copilot-sdk') {
62
- if (!isSDKInstalled()) {
63
- throw new AdapterNotAvailableError(
64
- 'copilot-sdk',
65
- '@github/copilot-sdk is not installed. Install with: npm install @github/copilot-sdk'
66
- );
67
- }
68
- return new CopilotSDKInference();
69
- }
70
-
71
36
  throw new AdapterNotAvailableError(
72
37
  preference,
73
- `Unknown inference adapter "${preference}". Valid options: auto, copilot, copilot-sdk, github-models.`
38
+ `Unknown inference adapter "${preference}". Valid options: auto, copilot-sdk, github-models.`
74
39
  );
75
40
  }
@@ -7,6 +7,7 @@ import type {
7
7
  EvalResults,
8
8
  EvalRunResult,
9
9
  GradingResult,
10
+ FeedbackData,
10
11
  } from '../types.js';
11
12
  import { WorkspaceManager } from '../engine/workspace.js';
12
13
  import { runEval } from '../engine/runner.js';
@@ -86,7 +87,7 @@ export async function evalCommand(
86
87
  skillPath: string,
87
88
  harness: Harness,
88
89
  inference: InferenceAdapter,
89
- options: { workspace?: string; runs?: number; oldSkill?: string; concurrency?: number; only?: number[]; threshold?: number }
90
+ options: { workspace?: string; runs?: number; oldSkill?: string; concurrency?: number; only?: number[]; threshold?: number; feedback?: boolean }
90
91
  ): Promise<EvalResults> {
91
92
  const evalsPath = path.join(skillPath, 'evals', 'evals.json');
92
93
  if (!fs.existsSync(evalsPath)) {
@@ -224,6 +225,18 @@ export async function evalCommand(
224
225
  typeof value === 'number' ? Math.round(value * 10000) / 10000 : value, 2)
225
226
  );
226
227
 
228
+ // Write feedback template if requested
229
+ if (options.feedback) {
230
+ const feedback: FeedbackData = {};
231
+ for (const run of evalRuns) {
232
+ feedback[`eval-${run.slug}`] = '';
233
+ }
234
+ fs.writeFileSync(
235
+ path.join(iterationDir, 'feedback.json'),
236
+ JSON.stringify(feedback, null, 2)
237
+ );
238
+ }
239
+
227
240
  // Check threshold if set (for CI gating)
228
241
  if (options.threshold !== undefined) {
229
242
  const passRate = benchmark.run_summary.with_skill.pass_rate.mean;
package/src/errors.ts CHANGED
@@ -35,21 +35,7 @@ export class AdapterNotAvailableError extends SnapevalError {
35
35
 
36
36
  export class RateLimitError extends SnapevalError {
37
37
  constructor(adapterName: string) {
38
- super(`${adapterName} rate limit exceeded. Try again later or use a different adapter.`);
38
+ super(`${adapterName} rate limit exceeded. Try again later or use a different adapter.`, 4);
39
39
  this.name = 'RateLimitError';
40
40
  }
41
41
  }
42
-
43
- export class TimeoutError extends SnapevalError {
44
- constructor(evalId: number, timeoutMs: number) {
45
- super(`Eval ${evalId} timed out after ${timeoutMs}ms.`, 4);
46
- this.name = 'TimeoutError';
47
- }
48
- }
49
-
50
- export class GradingError extends SnapevalError {
51
- constructor(evalId: number, detail: string) {
52
- super(`Grading failed for eval ${evalId}: ${detail}`, 4);
53
- this.name = 'GradingError';
54
- }
55
- }
@@ -1,469 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>snapeval — Scenario Ideation</title>
7
- <style>
8
- /* === Reset & Base === */
9
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
10
- body {
11
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
12
- background: #faf9f6;
13
- color: #1a1a1a;
14
- line-height: 1.6;
15
- padding: 2rem;
16
- max-width: 1200px;
17
- margin: 0 auto;
18
- }
19
- code, pre, .mono { font-family: 'SF Mono', 'Fira Code', 'Cascadia Code', monospace; }
20
-
21
- /* === Colors (shared with eval viewer) === */
22
- :root {
23
- --accent: #2563eb;
24
- --accent-light: #dbeafe;
25
- --pass: #16a34a;
26
- --pass-bg: #dcfce7;
27
- --fail: #dc2626;
28
- --fail-bg: #fee2e2;
29
- --warn: #ca8a04;
30
- --warn-bg: #fef9c3;
31
- --gray: #6b7280;
32
- --gray-light: #f3f4f6;
33
- --gray-border: #e5e7eb;
34
- --bg: #faf9f6;
35
- --card-bg: #ffffff;
36
- }
37
-
38
- /* === Layout === */
39
- header { margin-bottom: 2rem; }
40
- header h1 { font-size: 1.5rem; font-weight: 600; }
41
- header .subtitle { color: var(--gray); font-size: 0.875rem; margin-top: 0.25rem; }
42
- .stats { display: flex; gap: 1.5rem; margin-top: 1rem; flex-wrap: wrap; }
43
- .stat { background: var(--card-bg); border: 1px solid var(--gray-border); border-radius: 8px; padding: 0.75rem 1rem; }
44
- .stat-value { font-size: 1.25rem; font-weight: 600; }
45
- .stat-label { font-size: 0.75rem; color: var(--gray); text-transform: uppercase; letter-spacing: 0.05em; }
46
-
47
- /* === Sections === */
48
- section { margin-bottom: 2rem; }
49
- section h2 { font-size: 1.125rem; font-weight: 600; margin-bottom: 1rem; padding-bottom: 0.5rem; border-bottom: 1px solid var(--gray-border); }
50
- section h3 { font-size: 0.875rem; font-weight: 600; color: var(--gray); text-transform: uppercase; letter-spacing: 0.05em; margin-bottom: 0.75rem; }
51
-
52
- /* === Cards === */
53
- .card-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 1rem; }
54
- .card {
55
- background: var(--card-bg);
56
- border: 1px solid var(--gray-border);
57
- border-radius: 8px;
58
- padding: 1rem;
59
- transition: border-color 0.15s;
60
- }
61
- .card:hover { border-color: var(--accent); }
62
- .card-title { font-weight: 600; font-size: 0.9rem; margin-bottom: 0.5rem; }
63
- .card-desc { font-size: 0.85rem; color: var(--gray); }
64
-
65
- /* === Ambiguities === */
66
- .ambiguity-card { border-left: 3px solid var(--warn); }
67
- .ambiguity-why { font-size: 0.8rem; color: var(--gray); margin: 0.5rem 0; font-style: italic; }
68
- .scope-toggle { display: flex; gap: 0.5rem; margin-top: 0.75rem; }
69
- .scope-btn {
70
- padding: 0.3rem 0.75rem;
71
- border: 1px solid var(--gray-border);
72
- border-radius: 4px;
73
- background: var(--card-bg);
74
- cursor: pointer;
75
- font-size: 0.8rem;
76
- transition: all 0.15s;
77
- }
78
- .scope-btn:hover { border-color: var(--accent); }
79
- .scope-btn.active-in { background: var(--pass-bg); border-color: var(--pass); color: var(--pass); }
80
- .scope-btn.active-out { background: var(--gray-light); border-color: var(--gray); color: var(--gray); }
81
-
82
- /* === Scenario Cards === */
83
- .scenario-card { position: relative; }
84
- .scenario-card.disabled { opacity: 0.5; }
85
- .scenario-toggle {
86
- position: absolute;
87
- top: 1rem;
88
- right: 1rem;
89
- width: 40px;
90
- height: 22px;
91
- background: var(--pass);
92
- border-radius: 11px;
93
- cursor: pointer;
94
- border: none;
95
- transition: background 0.2s;
96
- }
97
- .scenario-toggle.off { background: var(--gray); }
98
- .scenario-toggle::after {
99
- content: '';
100
- position: absolute;
101
- top: 2px;
102
- left: 2px;
103
- width: 18px;
104
- height: 18px;
105
- background: white;
106
- border-radius: 50%;
107
- transition: transform 0.2s;
108
- }
109
- .scenario-toggle.off::after { transform: translateX(0); }
110
- .scenario-toggle:not(.off)::after { transform: translateX(18px); }
111
-
112
- .scenario-prompt {
113
- background: var(--gray-light);
114
- border-radius: 4px;
115
- padding: 0.75rem;
116
- font-family: monospace;
117
- font-size: 0.85rem;
118
- margin: 0.75rem 0;
119
- white-space: pre-wrap;
120
- word-break: break-word;
121
- }
122
- .scenario-why { font-size: 0.8rem; color: var(--gray); margin-bottom: 0.5rem; }
123
- .scenario-expected { font-size: 0.85rem; margin-top: 0.5rem; }
124
- .scenario-expected strong { font-weight: 600; }
125
-
126
- .editable {
127
- border: 1px solid transparent;
128
- border-radius: 4px;
129
- padding: 0.25rem;
130
- transition: border-color 0.15s;
131
- cursor: text;
132
- }
133
- .editable:hover { border-color: var(--gray-border); }
134
- .editable:focus { border-color: var(--accent); outline: none; background: var(--accent-light); }
135
-
136
- /* === Add Scenario === */
137
- .add-form {
138
- background: var(--card-bg);
139
- border: 2px dashed var(--gray-border);
140
- border-radius: 8px;
141
- padding: 1.25rem;
142
- margin-top: 1rem;
143
- }
144
- .add-form label { display: block; font-size: 0.85rem; font-weight: 600; margin-bottom: 0.25rem; margin-top: 0.75rem; }
145
- .add-form label:first-child { margin-top: 0; }
146
- .add-form textarea, .add-form input[type="text"] {
147
- width: 100%;
148
- padding: 0.5rem;
149
- border: 1px solid var(--gray-border);
150
- border-radius: 4px;
151
- font-family: inherit;
152
- font-size: 0.85rem;
153
- resize: vertical;
154
- }
155
- .add-form textarea:focus, .add-form input[type="text"]:focus { border-color: var(--accent); outline: none; }
156
-
157
- /* === Notes === */
158
- #user-notes {
159
- width: 100%;
160
- min-height: 80px;
161
- padding: 0.75rem;
162
- border: 1px solid var(--gray-border);
163
- border-radius: 8px;
164
- font-family: inherit;
165
- font-size: 0.85rem;
166
- resize: vertical;
167
- }
168
- #user-notes:focus { border-color: var(--accent); outline: none; }
169
-
170
- /* === Buttons === */
171
- .btn {
172
- display: inline-flex;
173
- align-items: center;
174
- gap: 0.5rem;
175
- padding: 0.6rem 1.25rem;
176
- border: none;
177
- border-radius: 6px;
178
- font-size: 0.9rem;
179
- font-weight: 500;
180
- cursor: pointer;
181
- transition: all 0.15s;
182
- }
183
- .btn-primary { background: var(--accent); color: white; }
184
- .btn-primary:hover { background: #1d4ed8; }
185
- .btn-secondary { background: var(--gray-light); color: var(--gray); border: 1px solid var(--gray-border); }
186
- .btn-secondary:hover { background: var(--gray-border); }
187
- .btn-add { background: var(--pass-bg); color: var(--pass); border: 1px solid var(--pass); }
188
- .btn-add:hover { background: var(--pass); color: white; }
189
-
190
- .actions { display: flex; gap: 1rem; margin-top: 2rem; padding-top: 1.5rem; border-top: 2px solid var(--gray-border); }
191
- .actions .spacer { flex: 1; }
192
- </style>
193
- </head>
194
- <body>
195
-
196
- <script>
197
- const DATA = __ANALYSIS_DATA_PLACEHOLDER__;
198
-
199
- // State
200
- const state = {
201
- scenarios: DATA.scenarios.map(s => ({ ...s })),
202
- ambiguityDecisions: DATA.ambiguities.map(a => ({ description: a.description, decision: a.in_scope === true ? 'in_scope' : a.in_scope === false ? 'out_of_scope' : null })),
203
- customScenarios: [],
204
- userNotes: '',
205
- };
206
-
207
- function render() {
208
- document.getElementById('app').innerHTML = `
209
- ${renderHeader()}
210
- ${renderSkillMap()}
211
- ${renderAmbiguities()}
212
- ${renderScenarios()}
213
- ${renderAddForm()}
214
- ${renderNotes()}
215
- ${renderActions()}
216
- `;
217
- bindEvents();
218
- }
219
-
220
- function renderHeader() {
221
- const enabled = state.scenarios.filter(s => s.enabled).length;
222
- return `
223
- <header>
224
- <h1>snapeval — ${DATA.skill_name}</h1>
225
- <div class="subtitle">Interactive Scenario Ideation</div>
226
- <div class="stats">
227
- <div class="stat"><div class="stat-value">${DATA.behaviors.length}</div><div class="stat-label">Behaviors</div></div>
228
- <div class="stat"><div class="stat-value">${DATA.dimensions.length}</div><div class="stat-label">Dimensions</div></div>
229
- <div class="stat"><div class="stat-value">${enabled} / ${state.scenarios.length + state.customScenarios.length}</div><div class="stat-label">Scenarios</div></div>
230
- <div class="stat"><div class="stat-value">${DATA.ambiguities.length}</div><div class="stat-label">Ambiguities</div></div>
231
- </div>
232
- </header>
233
- `;
234
- }
235
-
236
- function renderSkillMap() {
237
- const behaviorCards = DATA.behaviors.map(b => `
238
- <div class="card">
239
- <div class="card-title">${esc(b.name)}</div>
240
- <div class="card-desc">${esc(b.description)}</div>
241
- </div>
242
- `).join('');
243
-
244
- const dimensionCards = DATA.dimensions.map(d => `
245
- <div class="card">
246
- <div class="card-title">${esc(d.name)}</div>
247
- <div class="card-desc">${d.values.map(v => esc(v)).join(', ')}</div>
248
- </div>
249
- `).join('');
250
-
251
- return `
252
- <section>
253
- <h2>Skill Map</h2>
254
- <h3>Behaviors</h3>
255
- <div class="card-grid">${behaviorCards}</div>
256
- <h3 style="margin-top:1.5rem">Input Dimensions</h3>
257
- <div class="card-grid">${dimensionCards}</div>
258
- </section>
259
- `;
260
- }
261
-
262
- function renderAmbiguities() {
263
- if (DATA.ambiguities.length === 0) return '';
264
- const cards = DATA.ambiguities.map((a, i) => {
265
- const decision = state.ambiguityDecisions[i]?.decision;
266
- return `
267
- <div class="card ambiguity-card">
268
- <div class="card-title">${esc(a.description)}</div>
269
- <div class="ambiguity-why">${esc(a.why_it_matters)}</div>
270
- <div class="scope-toggle">
271
- <button class="scope-btn ${decision === 'in_scope' ? 'active-in' : ''}" data-amb-idx="${i}" data-decision="in_scope">In Scope</button>
272
- <button class="scope-btn ${decision === 'out_of_scope' ? 'active-out' : ''}" data-amb-idx="${i}" data-decision="out_of_scope">Out of Scope</button>
273
- </div>
274
- </div>
275
- `;
276
- }).join('');
277
-
278
- return `
279
- <section>
280
- <h2>Gaps & Ambiguities</h2>
281
- <div class="card-grid">${cards}</div>
282
- </section>
283
- `;
284
- }
285
-
286
- function renderScenarios() {
287
- const cards = state.scenarios.map((s, i) => `
288
- <div class="card scenario-card ${s.enabled ? '' : 'disabled'}">
289
- <button class="scenario-toggle ${s.enabled ? '' : 'off'}" data-scenario-idx="${i}" title="${s.enabled ? 'Enabled' : 'Disabled'}"></button>
290
- <div class="card-title">Scenario ${s.id}</div>
291
- <div class="scenario-why">${esc(s.why)}</div>
292
- <div class="scenario-prompt editable" contenteditable="true" data-field="prompt" data-scenario-idx="${i}">${esc(s.prompt)}</div>
293
- <div class="scenario-expected"><strong>Expected:</strong> <span class="editable" contenteditable="true" data-field="expected_behavior" data-scenario-idx="${i}">${esc(s.expected_behavior)}</span></div>
294
- </div>
295
- `).join('');
296
-
297
- const customCards = state.customScenarios.map((s, i) => `
298
- <div class="card scenario-card" style="border-color:var(--pass)">
299
- <div class="card-title" style="color:var(--pass)">Custom #${i + 1} <button class="btn-secondary" style="font-size:0.7rem;padding:0.15rem 0.4rem;margin-left:0.5rem" data-remove-custom="${i}">Remove</button></div>
300
- <div class="scenario-prompt">${esc(s.prompt)}</div>
301
- <div class="scenario-expected"><strong>Expected:</strong> ${esc(s.expected_behavior)}</div>
302
- </div>
303
- `).join('');
304
-
305
- return `
306
- <section>
307
- <h2>Proposed Scenarios</h2>
308
- <div class="card-grid">${cards}${customCards}</div>
309
- </section>
310
- `;
311
- }
312
-
313
- function renderAddForm() {
314
- return `
315
- <section>
316
- <h2>Add Custom Scenario</h2>
317
- <div class="add-form">
318
- <label for="custom-prompt">User Prompt</label>
319
- <textarea id="custom-prompt" rows="3" placeholder="Type a realistic user prompt..."></textarea>
320
- <label for="custom-expected">Expected Behavior</label>
321
- <input type="text" id="custom-expected" placeholder="What should happen?" />
322
- <div style="margin-top:0.75rem">
323
- <button class="btn btn-add" id="add-scenario-btn">Add Scenario</button>
324
- </div>
325
- </div>
326
- </section>
327
- `;
328
- }
329
-
330
- function renderNotes() {
331
- return `
332
- <section>
333
- <h2>Notes for AI</h2>
334
- <textarea id="user-notes" placeholder="Add any context, constraints, or known issues you want the AI to consider...">${esc(state.userNotes)}</textarea>
335
- </section>
336
- `;
337
- }
338
-
339
- function renderActions() {
340
- const enabledCount = state.scenarios.filter(s => s.enabled).length + state.customScenarios.length;
341
- return `
342
- <div class="actions">
343
- <span style="color:var(--gray);font-size:0.85rem;align-self:center">${enabledCount} scenario${enabledCount !== 1 ? 's' : ''} will be exported</span>
344
- <span class="spacer"></span>
345
- <button class="btn btn-primary" id="confirm-btn">Confirm & Run</button>
346
- </div>
347
- `;
348
- }
349
-
350
- function bindEvents() {
351
- // Scenario toggles
352
- document.querySelectorAll('.scenario-toggle').forEach(btn => {
353
- btn.addEventListener('click', () => {
354
- const idx = parseInt(btn.dataset.scenarioIdx);
355
- state.scenarios[idx].enabled = !state.scenarios[idx].enabled;
356
- render();
357
- });
358
- });
359
-
360
- // Ambiguity scope buttons
361
- document.querySelectorAll('.scope-btn').forEach(btn => {
362
- btn.addEventListener('click', () => {
363
- const idx = parseInt(btn.dataset.ambIdx);
364
- const decision = btn.dataset.decision;
365
- const current = state.ambiguityDecisions[idx].decision;
366
- state.ambiguityDecisions[idx].decision = current === decision ? null : decision;
367
- render();
368
- });
369
- });
370
-
371
- // Editable fields (blur saves)
372
- document.querySelectorAll('.editable[data-scenario-idx]').forEach(el => {
373
- el.addEventListener('blur', () => {
374
- const idx = parseInt(el.dataset.scenarioIdx);
375
- const field = el.dataset.field;
376
- state.scenarios[idx][field] = el.textContent.trim();
377
- });
378
- });
379
-
380
- // Remove custom scenario
381
- document.querySelectorAll('[data-remove-custom]').forEach(btn => {
382
- btn.addEventListener('click', () => {
383
- const idx = parseInt(btn.dataset.removeCustom);
384
- state.customScenarios.splice(idx, 1);
385
- render();
386
- });
387
- });
388
-
389
- // Add scenario
390
- const addBtn = document.getElementById('add-scenario-btn');
391
- if (addBtn) {
392
- addBtn.addEventListener('click', () => {
393
- const prompt = document.getElementById('custom-prompt').value.trim();
394
- const expected = document.getElementById('custom-expected').value.trim();
395
- if (!prompt) return;
396
- state.customScenarios.push({ prompt, expected_behavior: expected || 'Not specified' });
397
- render();
398
- });
399
- }
400
-
401
- // Notes
402
- const notes = document.getElementById('user-notes');
403
- if (notes) {
404
- notes.addEventListener('input', () => { state.userNotes = notes.value; });
405
- }
406
-
407
- // Confirm & Run
408
- const confirmBtn = document.getElementById('confirm-btn');
409
- if (confirmBtn) {
410
- confirmBtn.addEventListener('click', exportPlan);
411
- }
412
- }
413
-
414
- function exportPlan() {
415
- const plan = {
416
- version: 1,
417
- confirmed_scenarios: state.scenarios
418
- .filter(s => s.enabled)
419
- .map(s => ({
420
- id: s.id,
421
- prompt: s.prompt,
422
- expected_behavior: s.expected_behavior,
423
- covers: s.covers,
424
- why: s.why,
425
- })),
426
- custom_scenarios: state.customScenarios.map(s => ({
427
- prompt: s.prompt,
428
- expected_behavior: s.expected_behavior,
429
- })),
430
- ambiguity_decisions: state.ambiguityDecisions.filter(a => a.decision !== null),
431
- user_notes: state.userNotes || '',
432
- };
433
-
434
- const blob = new Blob([JSON.stringify(plan, null, 2)], { type: 'application/json' });
435
- const url = URL.createObjectURL(blob);
436
- const a = document.createElement('a');
437
- a.href = url;
438
- a.download = 'scenario_plan.json';
439
- document.body.appendChild(a);
440
- a.click();
441
- document.body.removeChild(a);
442
- URL.revokeObjectURL(url);
443
-
444
- const confirmBtn = document.getElementById('confirm-btn');
445
- if (confirmBtn) {
446
- confirmBtn.textContent = 'Exported! Return to your terminal.';
447
- confirmBtn.disabled = true;
448
- confirmBtn.style.background = 'var(--pass)';
449
- }
450
- }
451
-
452
- function esc(str) {
453
- if (!str) return '';
454
- const div = document.createElement('div');
455
- div.textContent = str;
456
- return div.innerHTML;
457
- }
458
-
459
- // Boot
460
- document.addEventListener('DOMContentLoaded', () => {
461
- const app = document.createElement('div');
462
- app.id = 'app';
463
- document.body.appendChild(app);
464
- render();
465
- });
466
- </script>
467
-
468
- </body>
469
- </html>
@@ -1,5 +0,0 @@
1
- import type { InferenceAdapter, Message, ChatOptions } from '../../types.js';
2
- export declare class CopilotInference implements InferenceAdapter {
3
- readonly name = "copilot";
4
- chat(messages: Message[], _options?: ChatOptions): Promise<string>;
5
- }
@@ -1,10 +0,0 @@
1
- import { execFileSync } from 'node:child_process';
2
- export class CopilotInference {
3
- name = 'copilot';
4
- async chat(messages, _options) {
5
- const prompt = messages.map((m) => m.content).join('\n');
6
- const result = execFileSync('copilot', ['-s', '--no-ask-user', '--model', 'gpt-4.1', '-p', prompt], { encoding: 'utf-8' });
7
- return result.trim();
8
- }
9
- }
10
- //# sourceMappingURL=copilot.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"copilot.js","sourceRoot":"","sources":["../../../../src/adapters/inference/copilot.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGlD,MAAM,OAAO,gBAAgB;IAClB,IAAI,GAAG,SAAS,CAAC;IAE1B,KAAK,CAAC,IAAI,CAAC,QAAmB,EAAE,QAAsB;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,eAAe,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3H,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC;CACF"}
@@ -1,8 +0,0 @@
1
- import type { Harness, InferenceAdapter } from '../types.js';
2
- export declare function reviewCommand(skillPath: string, harness: Harness, inference: InferenceAdapter, options: {
3
- workspace?: string;
4
- runs?: number;
5
- oldSkill?: string;
6
- noOpen?: boolean;
7
- concurrency?: number;
8
- }): Promise<void>;
@@ -1,32 +0,0 @@
1
- import { execFile } from 'node:child_process';
2
- import * as fs from 'node:fs';
3
- import * as path from 'node:path';
4
- import * as process from 'node:process';
5
- import { evalCommand } from './eval.js';
6
- import { TerminalReporter } from '../adapters/report/terminal.js';
7
- export async function reviewCommand(skillPath, harness, inference, options) {
8
- const results = await evalCommand(skillPath, harness, inference, options);
9
- const terminal = new TerminalReporter();
10
- await terminal.report(results);
11
- // feedback.json template
12
- const feedback = {};
13
- for (const run of results.evalRuns) {
14
- feedback[`eval-${run.slug}`] = '';
15
- }
16
- fs.writeFileSync(path.join(results.iterationDir, 'feedback.json'), JSON.stringify(feedback, null, 2));
17
- // Open in browser (placeholder - HTML reporter will be wired later)
18
- if (!options.noOpen) {
19
- const reportPath = path.join(results.iterationDir, 'benchmark.json');
20
- openInBrowser(reportPath);
21
- }
22
- }
23
- function openInBrowser(filePath) {
24
- const cmd = process.platform === 'darwin' ? 'open' :
25
- process.platform === 'win32' ? 'cmd' : 'xdg-open';
26
- const args = process.platform === 'win32' ? ['/c', 'start', '', filePath] : [filePath];
27
- execFile(cmd, args, (err) => {
28
- if (err)
29
- console.warn(`Could not open browser: ${err.message}`);
30
- });
31
- }
32
- //# sourceMappingURL=review.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"review.js","sourceRoot":"","sources":["../../../src/commands/review.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,OAAO,MAAM,cAAc,CAAC;AAExC,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AAElE,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,SAAiB,EACjB,OAAgB,EAChB,SAA2B,EAC3B,OAAyG;IAEzG,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAE1E,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE/B,yBAAyB;IACzB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACnC,QAAQ,CAAC,QAAQ,GAAG,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,CAAC;IACpC,CAAC;IACD,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,eAAe,CAAC,EAChD,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAClC,CAAC;IAEF,oEAAoE;IACpE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;QACrE,aAAa,CAAC,UAAU,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,GAAG,GACP,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACxC,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;IACpD,MAAM,IAAI,GACR,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC5E,QAAQ,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE;QAC1B,IAAI,GAAG;YAAE,OAAO,CAAC,IAAI,CAAC,2BAA2B,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;AACL,CAAC"}