snapeval 2.0.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -104
- package/bin/snapeval.ts +39 -1
- package/dist/bin/snapeval.js +33 -0
- package/dist/bin/snapeval.js.map +1 -1
- package/dist/src/adapters/copilot-sdk-client.js +3 -1
- package/dist/src/adapters/copilot-sdk-client.js.map +1 -1
- package/dist/src/adapters/harness/copilot-sdk.d.ts +11 -0
- package/dist/src/adapters/harness/copilot-sdk.js +101 -0
- package/dist/src/adapters/harness/copilot-sdk.js.map +1 -0
- package/dist/src/adapters/harness/resolve.js +10 -2
- package/dist/src/adapters/harness/resolve.js.map +1 -1
- package/dist/src/adapters/inference/copilot-sdk.js +4 -1
- package/dist/src/adapters/inference/copilot-sdk.js.map +1 -1
- package/dist/src/adapters/report/terminal.js +89 -9
- package/dist/src/adapters/report/terminal.js.map +1 -1
- package/dist/src/commands/eval.d.ts +3 -0
- package/dist/src/commands/eval.js +146 -17
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/commands/review.d.ts +1 -0
- package/dist/src/commands/review.js.map +1 -1
- package/dist/src/config.js +2 -1
- package/dist/src/config.js.map +1 -1
- package/dist/src/engine/grader.js +67 -9
- package/dist/src/engine/grader.js.map +1 -1
- package/dist/src/engine/runner.d.ts +1 -0
- package/dist/src/engine/runner.js +15 -12
- package/dist/src/engine/runner.js.map +1 -1
- package/dist/src/errors.d.ts +6 -0
- package/dist/src/errors.js +21 -3
- package/dist/src/errors.js.map +1 -1
- package/dist/src/types.d.ts +3 -0
- package/package.json +4 -1
- package/plugin.json +1 -1
- package/skills/snapeval/SKILL.md +132 -39
- package/src/adapters/copilot-sdk-client.ts +3 -1
- package/src/adapters/harness/copilot-sdk.ts +126 -0
- package/src/adapters/harness/resolve.ts +13 -2
- package/src/adapters/inference/copilot-sdk.ts +5 -1
- package/src/adapters/report/terminal.ts +99 -10
- package/src/commands/eval.ts +183 -31
- package/src/commands/review.ts +1 -1
- package/src/config.ts +2 -1
- package/src/engine/grader.ts +59 -8
- package/src/engine/runner.ts +16 -13
- package/src/errors.ts +24 -3
- package/src/types.ts +3 -0
package/README.md
CHANGED
|
@@ -1,131 +1,178 @@
|
|
|
1
1
|
# snapeval
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Harness-agnostic eval runner for [agentskills.io](https://agentskills.io) skills.
|
|
4
4
|
|
|
5
5
|
[CI status](https://github.com/matantsach/snapeval/actions/workflows/ci.yml)
|
|
6
6
|
[npm package](https://www.npmjs.com/package/snapeval)
|
|
7
7
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
8
|
|
|
9
|
-
snapeval
|
|
9
|
+
snapeval runs every eval case **with and without** your skill, grades assertions, and computes a benchmark delta — so you can see exactly what value your skill adds.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
```
|
|
12
|
+
snapeval — greeter
|
|
13
|
+
Baseline = without SKILL.md (raw AI response)
|
|
14
|
+
────────────────────────────────────────────────────────────
|
|
15
|
+
#1 formal greeting for Eleanor
|
|
16
|
+
Skill: 100% | Baseline: 33% | 5.2s
|
|
17
|
+
#2 casual greeting for Marcus
|
|
18
|
+
Skill: 100% ↑ was 67% | Baseline: 67% | 2.7s
|
|
19
|
+
#3 pirate greeting for Zoe
|
|
20
|
+
Skill: 100% | Baseline: 67% | 2.5s
|
|
21
|
+
────────────────────────────────────────────────────────────
|
|
22
|
+
Summary:
|
|
23
|
+
Skill pass rate: 100.0%
|
|
24
|
+
Baseline pass rate: 55.6%
|
|
25
|
+
Improvement: +44.4%
|
|
26
|
+
```
|
|
12
27
|
|
|
13
|
-
|
|
14
|
-
- **Zero assertions** — No test logic to write. The AI generates realistic, messy prompts that mirror how real users actually type.
|
|
15
|
-
- **Semantic comparison** — Tiered pipeline: schema check (free) → LLM judge with order-swap debiasing (when needed). Most checks cost $0.
|
|
16
|
-
- **Free inference** — Uses gpt-5-mini via Copilot CLI and GitHub Models API.
|
|
17
|
-
- **Platform-agnostic** — Adapter-based architecture. Copilot CLI first, others coming.
|
|
28
|
+
## How it works
|
|
18
29
|
|
|
19
|
-
|
|
30
|
+
1. You write a `SKILL.md` and an `evals.json` with test cases and assertions
|
|
31
|
+
2. snapeval runs each eval **twice** — once with your skill loaded, once without (baseline)
|
|
32
|
+
3. Assertions are graded by an LLM judge (semantic) and/or shell scripts (deterministic)
|
|
33
|
+
4. A benchmark shows where your skill adds value vs. where the raw AI already handles it
|
|
20
34
|
|
|
21
|
-
|
|
35
|
+
## Quick start
|
|
22
36
|
|
|
23
|
-
|
|
37
|
+
### As a Copilot plugin
|
|
24
38
|
|
|
25
39
|
```bash
|
|
26
|
-
copilot plugin
|
|
27
|
-
copilot plugin install snapeval@snapeval-marketplace
|
|
40
|
+
copilot plugin install matantsach/snapeval
|
|
28
41
|
```
|
|
29
42
|
|
|
30
|
-
|
|
43
|
+
Then in Copilot CLI, just say `evaluate my skill` — the snapeval skill handles the rest.
|
|
44
|
+
|
|
45
|
+
### Standalone CLI
|
|
31
46
|
|
|
32
47
|
```bash
|
|
33
|
-
|
|
48
|
+
git clone https://github.com/matantsach/snapeval.git
|
|
49
|
+
cd snapeval && npm install
|
|
50
|
+
npx tsx bin/snapeval.ts eval <skill-dir>
|
|
34
51
|
```
|
|
35
52
|
|
|
36
|
-
|
|
53
|
+
## Eval format
|
|
37
54
|
|
|
38
|
-
```
|
|
39
|
-
|
|
55
|
+
```
|
|
56
|
+
my-skill/
|
|
57
|
+
├── SKILL.md
|
|
58
|
+
└── evals/
|
|
59
|
+
├── evals.json
|
|
60
|
+
└── scripts/ ← optional deterministic checks
|
|
61
|
+
└── validate.sh
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**evals.json:**
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"skill_name": "greeter",
|
|
69
|
+
"evals": [
|
|
70
|
+
{
|
|
71
|
+
"id": 1,
|
|
72
|
+
"label": "formal greeting for Eleanor",
|
|
73
|
+
"prompt": "Can you give me a formal greeting for Eleanor?",
|
|
74
|
+
"expected_output": "Returns the formal greeting addressed to Eleanor.",
|
|
75
|
+
"assertions": [
|
|
76
|
+
"Output contains the name Eleanor",
|
|
77
|
+
"Output uses a formal tone",
|
|
78
|
+
"script:validate.sh"
|
|
79
|
+
]
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
|
40
83
|
```
|
|
41
84
|
|
|
42
|
-
|
|
85
|
+
| Field | Required | Description |
|
|
86
|
+
|-------|----------|-------------|
|
|
87
|
+
| `id` | yes | Unique numeric identifier |
|
|
88
|
+
| `prompt` | yes | The user prompt sent to the harness |
|
|
89
|
+
| `expected_output` | yes | Human description of the expected behavior |
|
|
90
|
+
| `label` | no | Human-readable name shown in terminal output |
|
|
91
|
+
| `slug` | no | Filesystem-safe name for the eval directory |
|
|
92
|
+
| `assertions` | no | List of assertions to grade (LLM semantic or `script:` prefixed) |
|
|
93
|
+
| `files` | no | Input files to attach to the prompt |
|
|
43
94
|
|
|
44
|
-
|
|
95
|
+
### Assertions
|
|
96
|
+
|
|
97
|
+
**Semantic** — graded by an LLM. Write specific, verifiable statements:
|
|
45
98
|
|
|
46
99
|
```
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
> check if I broke anything in my-skill
|
|
50
|
-
> approve scenario 3
|
|
100
|
+
"Output contains a YAML block with an 'id' field for each issue"
|
|
101
|
+
"Response declines because the pipeline already has unclaimed issues"
|
|
51
102
|
```
|
|
52
103
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
### What happens when you evaluate
|
|
104
|
+
**Script** — prefix with `script:`. Scripts live in `evals/scripts/`, receive the output directory as `$1`, and pass on exit code 0:
|
|
56
105
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
4. **Capture** — snapeval writes `evals.json` and runs the scenarios against your skill, saving baseline snapshots
|
|
106
|
+
```
|
|
107
|
+
"script:validate-json-structure.sh"
|
|
108
|
+
```
|
|
61
109
|
|
|
62
|
-
|
|
110
|
+
## CLI reference
|
|
63
111
|
|
|
64
|
-
|
|
112
|
+
### `eval`
|
|
65
113
|
|
|
66
|
-
|
|
114
|
+
Run evals, grade assertions, compute benchmark.
|
|
67
115
|
|
|
68
|
-
```
|
|
69
|
-
snapeval
|
|
70
|
-
snapeval capture [skill-dir] Run scenarios and save baseline snapshots
|
|
71
|
-
snapeval check [skill-dir] Compare current output against baselines
|
|
72
|
-
snapeval approve [skill-dir] Approve regressed scenarios as new baselines
|
|
73
|
-
snapeval report [skill-dir] Write results with optional HTML viewer
|
|
74
|
-
snapeval ideate [skill-dir] Open the interactive scenario ideation viewer
|
|
116
|
+
```bash
|
|
117
|
+
npx snapeval eval [skill-dir] [options]
|
|
75
118
|
```
|
|
76
119
|
|
|
77
120
|
| Flag | Description | Default |
|
|
78
121
|
|------|-------------|---------|
|
|
79
|
-
| `--
|
|
80
|
-
| `--inference <name>` | Inference adapter | `auto` |
|
|
81
|
-
| `--
|
|
82
|
-
| `--runs <n>` |
|
|
83
|
-
| `--
|
|
84
|
-
| `--
|
|
85
|
-
| `--
|
|
122
|
+
| `--harness <name>` | Harness adapter | `copilot-sdk` |
|
|
123
|
+
| `--inference <name>` | Inference adapter for grading | `auto` |
|
|
124
|
+
| `--workspace <path>` | Output directory | `../{skill_name}-workspace` |
|
|
125
|
+
| `--runs <n>` | Harness invocations per eval for statistical averaging | `1` |
|
|
126
|
+
| `--concurrency <n>` | Parallel eval cases (1-10) | `1` |
|
|
127
|
+
| `--only <ids>` | Run specific eval IDs (e.g. `--only 1,3,5`) | all |
|
|
128
|
+
| `--threshold <rate>` | Minimum pass rate (0-1) required for exit code 0; below it the command exits with code 1 | none |
|
|
129
|
+
| `--old-skill <path>` | Compare against old skill version | none |
|
|
86
130
|
| `--verbose` | Verbose output | off |
|
|
87
131
|
|
|
88
|
-
|
|
132
|
+
### `review`
|
|
89
133
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
↓
|
|
95
|
-
Schema match? → PASS (free, instant)
|
|
96
|
-
LLM Judge agrees? → PASS/REGRESSED
|
|
134
|
+
Run eval + generate HTML report + open in browser.
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
npx snapeval review [skill-dir] [options]
|
|
97
138
|
```
|
|
98
139
|
|
|
99
|
-
|
|
140
|
+
Accepts the same flags as `eval` except `--only` and `--threshold`, plus `--no-open` to skip opening the browser.
|
|
100
141
|
|
|
101
|
-
|
|
102
|
-
|------|--------|------|-----------|
|
|
103
|
-
| 1 | Schema check | Free | Structural skeleton matches |
|
|
104
|
-
| 2 | LLM judge (order-swap) | Cheap | Schema differs, needs semantic comparison |
|
|
142
|
+
### Exit codes
|
|
105
143
|
|
|
106
|
-
|
|
144
|
+
| Code | Meaning |
|
|
145
|
+
|------|---------|
|
|
146
|
+
| 0 | Success |
|
|
147
|
+
| 1 | Threshold not met (eval ran but pass rate below `--threshold`) |
|
|
148
|
+
| 2 | Config/input error (bad JSON, missing fields, invalid flags) |
|
|
149
|
+
| 3 | File not found (missing skill dir, evals.json, or script) |
|
|
150
|
+
| 4 | Runtime error (harness failure, grading failure, timeout) |
|
|
107
151
|
|
|
108
|
-
##
|
|
152
|
+
## Output artifacts
|
|
109
153
|
|
|
110
|
-
|
|
154
|
+
Each run creates an iteration directory:
|
|
111
155
|
|
|
112
156
|
```
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
├──
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
157
|
+
workspace/
|
|
158
|
+
└── iteration-1/
|
|
159
|
+
├── benchmark.json ← aggregate stats with delta
|
|
160
|
+
├── SKILL.md.snapshot ← copy of skill used
|
|
161
|
+
└── eval-{slug}/
|
|
162
|
+
├── with_skill/
|
|
163
|
+
│ ├── outputs/output.txt
|
|
164
|
+
│ ├── timing.json
|
|
165
|
+
│ ├── grading.json
|
|
166
|
+
│ └── transcript.log
|
|
167
|
+
└── without_skill/
|
|
168
|
+
├── outputs/output.txt
|
|
122
169
|
├── timing.json
|
|
123
|
-
└──
|
|
170
|
+
└── grading.json
|
|
124
171
|
```
|
|
125
172
|
|
|
126
|
-
|
|
173
|
+
**benchmark.json** includes metadata: `eval_count`, `eval_ids`, `skill_name`, `runs_per_eval`, `timestamp`.
|
|
127
174
|
|
|
128
|
-
|
|
175
|
+
## CI integration
|
|
129
176
|
|
|
130
177
|
```yaml
|
|
131
178
|
name: Skill Evaluation
|
|
@@ -140,22 +187,10 @@ jobs:
|
|
|
140
187
|
with:
|
|
141
188
|
node-version: 22
|
|
142
189
|
- run: npm ci
|
|
143
|
-
- run: npx snapeval
|
|
190
|
+
- run: npx snapeval eval skills/my-skill --threshold 0.8 --runs 3
|
|
144
191
|
```
|
|
145
192
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
```bash
|
|
149
|
-
git clone https://github.com/matantsach/snapeval.git
|
|
150
|
-
cd snapeval && npm install
|
|
151
|
-
npx tsx bin/snapeval.ts check <skill-path>
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
Or load as a local plugin:
|
|
155
|
-
|
|
156
|
-
```bash
|
|
157
|
-
copilot plugin install ./path/to/snapeval
|
|
158
|
-
```
|
|
193
|
+
With `--threshold` set, the command exits with code 1 when the pass rate falls below the threshold, which fails the job and blocks the PR.
|
|
159
194
|
|
|
160
195
|
## Configuration
|
|
161
196
|
|
|
@@ -163,32 +198,37 @@ Create `snapeval.config.json` in your skill or project root:
|
|
|
163
198
|
|
|
164
199
|
```json
|
|
165
200
|
{
|
|
166
|
-
"
|
|
201
|
+
"harness": "copilot-sdk",
|
|
167
202
|
"inference": "auto",
|
|
168
|
-
"
|
|
169
|
-
"
|
|
203
|
+
"workspace": "../{skill_name}-workspace",
|
|
204
|
+
"runs": 1,
|
|
205
|
+
"concurrency": 1
|
|
170
206
|
}
|
|
171
207
|
```
|
|
172
208
|
|
|
173
|
-
|
|
209
|
+
Resolution order: defaults → project config → skill-dir config → CLI flags.
|
|
174
210
|
|
|
175
|
-
##
|
|
211
|
+
## Harness adapters
|
|
176
212
|
|
|
177
|
-
|
|
213
|
+
| Adapter | Description | Default |
|
|
214
|
+
|---------|-------------|---------|
|
|
215
|
+
| `copilot-sdk` | Programmatic via `@github/copilot-sdk` with native skill loading | yes |
|
|
216
|
+
| `copilot-cli` | Shells out to `copilot` CLI binary | no |
|
|
178
217
|
|
|
179
|
-
|
|
180
|
-
- **CLI** (`npx snapeval`) — Headless backend for CI and power users.
|
|
181
|
-
- **GitHub Action** — CI wrapper (planned).
|
|
218
|
+
The SDK harness loads skills natively via `skillDirectories`, captures full transcripts, and extracts real token counts from `assistant.usage` events.
|
|
182
219
|
|
|
183
|
-
|
|
220
|
+
## Inference adapters
|
|
184
221
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
222
|
+
| Adapter | Description |
|
|
223
|
+
|---------|-------------|
|
|
224
|
+
| `auto` | Copilot CLI if available, else GitHub Models API |
|
|
225
|
+
| `copilot` | Copilot CLI (`copilot` binary) |
|
|
226
|
+
| `copilot-sdk` | `@github/copilot-sdk` programmatic |
|
|
227
|
+
| `github-models` | GitHub Models API (requires `GITHUB_TOKEN`) |
|
|
188
228
|
|
|
189
229
|
## Contributing
|
|
190
230
|
|
|
191
|
-
See [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
231
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
192
232
|
|
|
193
233
|
## License
|
|
194
234
|
|
package/bin/snapeval.ts
CHANGED
|
@@ -1,4 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env tsx
|
|
2
|
+
|
|
3
|
+
// Suppress Node.js ExperimentalWarning (e.g., SQLite) from polluting output
|
|
4
|
+
const _origEmit = process.emit;
|
|
5
|
+
// @ts-ignore — override to filter warnings
|
|
6
|
+
process.emit = function (event: string, ...args: any[]) {
|
|
7
|
+
if (event === 'warning' && args[0]?.name === 'ExperimentalWarning') return false;
|
|
8
|
+
return _origEmit.apply(process, [event, ...args] as any);
|
|
9
|
+
};
|
|
10
|
+
|
|
2
11
|
import { Command } from 'commander';
|
|
3
12
|
import { resolveConfig } from '../src/config.js';
|
|
4
13
|
import { resolveInference } from '../src/adapters/inference/resolve.js';
|
|
@@ -7,6 +16,7 @@ import { evalCommand } from '../src/commands/eval.js';
|
|
|
7
16
|
import { reviewCommand } from '../src/commands/review.js';
|
|
8
17
|
import { TerminalReporter } from '../src/adapters/report/terminal.js';
|
|
9
18
|
import { SnapevalError } from '../src/errors.js';
|
|
19
|
+
import { stopClient } from '../src/adapters/copilot-sdk-client.js';
|
|
10
20
|
import * as path from 'node:path';
|
|
11
21
|
|
|
12
22
|
const program = new Command();
|
|
@@ -24,6 +34,9 @@ program
|
|
|
24
34
|
.option('--inference <inference>', 'Inference adapter to use')
|
|
25
35
|
.option('--workspace <path>', 'Workspace directory')
|
|
26
36
|
.option('--runs <n>', 'Runs per eval for statistical significance', '1')
|
|
37
|
+
.option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
|
|
38
|
+
.option('--only <ids>', 'Run only specific eval IDs (comma-separated, e.g. --only 1,3,5)')
|
|
39
|
+
.option('--threshold <rate>', 'Minimum pass rate (0-1) for exit code 0. Below threshold exits with code 1.')
|
|
27
40
|
.option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
|
|
28
41
|
.option('--verbose', 'Verbose output')
|
|
29
42
|
.argument('[skill-dir]', 'Path to skill directory', process.cwd())
|
|
@@ -36,15 +49,26 @@ program
|
|
|
36
49
|
inference: opts.inference as string,
|
|
37
50
|
workspace: opts.workspace as string,
|
|
38
51
|
runs: opts.runs ? parseInt(opts.runs as string, 10) : undefined,
|
|
52
|
+
concurrency: opts.concurrency ? parseInt(opts.concurrency as string, 10) : undefined,
|
|
39
53
|
},
|
|
40
54
|
process.cwd(), skillPath
|
|
41
55
|
);
|
|
42
56
|
const harness = resolveHarness(config.harness);
|
|
43
57
|
const inference = resolveInference(config.inference);
|
|
44
58
|
|
|
59
|
+
const only = opts.only
|
|
60
|
+
? (opts.only as string).split(',').map((s) => parseInt(s.trim(), 10))
|
|
61
|
+
: undefined;
|
|
62
|
+
const threshold = opts.threshold
|
|
63
|
+
? parseFloat(opts.threshold as string)
|
|
64
|
+
: undefined;
|
|
65
|
+
|
|
45
66
|
const results = await evalCommand(skillPath, harness, inference, {
|
|
46
67
|
workspace: config.workspace,
|
|
47
68
|
runs: config.runs,
|
|
69
|
+
concurrency: config.concurrency,
|
|
70
|
+
only,
|
|
71
|
+
threshold,
|
|
48
72
|
oldSkill: opts.oldSkill as string | undefined,
|
|
49
73
|
});
|
|
50
74
|
|
|
@@ -52,7 +76,15 @@ program
|
|
|
52
76
|
await terminal.report(results);
|
|
53
77
|
console.log(`Results at ${results.iterationDir}`);
|
|
54
78
|
process.exit(0);
|
|
55
|
-
} catch (err) {
|
|
79
|
+
} catch (err: any) {
|
|
80
|
+
// ThresholdError has results attached — show them before failing
|
|
81
|
+
if (err.results) {
|
|
82
|
+
const terminal = new TerminalReporter();
|
|
83
|
+
await terminal.report(err.results);
|
|
84
|
+
console.log(`Results at ${err.results.iterationDir}`);
|
|
85
|
+
}
|
|
86
|
+
handleError(err);
|
|
87
|
+
}
|
|
56
88
|
});
|
|
57
89
|
|
|
58
90
|
// --- review ---
|
|
@@ -63,6 +95,7 @@ program
|
|
|
63
95
|
.option('--inference <inference>', 'Inference adapter to use')
|
|
64
96
|
.option('--workspace <path>', 'Workspace directory')
|
|
65
97
|
.option('--runs <n>', 'Runs per eval for statistical significance', '1')
|
|
98
|
+
.option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
|
|
66
99
|
.option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
|
|
67
100
|
.option('--no-open', 'Do not open browser')
|
|
68
101
|
.option('--verbose', 'Verbose output')
|
|
@@ -76,6 +109,7 @@ program
|
|
|
76
109
|
inference: opts.inference as string,
|
|
77
110
|
workspace: opts.workspace as string,
|
|
78
111
|
runs: opts.runs ? parseInt(opts.runs as string, 10) : undefined,
|
|
112
|
+
concurrency: opts.concurrency ? parseInt(opts.concurrency as string, 10) : undefined,
|
|
79
113
|
},
|
|
80
114
|
process.cwd(), skillPath
|
|
81
115
|
);
|
|
@@ -85,6 +119,7 @@ program
|
|
|
85
119
|
await reviewCommand(skillPath, harness, inference, {
|
|
86
120
|
workspace: config.workspace,
|
|
87
121
|
runs: config.runs,
|
|
122
|
+
concurrency: config.concurrency,
|
|
88
123
|
oldSkill: opts.oldSkill as string | undefined,
|
|
89
124
|
noOpen: opts.open === false,
|
|
90
125
|
});
|
|
@@ -92,6 +127,9 @@ program
|
|
|
92
127
|
} catch (err) { handleError(err); }
|
|
93
128
|
});
|
|
94
129
|
|
|
130
|
+
// Clean up SDK client on exit (no-op if never started)
|
|
131
|
+
process.on('exit', () => { stopClient().catch(() => {}); });
|
|
132
|
+
|
|
95
133
|
function handleError(err: unknown): never {
|
|
96
134
|
if (err instanceof SnapevalError) {
|
|
97
135
|
console.error(`Error: ${err.message}`);
|
package/dist/bin/snapeval.js
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env tsx
|
|
2
|
+
// Suppress Node.js ExperimentalWarning (e.g., SQLite) from polluting output
|
|
3
|
+
const _origEmit = process.emit;
|
|
4
|
+
// @ts-ignore — override to filter warnings
|
|
5
|
+
process.emit = function (event, ...args) {
|
|
6
|
+
if (event === 'warning' && args[0]?.name === 'ExperimentalWarning')
|
|
7
|
+
return false;
|
|
8
|
+
return _origEmit.apply(process, [event, ...args]);
|
|
9
|
+
};
|
|
2
10
|
import { Command } from 'commander';
|
|
3
11
|
import { resolveConfig } from '../src/config.js';
|
|
4
12
|
import { resolveInference } from '../src/adapters/inference/resolve.js';
|
|
@@ -7,6 +15,7 @@ import { evalCommand } from '../src/commands/eval.js';
|
|
|
7
15
|
import { reviewCommand } from '../src/commands/review.js';
|
|
8
16
|
import { TerminalReporter } from '../src/adapters/report/terminal.js';
|
|
9
17
|
import { SnapevalError } from '../src/errors.js';
|
|
18
|
+
import { stopClient } from '../src/adapters/copilot-sdk-client.js';
|
|
10
19
|
import * as path from 'node:path';
|
|
11
20
|
const program = new Command();
|
|
12
21
|
program
|
|
@@ -21,6 +30,9 @@ program
|
|
|
21
30
|
.option('--inference <inference>', 'Inference adapter to use')
|
|
22
31
|
.option('--workspace <path>', 'Workspace directory')
|
|
23
32
|
.option('--runs <n>', 'Runs per eval for statistical significance', '1')
|
|
33
|
+
.option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
|
|
34
|
+
.option('--only <ids>', 'Run only specific eval IDs (comma-separated, e.g. --only 1,3,5)')
|
|
35
|
+
.option('--threshold <rate>', 'Minimum pass rate (0-1) for exit code 0. Below threshold exits with code 1.')
|
|
24
36
|
.option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
|
|
25
37
|
.option('--verbose', 'Verbose output')
|
|
26
38
|
.argument('[skill-dir]', 'Path to skill directory', process.cwd())
|
|
@@ -32,12 +44,22 @@ program
|
|
|
32
44
|
inference: opts.inference,
|
|
33
45
|
workspace: opts.workspace,
|
|
34
46
|
runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
|
|
47
|
+
concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
|
|
35
48
|
}, process.cwd(), skillPath);
|
|
36
49
|
const harness = resolveHarness(config.harness);
|
|
37
50
|
const inference = resolveInference(config.inference);
|
|
51
|
+
const only = opts.only
|
|
52
|
+
? opts.only.split(',').map((s) => parseInt(s.trim(), 10))
|
|
53
|
+
: undefined;
|
|
54
|
+
const threshold = opts.threshold
|
|
55
|
+
? parseFloat(opts.threshold)
|
|
56
|
+
: undefined;
|
|
38
57
|
const results = await evalCommand(skillPath, harness, inference, {
|
|
39
58
|
workspace: config.workspace,
|
|
40
59
|
runs: config.runs,
|
|
60
|
+
concurrency: config.concurrency,
|
|
61
|
+
only,
|
|
62
|
+
threshold,
|
|
41
63
|
oldSkill: opts.oldSkill,
|
|
42
64
|
});
|
|
43
65
|
const terminal = new TerminalReporter();
|
|
@@ -46,6 +68,12 @@ program
|
|
|
46
68
|
process.exit(0);
|
|
47
69
|
}
|
|
48
70
|
catch (err) {
|
|
71
|
+
// ThresholdError has results attached — show them before failing
|
|
72
|
+
if (err.results) {
|
|
73
|
+
const terminal = new TerminalReporter();
|
|
74
|
+
await terminal.report(err.results);
|
|
75
|
+
console.log(`Results at ${err.results.iterationDir}`);
|
|
76
|
+
}
|
|
49
77
|
handleError(err);
|
|
50
78
|
}
|
|
51
79
|
});
|
|
@@ -57,6 +85,7 @@ program
|
|
|
57
85
|
.option('--inference <inference>', 'Inference adapter to use')
|
|
58
86
|
.option('--workspace <path>', 'Workspace directory')
|
|
59
87
|
.option('--runs <n>', 'Runs per eval for statistical significance', '1')
|
|
88
|
+
.option('--concurrency <n>', 'Number of eval cases to run in parallel (1-10)', '1')
|
|
60
89
|
.option('--old-skill <path>', 'Compare against old skill version instead of no-skill')
|
|
61
90
|
.option('--no-open', 'Do not open browser')
|
|
62
91
|
.option('--verbose', 'Verbose output')
|
|
@@ -69,12 +98,14 @@ program
|
|
|
69
98
|
inference: opts.inference,
|
|
70
99
|
workspace: opts.workspace,
|
|
71
100
|
runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
|
|
101
|
+
concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
|
|
72
102
|
}, process.cwd(), skillPath);
|
|
73
103
|
const harness = resolveHarness(config.harness);
|
|
74
104
|
const inference = resolveInference(config.inference);
|
|
75
105
|
await reviewCommand(skillPath, harness, inference, {
|
|
76
106
|
workspace: config.workspace,
|
|
77
107
|
runs: config.runs,
|
|
108
|
+
concurrency: config.concurrency,
|
|
78
109
|
oldSkill: opts.oldSkill,
|
|
79
110
|
noOpen: opts.open === false,
|
|
80
111
|
});
|
|
@@ -84,6 +115,8 @@ program
|
|
|
84
115
|
handleError(err);
|
|
85
116
|
}
|
|
86
117
|
});
|
|
118
|
+
// Clean up SDK client on exit (no-op if never started)
|
|
119
|
+
process.on('exit', () => { stopClient().catch(() => { }); });
|
|
87
120
|
function handleError(err) {
|
|
88
121
|
if (err instanceof SnapevalError) {
|
|
89
122
|
console.error(`Error: ${err.message}`);
|
package/dist/bin/snapeval.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";
|
|
1
|
+
{"version":3,"file":"snapeval.js","sourceRoot":"","sources":["../../bin/snapeval.ts"],"names":[],"mappings":";AAEA,4EAA4E;AAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;AAC/B,2CAA2C;AAC3C,OAAO,CAAC,IAAI,GAAG,UAAU,KAAa,EAAE,GAAG,IAAW;IACpD,IAAI,KAAK,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,qBAAqB;QAAE,OAAO,KAAK,CAAC;IACjF,OAAO,SAAS,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,GAAG,IAAI,CAAQ,CAAC,CAAC;AAC3D,CAAC,CAAC;AAEF,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,uCAAuC,CAAC;AACnE,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,wDAAwD,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,eAAe;AACf,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,qEAAqE,CAAC;KAClF,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,EAAE,GAAG,CAAC;KAClF,MAAM,CAAC,cAAc,EAAE,iEAAiE,CAAC;KACzF,MAAM,CAAC,oBAAoB,EAAE,6EAA6E,CAAC;KAC3G,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAqB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SACrF,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CA
AC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI;YACpB,CAAC,CAAE,IAAI,CAAC,IAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;YACrE,CAAC,CAAC,SAAS,CAAC;QACd,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS;YAC9B,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,SAAmB,CAAC;YACtC,CAAC,CAAC,SAAS,CAAC;QAEd,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/D,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,IAAI;YACJ,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,QAA8B;SAC9C,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,iEAAiE;QACjE,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;YAChB,MAAM,QAAQ,GAAG,IAAI,gBAAgB,EAAE,CAAC;YACxC,MAAM,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,WAAW,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,iBAAiB;AACjB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,gBAAgB,CAAC;KAC/C,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,oBAAoB,EAAE,qBAAqB,CAAC;KACnD,MAAM,CAAC,YAAY,EAAE,4CAA4C,EAAE,GAAG,CAAC;KACvE,MAAM,CAAC,mBAAmB,EAAE,gDAAgD,EAAE,GAAG,CAAC;KAClF,MAAM,CAAC,oBAAoB,EAAE,uDAAuD,CAAC;KACrF,MAAM,CAAC,WAAW,EAAE,qBAAqB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC;KACrC,QAAQ,CAAC,aAAa,EAAE,yBAAyB,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;KACjE,MAAM,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAsC,EAAE,EAAE;IACzE,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,aAAa,CAC1B;YACE,OAAO,EAAE,IAAI,CAAC,OAAiB;YAC/B,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,SAAS,EAAE,IAAI,CAAC,SAAmB;YACnC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,
SAAS;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAqB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;SACrF,EACD,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CACzB,CAAC;QACF,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAErD,MAAM,aAAa,CAAC,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE;YACjD,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAA8B;YAC7C,MAAM,EAAE,IAAI,CAAC,IAAI,KAAK,KAAK;SAC5B,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEL,uDAAuD;AACvD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,UAAU,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAE5D,SAAS,WAAW,CAAC,GAAY;IAC/B,IAAI,GAAG,YAAY,aAAa,EAAE,CAAC;QACjC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
|
@@ -25,7 +25,9 @@ export async function getClient() {
|
|
|
25
25
|
if (!CopilotClient) {
|
|
26
26
|
throw new Error('Could not find CopilotClient export in @github/copilot-sdk. The package may have changed its API.');
|
|
27
27
|
}
|
|
28
|
-
|
|
28
|
+
// Suppress ExperimentalWarning (e.g., SQLite) in the spawned CLI subprocess
|
|
29
|
+
const env = { ...process.env, NODE_OPTIONS: [process.env.NODE_OPTIONS, '--no-warnings'].filter(Boolean).join(' ') };
|
|
30
|
+
clientInstance = new CopilotClient({ logLevel: 'none', env });
|
|
29
31
|
await clientInstance.start();
|
|
30
32
|
clientStarted = true;
|
|
31
33
|
return clientInstance;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"copilot-sdk-client.js","sourceRoot":"","sources":["../../../src/adapters/copilot-sdk-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,iEAAiE;AACjE,4DAA4D;AAC5D,IAAI,cAAc,GAAQ,IAAI,CAAC;AAC/B,IAAI,aAAa,GAAG,KAAK,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,IAAI,cAAc,IAAI,aAAa;QAAE,OAAO,cAAc,CAAC;IAE3D,IAAI,GAAQ,CAAC;IACb,IAAI,CAAC;QACH,+DAA+D;QAC/D,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,MAAM,aAAa,GAAG,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC;IACtE,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,cAAc,GAAG,IAAI,aAAa,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"copilot-sdk-client.js","sourceRoot":"","sources":["../../../src/adapters/copilot-sdk-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,iEAAiE;AACjE,4DAA4D;AAC5D,IAAI,cAAc,GAAQ,IAAI,CAAC;AAC/B,IAAI,aAAa,GAAG,KAAK,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,IAAI,cAAc,IAAI,aAAa;QAAE,OAAO,cAAc,CAAC;IAE3D,IAAI,GAAQ,CAAC;IACb,IAAI,CAAC;QACH,+DAA+D;QAC/D,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,MAAM,aAAa,GAAG,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC;IACtE,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CACb,mGAAmG,CACpG,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,MAAM,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,YAAY,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IACpH,cAAc,GAAG,IAAI,aAAa,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IAC9D,MAAM,cAAc,CAAC,KAAK,EAAE,CAAC;IAC7B,aAAa,GAAG,IAAI,CAAC;IACrB,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,IAAI,cAAc,IAAI,aAAa,EAAE,CAAC;QACpC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QAC5B,aAAa,GAAG,KAAK,CAAC;QACtB,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,iEAAiE;IACjE,mEAAmE;IACnE,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IACxB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,EAAE,SAAS,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC;QAC3F,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO,IAAI,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM;QAC1B,GAAG,GAAG,MAAM,CAAC;IACf,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Harness, HarnessRunResult } from '../../types.js';
|
|
2
|
+
export declare class CopilotSDKHarness implements Harness {
|
|
3
|
+
readonly name = "copilot-sdk";
|
|
4
|
+
run(options: {
|
|
5
|
+
skillPath?: string;
|
|
6
|
+
prompt: string;
|
|
7
|
+
files?: string[];
|
|
8
|
+
outputDir: string;
|
|
9
|
+
}): Promise<HarnessRunResult>;
|
|
10
|
+
isAvailable(): Promise<boolean>;
|
|
11
|
+
}
|