coding-agent-benchmarks 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -257
- package/dist/adapters/claudeCodeCLI.d.ts.map +1 -1
- package/dist/adapters/claudeCodeCLI.js +4 -6
- package/dist/adapters/claudeCodeCLI.js.map +1 -1
- package/dist/adapters/copilotCLI.d.ts +0 -3
- package/dist/adapters/copilotCLI.d.ts.map +1 -1
- package/dist/adapters/copilotCLI.js +4 -9
- package/dist/adapters/copilotCLI.js.map +1 -1
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +0 -7
- package/dist/config/loader.js.map +1 -1
- package/dist/evaluator.d.ts +0 -12
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +24 -40
- package/dist/evaluator.js.map +1 -1
- package/dist/reporter.d.ts +5 -0
- package/dist/reporter.d.ts.map +1 -1
- package/dist/reporter.js +36 -6
- package/dist/reporter.js.map +1 -1
- package/dist/runner.js +13 -10
- package/dist/runner.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/utils/baselineManager.d.ts.map +1 -1
- package/dist/utils/baselineManager.js +0 -4
- package/dist/utils/baselineManager.js.map +1 -1
- package/dist/utils/gitUtils.d.ts +1 -15
- package/dist/utils/gitUtils.d.ts.map +1 -1
- package/dist/utils/gitUtils.js +17 -28
- package/dist/utils/gitUtils.js.map +1 -1
- package/dist/utils/timeUtils.d.ts +9 -0
- package/dist/utils/timeUtils.d.ts.map +1 -0
- package/dist/utils/timeUtils.js +23 -0
- package/dist/utils/timeUtils.js.map +1 -0
- package/dist/utils/workspaceUtils.d.ts.map +1 -1
- package/dist/utils/workspaceUtils.js +0 -5
- package/dist/utils/workspaceUtils.js.map +1 -1
- package/dist/validators/llmJudge.d.ts.map +1 -1
- package/dist/validators/llmJudge.js +3 -3
- package/dist/validators/llmJudge.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,6 +5,12 @@ Open-source framework for evaluating AI coding assistants (like GitHub Copilot C
|
|
|
5
5
|
|
|
6
6
|

|
|
7
7
|
|
|
8
|
+
*Figure 1: Evaluation workflow - prompt → generate → validate → score*
|
|
9
|
+
|
|
10
|
+

|
|
11
|
+
|
|
12
|
+
*Figure 2: Example terminal output showing scenario evaluation results*
|
|
13
|
+
|
|
8
14
|
|
|
9
15
|
## Features
|
|
10
16
|
|
|
@@ -14,6 +20,20 @@ Open-source framework for evaluating AI coding assistants (like GitHub Copilot C
|
|
|
14
20
|
- **Extensible**: Easy to add custom scenarios, validators, and adapters
|
|
15
21
|
- **CLI & Programmatic API**: Use as a command-line tool or integrate into your workflow
|
|
16
22
|
|
|
23
|
+
## How It Works
|
|
24
|
+
|
|
25
|
+
The filesystem is both the **input context** (the agent reads your project structure, configs, and existing code) and the **output surface** (the agent generates or modifies files). Git is the mechanism to **observe and reset** that surface between runs.
|
|
26
|
+
|
|
27
|
+
### Workspace Cleanup
|
|
28
|
+
|
|
29
|
+
After each scenario completes, the framework automatically resets the workspace to its committed state using `git checkout` and `git clean`. This ensures:
|
|
30
|
+
|
|
31
|
+
- **Scenario isolation** — Each scenario starts from the same clean baseline, preventing leftover files from one scenario from contaminating the next.
|
|
32
|
+
- **Reproducible results** — The same scenario always runs against the same workspace state, regardless of execution order.
|
|
33
|
+
- **Validation integrity** — Validators (Pattern, ESLint, LLM Judge) only evaluate changes from the current scenario.
|
|
34
|
+
|
|
35
|
+
The `.benchmarks/` directory is excluded from cleanup so baseline results persist across scenarios.
|
|
36
|
+
|
|
17
37
|
## Installation
|
|
18
38
|
|
|
19
39
|
```bash
|
|
@@ -119,7 +139,7 @@ module.exports = {
|
|
|
119
139
|
defaultAdapter: 'copilot',
|
|
120
140
|
|
|
121
141
|
// Default LLM model for judge
|
|
122
|
-
defaultModel: 'openai/gpt-
|
|
142
|
+
defaultModel: 'openai/gpt-5',
|
|
123
143
|
|
|
124
144
|
// Default timeout for code generation (milliseconds)
|
|
125
145
|
// Individual scenarios can override this
|
|
@@ -156,33 +176,15 @@ module.exports = {
|
|
|
156
176
|
id: 'react-no-inline-styles',
|
|
157
177
|
category: 'react',
|
|
158
178
|
severity: 'major',
|
|
159
|
-
tags: ['react', 'styling', 'best-practices'],
|
|
160
179
|
description: 'Forbid inline style objects in React components',
|
|
161
|
-
prompt: 'Create a React functional component called Button that accepts a "label" prop and renders a styled button. Use CSS classes instead of inline styles.',
|
|
162
180
|
validationStrategy: {
|
|
163
181
|
patterns: {
|
|
164
|
-
forbiddenPatterns: [/style\s*=\s*\{\{
|
|
182
|
+
forbiddenPatterns: [/style\s*=\s*\{\{/],
|
|
165
183
|
requiredPatterns: [/className/],
|
|
166
184
|
},
|
|
167
185
|
},
|
|
168
186
|
},
|
|
169
|
-
|
|
170
|
-
id: 'async-error-handling',
|
|
171
|
-
category: 'general',
|
|
172
|
-
severity: 'critical',
|
|
173
|
-
tags: ['async', 'error-handling', 'robustness'],
|
|
174
|
-
description: 'Ensure async functions have proper error handling',
|
|
175
|
-
prompt: 'Create an async function called fetchUserData that takes a userId parameter, makes an HTTP request to fetch user data, and returns the user object. Handle errors appropriately.',
|
|
176
|
-
validationStrategy: {
|
|
177
|
-
patterns: {
|
|
178
|
-
requiredPatterns: [/async\s+function\s+fetchUserData|const\s+fetchUserData.*async/, /try|catch|\.catch\(/],
|
|
179
|
-
},
|
|
180
|
-
llmJudge: {
|
|
181
|
-
enabled: true,
|
|
182
|
-
judgmentPrompt: 'Evaluate the error handling in this async function. Does it use try/catch or .catch()? Are errors logged or re-thrown appropriately?',
|
|
183
|
-
},
|
|
184
|
-
},
|
|
185
|
-
},
|
|
187
|
+
// Add more scenarios...
|
|
186
188
|
],
|
|
187
189
|
};
|
|
188
190
|
```
|
|
@@ -195,29 +197,6 @@ You can configure timeouts at three levels (in order of precedence):
|
|
|
195
197
|
2. **Global default**: Set `defaultTimeout` in your config file
|
|
196
198
|
3. **Built-in default**: 120000ms (2 minutes) if nothing else is specified
|
|
197
199
|
|
|
198
|
-
```javascript
|
|
199
|
-
// In benchmarks.config.js
|
|
200
|
-
module.exports = {
|
|
201
|
-
// Global default applies to all scenarios
|
|
202
|
-
defaultTimeout: 180000, // 3 minutes
|
|
203
|
-
|
|
204
|
-
scenarios: [
|
|
205
|
-
{
|
|
206
|
-
id: 'quick-check',
|
|
207
|
-
prompt: '...',
|
|
208
|
-
timeout: 60000, // Override: 1 minute for this scenario
|
|
209
|
-
// ...
|
|
210
|
-
},
|
|
211
|
-
{
|
|
212
|
-
id: 'complex-task',
|
|
213
|
-
prompt: '...',
|
|
214
|
-
// Will use defaultTimeout (3 minutes)
|
|
215
|
-
// ...
|
|
216
|
-
},
|
|
217
|
-
],
|
|
218
|
-
};
|
|
219
|
-
```
|
|
220
|
-
|
|
221
200
|
**Why configure timeouts?**
|
|
222
201
|
- Complex code generation tasks may need more time
|
|
223
202
|
- Simple checks can complete faster with shorter timeouts
|
|
@@ -255,130 +234,73 @@ validationStrategy: {
|
|
|
255
234
|
|
|
256
235
|
### LLM-as-Judge
|
|
257
236
|
|
|
258
|
-
Semantic evaluation using AI:
|
|
237
|
+
Semantic evaluation using AI (requires `GITHUB_TOKEN`):
|
|
259
238
|
|
|
260
239
|
```javascript
|
|
261
240
|
validationStrategy: {
|
|
262
241
|
llmJudge: {
|
|
263
242
|
enabled: true,
|
|
264
|
-
model: 'openai/gpt-
|
|
243
|
+
model: 'openai/gpt-5',
|
|
265
244
|
judgmentPrompt: `Evaluate if the code follows best practices...`,
|
|
266
245
|
},
|
|
267
246
|
}
|
|
268
247
|
```
|
|
269
248
|
|
|
270
|
-
The LLM judge requires a `GITHUB_TOKEN` environment variable with access to GitHub Models API.
|
|
271
|
-
|
|
272
249
|
### ESLint Integration
|
|
273
250
|
|
|
251
|
+

|
|
252
|
+
|
|
253
|
+
*Figure 3: ESLint validator detecting code quality issues in generated code*
|
|
254
|
+
|
|
274
255
|
Run ESLint on generated code:
|
|
275
256
|
|
|
276
257
|
```javascript
|
|
277
258
|
validationStrategy: {
|
|
278
|
-
eslint: {
|
|
279
|
-
enabled: true,
|
|
280
|
-
configPath: '.eslintrc.js', // optional
|
|
281
|
-
},
|
|
259
|
+
eslint: { enabled: true, configPath: '.eslintrc.js' },
|
|
282
260
|
}
|
|
283
261
|
```
|
|
284
262
|
|
|
285
263
|
## Scoring System
|
|
286
264
|
|
|
287
|
-
The scoring system operates at three levels: per-validator scoring, per-scenario scoring, and summary scoring.
|
|
288
|
-
|
|
289
265
|
### Per-Validator Scoring
|
|
290
266
|
|
|
291
|
-
Each validator
|
|
292
|
-
|
|
293
|
-
#### Pattern Validator
|
|
294
|
-
|
|
295
|
-
Uses exponential decay based on weighted violations:
|
|
296
|
-
|
|
297
|
-
```
|
|
298
|
-
score = e^(-totalWeight)
|
|
299
|
-
```
|
|
300
|
-
|
|
301
|
-
Where `totalWeight` is the sum of violation weights:
|
|
302
|
-
- **Critical violations**: 1.0 weight each
|
|
303
|
-
- **Major violations**: 0.7 weight each
|
|
304
|
-
- **Minor violations**: 0.3 weight each
|
|
305
|
-
|
|
306
|
-
**Examples**:
|
|
307
|
-
- 0 violations → score = 1.0 (perfect)
|
|
308
|
-
- 1 critical violation → score ≈ 0.37
|
|
309
|
-
- 1 major violation → score ≈ 0.50
|
|
310
|
-
- 2 minor violations → score ≈ 0.55
|
|
311
|
-
|
|
312
|
-
#### LLM Judge Validator
|
|
313
|
-
|
|
314
|
-
The LLM (GPT-4 or other model) evaluates the code semantically and returns:
|
|
315
|
-
- An `overallScore` from 0.0 to 1.0
|
|
316
|
-
- A list of violations with explanations
|
|
317
|
-
- Passed if: score ≥ 0.7 AND no violations
|
|
318
|
-
|
|
319
|
-
The LLM judge provides semantic understanding beyond pattern matching, evaluating whether the code actually solves the problem correctly and follows best practices.
|
|
320
|
-
|
|
321
|
-
#### ESLint Validator
|
|
267
|
+
Each validator independently evaluates generated code and produces a score from 0.0 to 1.0:
|
|
322
268
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
Uses exponential decay
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
score = e^(-totalWeight / 2)
|
|
329
|
-
```
|
|
330
|
-
|
|
331
|
-
ESLint violations are mapped to severity:
|
|
332
|
-
- ESLint error (severity 2) → **Major** violation (0.7 weight)
|
|
333
|
-
- ESLint warning (severity 1) → **Minor** violation (0.3 weight)
|
|
334
|
-
|
|
335
|
-
The `/2` dampening factor makes ESLint less punitive since projects often have many minor linting issues.
|
|
269
|
+
| Validator | Scoring Method | Notes |
|
|
270
|
+
|-----------|----------------|-------|
|
|
271
|
+
| **Pattern** | Uses exponential decay based on weighted violations | Critical: 1.0 weight, Major: 0.7 weight, Minor: 0.3 weight |
|
|
272
|
+
| **LLM Judge** | AI evaluates semantically, returns 0.0-1.0 score | Passes if score ≥ 0.7 AND no violations |
|
|
273
|
+
| **ESLint** | Exponential decay with dampening factor (÷2) | ESLint errors → Major (0.7), warnings → Minor (0.3) |
|
|
336
274
|
|
|
337
275
|
### Per-Scenario Scoring
|
|
338
276
|
|
|
339
|
-
Each scenario receives an **overall score**
|
|
277
|
+
Each scenario receives an **overall score** = **average of all active validator scores**
|
|
340
278
|
|
|
341
|
-
|
|
342
|
-
overallScore = average of all active validator scores
|
|
343
|
-
```
|
|
344
|
-
|
|
345
|
-
**Active validators** are those that:
|
|
346
|
-
- Are configured in the scenario's `validationStrategy`
|
|
347
|
-
- Successfully ran (did not return score = -1)
|
|
279
|
+
**Active validators** are those configured in `validationStrategy` that successfully ran (score ≠ -1).
|
|
348
280
|
|
|
349
281
|
**Pass/Fail Criteria**:
|
|
350
282
|
- ✅ **PASS**: `overallScore ≥ 0.8` AND `violations.length === 0`
|
|
351
283
|
- ❌ **FAIL**: `overallScore < 0.8` OR `violations.length > 0`
|
|
352
284
|
- ⚠️ **SKIP**: An error occurred during evaluation (timeout, adapter failure, etc.)
|
|
353
285
|
|
|
354
|
-
**Example**:
|
|
355
|
-
```
|
|
356
|
-
overallScore = (0.9 + 0.8) / 2 = 0.85
|
|
357
|
-
```
|
|
286
|
+
**Example**: Pattern (0.9) + LLM Judge (0.8) + ESLint (skipped) → `overallScore = (0.9 + 0.8) / 2 = 0.85`
|
|
358
287
|
|
|
359
288
|
### Summary Scoring
|
|
360
289
|
|
|
361
|
-
After evaluating all scenarios, the framework calculates
|
|
290
|
+
After evaluating all scenarios, the framework calculates:
|
|
362
291
|
|
|
363
292
|
```javascript
|
|
364
293
|
{
|
|
365
|
-
total: 10, // Total
|
|
366
|
-
passed: 7, //
|
|
367
|
-
failed: 2, //
|
|
368
|
-
skipped: 1, //
|
|
369
|
-
averageScore: 0.78, // Average of all scenario
|
|
294
|
+
total: 10, // Total scenarios
|
|
295
|
+
passed: 7, // overallScore ≥ 0.8 and no violations
|
|
296
|
+
failed: 2, // Evaluated but didn't pass
|
|
297
|
+
skipped: 1, // Encountered errors
|
|
298
|
+
averageScore: 0.78, // Average of all scenario scores
|
|
370
299
|
totalViolations: 8 // Sum of violations across all scenarios
|
|
371
300
|
}
|
|
372
301
|
```
|
|
373
302
|
|
|
374
|
-
**
|
|
375
|
-
```
|
|
376
|
-
averageScore = (sum of all scenario scores) / total scenarios
|
|
377
|
-
```
|
|
378
|
-
|
|
379
|
-
This includes scores from failed scenarios, providing an overall quality metric across your entire test suite.
|
|
380
|
-
|
|
381
|
-
**Transparency**: When baselines are saved, the per-validator breakdown is included in the baseline file, allowing you to trace exactly which validator contributed what score. See [Baseline File Format](#baseline-file-format) for details.
|
|
303
|
+
**Transparency**: Baselines include per-validator breakdowns. See [Baseline File Format](#baseline-file-format) for details.
|
|
382
304
|
|
|
383
305
|
### Score Interpretation
|
|
384
306
|
|
|
@@ -398,11 +320,6 @@ When baseline tracking is enabled, you'll see delta metrics:
|
|
|
398
320
|
↑ +18.5% improvement from baseline
|
|
399
321
|
```
|
|
400
322
|
|
|
401
|
-
The percentage is calculated as:
|
|
402
|
-
```
|
|
403
|
-
percentage = (currentScore - baselineScore) / baselineScore * 100
|
|
404
|
-
```
|
|
405
|
-
|
|
406
323
|
## Baseline Tracking
|
|
407
324
|
|
|
408
325
|
Track evaluation results over time by enabling baseline management in your config file:
|
|
@@ -430,42 +347,21 @@ When `compareBaseline` is enabled, the report will show score deltas and whether
|
|
|
430
347
|
|
|
431
348
|
### Baseline File Format
|
|
432
349
|
|
|
433
|
-
|
|
350
|
+
Path: `.benchmarks/baselines/{adapter}/{model}/{scenario-id}.json`
|
|
351
|
+
|
|
352
|
+
Each baseline file provides complete score traceability:
|
|
434
353
|
|
|
435
354
|
```json
|
|
436
355
|
{
|
|
437
356
|
"scenarioId": "typescript-no-any",
|
|
438
357
|
"score": 0.85,
|
|
439
358
|
"violations": [
|
|
440
|
-
{
|
|
441
|
-
"type": "pattern",
|
|
442
|
-
"message": "Forbidden pattern found: :\\s*any\\b",
|
|
443
|
-
"file": "src/types.ts",
|
|
444
|
-
"line": 12,
|
|
445
|
-
"severity": "critical",
|
|
446
|
-
"details": "Matched: \"metadata: any\""
|
|
447
|
-
}
|
|
359
|
+
{ "type": "pattern", "message": "Forbidden pattern found: :\\s*any\\b", "file": "src/types.ts", ... }
|
|
448
360
|
],
|
|
449
361
|
"validationResults": [
|
|
450
|
-
{
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
"violations": [...],
|
|
454
|
-
"validatorType": "pattern"
|
|
455
|
-
},
|
|
456
|
-
{
|
|
457
|
-
"passed": true,
|
|
458
|
-
"score": 1.0,
|
|
459
|
-
"violations": [],
|
|
460
|
-
"validatorType": "llm-judge"
|
|
461
|
-
},
|
|
462
|
-
{
|
|
463
|
-
"passed": true,
|
|
464
|
-
"score": -1,
|
|
465
|
-
"violations": [],
|
|
466
|
-
"validatorType": "eslint",
|
|
467
|
-
"error": "ESLint not found"
|
|
468
|
-
}
|
|
362
|
+
{ "passed": false, "score": 0.37, "validatorType": "pattern", "violations": [...] },
|
|
363
|
+
{ "passed": true, "score": 1.0, "validatorType": "llm-judge", "violations": [] },
|
|
364
|
+
{ "passed": true, "score": -1, "validatorType": "eslint", "error": "ESLint not found" }
|
|
469
365
|
],
|
|
470
366
|
"timestamp": "2026-01-23T22:28:32.216Z",
|
|
471
367
|
"adapter": "copilot",
|
|
@@ -474,15 +370,11 @@ Each baseline file contains complete transparency into how the score was calcula
|
|
|
474
370
|
```
|
|
475
371
|
|
|
476
372
|
**Key fields**:
|
|
477
|
-
- `score
|
|
478
|
-
- `violations
|
|
479
|
-
- `validationResults
|
|
480
|
-
- Individual validator score
|
|
481
|
-
- Whether that validator passed
|
|
482
|
-
- Violations specific to that validator
|
|
483
|
-
- Any errors that occurred (`score: -1` means skipped)
|
|
373
|
+
- `score` - Overall scenario score (average of active validators)
|
|
374
|
+
- `violations` - All violations from all validators combined
|
|
375
|
+
- `validationResults` - Per-validator breakdown (score, passed, violations, errors)
|
|
484
376
|
|
|
485
|
-
**
|
|
377
|
+
**Traceability**: You can always trace the overall score back to individual validator scores (e.g., `score: 0.067` → check `validationResults` for Pattern: 0.135, LLM Judge: 0.00).
|
|
486
378
|
|
|
487
379
|
## CLI Commands
|
|
488
380
|
|
|
@@ -490,24 +382,26 @@ Each baseline file contains complete transparency into how the score was calcula
|
|
|
490
382
|
|
|
491
383
|
Run benchmark evaluations.
|
|
492
384
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
385
|
+
| Option | Description | Default/Example |
|
|
386
|
+
|--------|-------------|-----------------|
|
|
387
|
+
| `--scenario <pattern>` | Filter by scenario ID (supports wildcards) | `typescript-*` |
|
|
388
|
+
| `--category <categories>` | Filter by category (comma-separated) | `typescript,react` |
|
|
389
|
+
| `--tag <tags>` | Filter by tags (comma-separated) | `safety,types` |
|
|
390
|
+
| `--adapter <type>` | Adapter to use | `copilot` or `claude-code` |
|
|
391
|
+
| `--model <model>` | LLM model for judge | `openai/gpt-5` |
|
|
392
|
+
| `--threshold <number>` | Minimum passing score | `0.8` |
|
|
393
|
+
| `--verbose` | Show detailed output | - |
|
|
394
|
+
| `--output <file>` | Export JSON report | `report.json` |
|
|
395
|
+
| `--workspace-root <path>` | Workspace root directory | Current directory |
|
|
503
396
|
|
|
504
397
|
### `list`
|
|
505
398
|
|
|
506
399
|
List available test scenarios.
|
|
507
400
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
401
|
+
| Option | Description |
|
|
402
|
+
|--------|-------------|
|
|
403
|
+
| `--category <categories>` | Filter by category (comma-separated) |
|
|
404
|
+
| `--tag <tags>` | Filter by tags (comma-separated) |
|
|
511
405
|
|
|
512
406
|
### `check`
|
|
513
407
|
|
|
@@ -517,8 +411,9 @@ Check if coding agent CLIs are available.
|
|
|
517
411
|
|
|
518
412
|
Test LLM judge with a custom prompt (for debugging).
|
|
519
413
|
|
|
520
|
-
|
|
521
|
-
|
|
414
|
+
| Option | Description |
|
|
415
|
+
|--------|-------------|
|
|
416
|
+
| `--model <model>` | LLM model to use |
|
|
522
417
|
|
|
523
418
|
## Understanding Output
|
|
524
419
|
|
|
@@ -567,26 +462,15 @@ import { Evaluator, loadConfig } from 'coding-agent-benchmarks';
|
|
|
567
462
|
async function runEvaluation() {
|
|
568
463
|
const { config, scenarios } = await loadConfig();
|
|
569
464
|
|
|
570
|
-
// Create evaluator
|
|
571
465
|
const evaluator = new Evaluator({
|
|
572
466
|
adapter: 'copilot',
|
|
573
|
-
model: 'openai/gpt-
|
|
467
|
+
model: 'openai/gpt-5',
|
|
574
468
|
verbose: true,
|
|
575
469
|
saveBaseline: config.saveBaseline,
|
|
576
470
|
compareBaseline: config.compareBaseline,
|
|
577
471
|
});
|
|
578
472
|
|
|
579
|
-
// Check adapter availability
|
|
580
|
-
const available = await evaluator.checkAdapterAvailability();
|
|
581
|
-
if (!available) {
|
|
582
|
-
throw new Error('Adapter not available');
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
// Run evaluation
|
|
586
473
|
const report = await evaluator.evaluate(scenarios);
|
|
587
|
-
|
|
588
|
-
console.log(`Passed: ${report.summary.passed}/${report.summary.total}`);
|
|
589
|
-
console.log(`Average score: ${report.summary.averageScore.toFixed(2)}`);
|
|
590
474
|
}
|
|
591
475
|
|
|
592
476
|
runEvaluation();
|
|
@@ -597,106 +481,62 @@ runEvaluation();
|
|
|
597
481
|
Implement the `CodeValidator` interface:
|
|
598
482
|
|
|
599
483
|
```typescript
|
|
600
|
-
import { CodeValidator, ValidationResult
|
|
484
|
+
import { CodeValidator, ValidationResult } from 'coding-agent-benchmarks';
|
|
601
485
|
|
|
602
486
|
export class CustomValidator implements CodeValidator {
|
|
603
487
|
public readonly type = 'custom';
|
|
604
488
|
|
|
605
|
-
async validate(
|
|
606
|
-
files: readonly string[],
|
|
607
|
-
scenario: TestScenario
|
|
608
|
-
): Promise<ValidationResult> {
|
|
489
|
+
async validate(files: readonly string[], scenario: TestScenario): Promise<ValidationResult> {
|
|
609
490
|
// Your validation logic here
|
|
610
|
-
return {
|
|
611
|
-
passed: true,
|
|
612
|
-
score: 1.0,
|
|
613
|
-
violations: [],
|
|
614
|
-
validatorType: 'custom',
|
|
615
|
-
};
|
|
491
|
+
return { passed: true, score: 1.0, violations: [], validatorType: 'custom' };
|
|
616
492
|
}
|
|
617
493
|
}
|
|
618
494
|
```
|
|
619
495
|
|
|
496
|
+
See CONTRIBUTING.md for complete examples.
|
|
497
|
+
|
|
620
498
|
## Creating Custom Adapters
|
|
621
499
|
|
|
622
500
|
Implement the `CodeGenerationAdapter` interface:
|
|
623
501
|
|
|
624
502
|
```typescript
|
|
625
|
-
import { CodeGenerationAdapter
|
|
503
|
+
import { CodeGenerationAdapter } from 'coding-agent-benchmarks';
|
|
626
504
|
|
|
627
505
|
export class CustomAdapter implements CodeGenerationAdapter {
|
|
628
|
-
public readonly type
|
|
629
|
-
|
|
630
|
-
async checkAvailability(): Promise<boolean> {
|
|
631
|
-
// Check if CLI is available
|
|
632
|
-
return true;
|
|
633
|
-
}
|
|
506
|
+
public readonly type = 'custom-adapter';
|
|
634
507
|
|
|
635
|
-
async
|
|
636
|
-
|
|
637
|
-
contextFiles?: readonly string[],
|
|
638
|
-
timeout?: number
|
|
639
|
-
): Promise<string[]> {
|
|
640
|
-
// Generate code and return changed files
|
|
641
|
-
return ['path/to/generated/file.ts'];
|
|
642
|
-
}
|
|
508
|
+
async checkAvailability(): Promise<boolean> { /* ... */ }
|
|
509
|
+
async generate(prompt: string, contextFiles?: readonly string[], timeout?: number): Promise<string[]> { /* ... */ }
|
|
643
510
|
}
|
|
644
511
|
```
|
|
645
512
|
|
|
513
|
+
See CONTRIBUTING.md for complete examples.
|
|
514
|
+
|
|
646
515
|
## GitHub Authentication (for LLM Judge)
|
|
647
516
|
|
|
648
|
-
LLM-as-judge validation requires GitHub authentication to access GitHub Models API.
|
|
517
|
+
LLM-as-judge validation requires GitHub authentication to access GitHub Models API. Choose one option:
|
|
649
518
|
|
|
650
519
|
### Option 1: Personal Access Token (Recommended)
|
|
651
520
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
6. Set environment variable:
|
|
658
|
-
```bash
|
|
659
|
-
export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
|
|
660
|
-
```
|
|
521
|
+
Create a token at https://github.com/settings/tokens with the **`models:read`** scope, then set it as an environment variable:
|
|
522
|
+
|
|
523
|
+
```bash
|
|
524
|
+
export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
|
|
525
|
+
```
|
|
661
526
|
|
|
662
527
|
### Option 2: GitHub CLI (Automatic)
|
|
663
528
|
|
|
664
|
-
If
|
|
529
|
+
If GitHub CLI is installed, tokens are auto-detected:
|
|
665
530
|
|
|
666
531
|
```bash
|
|
667
|
-
#
|
|
668
|
-
brew install gh # macOS
|
|
669
|
-
# or download from https://cli.github.com
|
|
670
|
-
|
|
671
|
-
# Authenticate (one time)
|
|
532
|
+
brew install gh # or download from https://cli.github.com
|
|
672
533
|
gh auth login
|
|
673
|
-
|
|
674
534
|
# Token will be used automatically - no GITHUB_TOKEN needed!
|
|
675
535
|
```
|
|
676
536
|
|
|
677
|
-
### Check Authentication
|
|
678
|
-
|
|
679
|
-
```bash
|
|
680
|
-
npx coding-agent-benchmarks check
|
|
681
|
-
```
|
|
682
|
-
|
|
683
|
-
Output:
|
|
684
|
-
```
|
|
685
|
-
Checking adapter availability...
|
|
686
|
-
GitHub Copilot CLI: ✓ Available
|
|
687
|
-
Claude Code CLI: ✗ Not found
|
|
688
|
-
|
|
689
|
-
Checking GitHub authentication...
|
|
690
|
-
✓ Using token from GitHub CLI (gh auth token)
|
|
691
|
-
```
|
|
692
|
-
|
|
693
|
-
## How It Works
|
|
537
|
+
### Check Authentication
|
|
694
538
|
|
|
695
|
-
|
|
696
|
-
2. **File Tracking**: Git is used to detect which files were created/modified
|
|
697
|
-
3. **Validation**: Multiple validators check the generated code
|
|
698
|
-
4. **Scoring**: Results are aggregated and compared against thresholds
|
|
699
|
-
5. **Reporting**: Results are displayed in terminal and optionally exported as JSON
|
|
539
|
+
Run `npx coding-agent-benchmarks check` to verify authentication status.
|
|
700
540
|
|
|
701
541
|
## Requirements
|
|
702
542
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"claudeCodeCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,2BAA2B;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,oBAAqB,YAAW,qBAAqB;IAChE,SAAgB,IAAI,EAAG,aAAa,CAAU;IAC9C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"claudeCodeCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,2BAA2B;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,oBAAqB,YAAW,qBAAqB;IAChE,SAAgB,IAAI,EAAG,aAAa,CAAU;IAC9C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAuGrB"}
|
|
@@ -100,14 +100,15 @@ class ClaudeCodeCLIAdapter {
|
|
|
100
100
|
* @param timeout Timeout in milliseconds, or null for no timeout
|
|
101
101
|
*/
|
|
102
102
|
async generate(prompt, contextFiles, timeout) {
|
|
103
|
+
const isAvailable = await this.checkAvailability();
|
|
104
|
+
if (!isAvailable) {
|
|
105
|
+
throw new Error("Claude Code CLI is not available. Please install it first: https://docs.anthropic.com/en/docs/build-with-claude/claude-code");
|
|
106
|
+
}
|
|
103
107
|
const fullPrompt = this.buildPrompt(prompt, contextFiles);
|
|
104
|
-
// Capture git status before generation
|
|
105
108
|
const statusBefore = (0, gitUtils_1.getGitStatusPorcelain)(this.workspaceRoot);
|
|
106
|
-
// Write prompt to temp file and pipe via stdin (matches @copilot-evals pattern)
|
|
107
109
|
return new Promise((resolve, reject) => {
|
|
108
110
|
const tempFile = path.join(this.workspaceRoot, ".claude-eval-prompt.txt");
|
|
109
111
|
fs.writeFileSync(tempFile, fullPrompt, "utf8");
|
|
110
|
-
// Cleanup function
|
|
111
112
|
const cleanup = () => {
|
|
112
113
|
try {
|
|
113
114
|
if (fs.existsSync(tempFile)) {
|
|
@@ -118,7 +119,6 @@ class ClaudeCodeCLIAdapter {
|
|
|
118
119
|
// Ignore cleanup errors
|
|
119
120
|
}
|
|
120
121
|
};
|
|
121
|
-
// Register cleanup on process termination
|
|
122
122
|
const cleanupOnExit = () => {
|
|
123
123
|
cleanup();
|
|
124
124
|
};
|
|
@@ -138,7 +138,6 @@ class ClaudeCodeCLIAdapter {
|
|
|
138
138
|
proc.stderr?.on("data", (data) => {
|
|
139
139
|
stderr += data.toString();
|
|
140
140
|
});
|
|
141
|
-
// Set timeout only if specified (null/undefined = no timeout)
|
|
142
141
|
let timeoutHandle = null;
|
|
143
142
|
if (timeout !== null && timeout !== undefined) {
|
|
144
143
|
timeoutHandle = setTimeout(() => {
|
|
@@ -160,7 +159,6 @@ class ClaudeCodeCLIAdapter {
|
|
|
160
159
|
reject(new Error(`Claude Code CLI exited with code ${code}\nStderr: ${stderr}`));
|
|
161
160
|
return;
|
|
162
161
|
}
|
|
163
|
-
// Get files changed during generation (diff before/after)
|
|
164
162
|
try {
|
|
165
163
|
const statusAfter = (0, gitUtils_1.getGitStatusPorcelain)(this.workspaceRoot);
|
|
166
164
|
const changedFiles = (0, gitUtils_1.getChangedFilesDiff)(statusBefore, statusAfter, this.workspaceRoot);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"claudeCodeCLI.js","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAsC;AACtC,uCAAyB;AACzB,2CAA6B;AAE7B,gDAA+E;AAC/E,4DAGiC;AAOjC,MAAM,aAAa,GAAG,QAAQ,CAAC;AAE/B,MAAa,oBAAoB;IAK/B,YAAY,OAAqC;QAJjC,SAAI,GAAG,aAAsB,CAAC;QAK5C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QAClE,IAAI,CAAC,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE;gBACtC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,WAAW,CACjB,MAAc,EACd,YAAgC;QAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;YACpE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;gBAClC,MAAM,cAAc,GAAG,QAAQ;qBAC5B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC;oBAC5D,OAAO,OAAO,GAAG,CAAC,IAAI,WAAW,GAAG,KAAK,GAAG,CAAC,OAAO,UAAU,CAAC;gBACjE,CAAC,CAAC;qBACD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,KAAK,CAAC,IAAI,CACR,0GAA0G,CAC3G,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,YAAgC,EAChC,OAAuB;QAEvB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAE1D,
|
|
1
|
+
{"version":3,"file":"claudeCodeCLI.js","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAsC;AACtC,uCAAyB;AACzB,2CAA6B;AAE7B,gDAA+E;AAC/E,4DAGiC;AAOjC,MAAM,aAAa,GAAG,QAAQ,CAAC;AAE/B,MAAa,oBAAoB;IAK/B,YAAY,OAAqC;QAJjC,SAAI,GAAG,aAAsB,CAAC;QAK5C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QAClE,IAAI,CAAC,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE;gBACtC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,WAAW,CACjB,MAAc,EACd,YAAgC;QAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;YACpE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;gBAClC,MAAM,cAAc,GAAG,QAAQ;qBAC5B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC;oBAC5D,OAAO,OAAO,GAAG,CAAC,IAAI,WAAW,GAAG,KAAK,GAAG,CAAC,OAAO,UAAU,CAAC;gBACjE,CAAC,CAAC;qBACD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,KAAK,CAAC,IAAI,CACR,0GAA0G,CAC3G,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,YAAgC,EAChC,OAAuB;QAEvB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACnD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CACb,6HAA6H,CAC9H,CAAC;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAE1D,MAAM,YAAY,GAAG,IAAA,gCAAqB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE/D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,yBAAyB,CAAC,CAAC;YAC1E,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;YAE/C,MAAM,OAAO,GAAG,GAAS,EAAE;gBACzB,IAAI,CAAC;oBACH,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC5B,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;oBAC1B,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,wBAAwB;gBAC1B,CAAC;YACH,CAAC,CAAC;YAEF,MAAM,aAAa,GAAG,GAAS,EAAE;gBAC/B,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;YAEvC,MAAM,OAAO,GAAG,QAAQ,QAAQ,sBAAsB,IAAI,CAAC,KAAK,yIAAyI,CAAC;YAC1M,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,IAAI,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE;gBACxC,GAAG,EAAE,IAAI,CAAC,aAAa;gBACvB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YAEH,6DAA6D;YAC7D,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,aAAa,GAA0B,IAAI,CAAC;YAChD,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;oBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;oBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;oBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,mCAAmC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACpE,CAAC,EAAE,OAAO,CAAC,CAAC;YACd,CAAC;YAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBAED,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBAEjD,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;oBACf,MAAM,CACJ,IAAI,KAAK,CACP,oCAAoC,IAAI,aAAa,MAAM,EAAE,CAC9D,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBAED,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAA,gCAAqB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBAC9D,MAAM,YAAY,GAAG,IAAA,8BAAmB,EACtC,YAAY,EACZ,WAAW,EACX,IAAI,CAAC,aAAa,CACnB,CAAC;oBACF,OAAO,CAAC,YAAY,CAAC,CAAC;gBACxB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBACD,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AApLD,oDAoLC"}
|
|
@@ -11,9 +11,6 @@ export declare class CopilotCLIAdapter implements CodeGenerationAdapter {
|
|
|
11
11
|
private workspaceRoot;
|
|
12
12
|
private model;
|
|
13
13
|
constructor(options?: CopilotCLIAdapterOptions);
|
|
14
|
-
/**
|
|
15
|
-
* Check if GitHub Copilot CLI is available
|
|
16
|
-
*/
|
|
17
14
|
checkAvailability(): Promise<boolean>;
|
|
18
15
|
/**
|
|
19
16
|
* Get the model being used
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"copilotCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/copilotCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,wBAAwB;IACvC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,iBAAkB,YAAW,qBAAqB;IAC7D,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,wBAAwB;
|
|
1
|
+
{"version":3,"file":"copilotCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/copilotCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,wBAAwB;IACvC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,iBAAkB,YAAW,qBAAqB;IAC7D,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,wBAAwB;IAKxC,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAyGrB"}
|