coding-agent-benchmarks 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +97 -257
  2. package/dist/adapters/claudeCodeCLI.d.ts.map +1 -1
  3. package/dist/adapters/claudeCodeCLI.js +4 -6
  4. package/dist/adapters/claudeCodeCLI.js.map +1 -1
  5. package/dist/adapters/copilotCLI.d.ts +0 -3
  6. package/dist/adapters/copilotCLI.d.ts.map +1 -1
  7. package/dist/adapters/copilotCLI.js +4 -9
  8. package/dist/adapters/copilotCLI.js.map +1 -1
  9. package/dist/config/loader.d.ts.map +1 -1
  10. package/dist/config/loader.js +0 -7
  11. package/dist/config/loader.js.map +1 -1
  12. package/dist/evaluator.d.ts +0 -12
  13. package/dist/evaluator.d.ts.map +1 -1
  14. package/dist/evaluator.js +24 -40
  15. package/dist/evaluator.js.map +1 -1
  16. package/dist/reporter.d.ts +5 -0
  17. package/dist/reporter.d.ts.map +1 -1
  18. package/dist/reporter.js +36 -6
  19. package/dist/reporter.js.map +1 -1
  20. package/dist/runner.js +13 -10
  21. package/dist/runner.js.map +1 -1
  22. package/dist/types.d.ts +1 -1
  23. package/dist/utils/baselineManager.d.ts.map +1 -1
  24. package/dist/utils/baselineManager.js +0 -4
  25. package/dist/utils/baselineManager.js.map +1 -1
  26. package/dist/utils/gitUtils.d.ts +1 -15
  27. package/dist/utils/gitUtils.d.ts.map +1 -1
  28. package/dist/utils/gitUtils.js +17 -28
  29. package/dist/utils/gitUtils.js.map +1 -1
  30. package/dist/utils/timeUtils.d.ts +9 -0
  31. package/dist/utils/timeUtils.d.ts.map +1 -0
  32. package/dist/utils/timeUtils.js +23 -0
  33. package/dist/utils/timeUtils.js.map +1 -0
  34. package/dist/utils/workspaceUtils.d.ts.map +1 -1
  35. package/dist/utils/workspaceUtils.js +0 -5
  36. package/dist/utils/workspaceUtils.js.map +1 -1
  37. package/dist/validators/llmJudge.d.ts.map +1 -1
  38. package/dist/validators/llmJudge.js +3 -3
  39. package/dist/validators/llmJudge.js.map +1 -1
  40. package/package.json +1 -1
package/README.md CHANGED
@@ -5,6 +5,12 @@ Open-source framework for evaluating AI coding assistants (like GitHub Copilot C
5
5
 
6
6
  ![WhatsApp Image 2026-01-23 at 9 04 49 AM](https://github.com/user-attachments/assets/3544d04f-37a5-47b0-a013-669c6015d26f)
7
7
 
8
+ *Figure 1: Evaluation workflow - prompt → generate → validate → score*
9
+
10
+ ![WhatsApp Image 2026-01-24 at 1 58 31 PM](https://github.com/user-attachments/assets/f93ea3e0-74f8-4789-ab43-97245acc91b6)
11
+
12
+ *Figure 2: Example terminal output showing scenario evaluation results*
13
+
8
14
 
9
15
  ## Features
10
16
 
@@ -14,6 +20,20 @@ Open-source framework for evaluating AI coding assistants (like GitHub Copilot C
14
20
  - **Extensible**: Easy to add custom scenarios, validators, and adapters
15
21
  - **CLI & Programmatic API**: Use as a command-line tool or integrate into your workflow
16
22
 
23
+ ## How It Works
24
+
25
+ The filesystem is both the **input context** (the agent reads your project structure, configs, and existing code) and the **output surface** (the agent generates or modifies files). Git is the mechanism to **observe and reset** that surface between runs.
26
+
27
+ ### Workspace Cleanup
28
+
29
+ After each scenario completes, the framework automatically resets the workspace to its committed state using `git checkout` and `git clean`. This ensures:
30
+
31
+ - **Scenario isolation** — Each scenario starts from the same clean baseline, preventing leftover files from one scenario from contaminating the next.
32
+ - **Reproducible results** — The same scenario always runs against the same workspace state, regardless of execution order.
33
+ - **Validation integrity** — Validators (Pattern, ESLint, LLM Judge) only evaluate changes from the current scenario.
34
+
35
+ The `.benchmarks/` directory is excluded from cleanup so baseline results persist across scenarios.
36
+
17
37
  ## Installation
18
38
 
19
39
  ```bash
@@ -119,7 +139,7 @@ module.exports = {
119
139
  defaultAdapter: 'copilot',
120
140
 
121
141
  // Default LLM model for judge
122
- defaultModel: 'openai/gpt-4.1',
142
+ defaultModel: 'openai/gpt-5',
123
143
 
124
144
  // Default timeout for code generation (milliseconds)
125
145
  // Individual scenarios can override this
@@ -156,33 +176,15 @@ module.exports = {
156
176
  id: 'react-no-inline-styles',
157
177
  category: 'react',
158
178
  severity: 'major',
159
- tags: ['react', 'styling', 'best-practices'],
160
179
  description: 'Forbid inline style objects in React components',
161
- prompt: 'Create a React functional component called Button that accepts a "label" prop and renders a styled button. Use CSS classes instead of inline styles.',
162
180
  validationStrategy: {
163
181
  patterns: {
164
- forbiddenPatterns: [/style\s*=\s*\{\{/, /style\s*=\s*\{[^}]*\}/],
182
+ forbiddenPatterns: [/style\s*=\s*\{\{/],
165
183
  requiredPatterns: [/className/],
166
184
  },
167
185
  },
168
186
  },
169
- {
170
- id: 'async-error-handling',
171
- category: 'general',
172
- severity: 'critical',
173
- tags: ['async', 'error-handling', 'robustness'],
174
- description: 'Ensure async functions have proper error handling',
175
- prompt: 'Create an async function called fetchUserData that takes a userId parameter, makes an HTTP request to fetch user data, and returns the user object. Handle errors appropriately.',
176
- validationStrategy: {
177
- patterns: {
178
- requiredPatterns: [/async\s+function\s+fetchUserData|const\s+fetchUserData.*async/, /try|catch|\.catch\(/],
179
- },
180
- llmJudge: {
181
- enabled: true,
182
- judgmentPrompt: 'Evaluate the error handling in this async function. Does it use try/catch or .catch()? Are errors logged or re-thrown appropriately?',
183
- },
184
- },
185
- },
187
+ // Add more scenarios...
186
188
  ],
187
189
  };
188
190
  ```
@@ -195,29 +197,6 @@ You can configure timeouts at three levels (in order of precedence):
195
197
  2. **Global default**: Set `defaultTimeout` in your config file
196
198
  3. **Built-in default**: 120000ms (2 minutes) if nothing else is specified
197
199
 
198
- ```javascript
199
- // In benchmarks.config.js
200
- module.exports = {
201
- // Global default applies to all scenarios
202
- defaultTimeout: 180000, // 3 minutes
203
-
204
- scenarios: [
205
- {
206
- id: 'quick-check',
207
- prompt: '...',
208
- timeout: 60000, // Override: 1 minute for this scenario
209
- // ...
210
- },
211
- {
212
- id: 'complex-task',
213
- prompt: '...',
214
- // Will use defaultTimeout (3 minutes)
215
- // ...
216
- },
217
- ],
218
- };
219
- ```
220
-
221
200
  **Why configure timeouts?**
222
201
  - Complex code generation tasks may need more time
223
202
  - Simple checks can complete faster with shorter timeouts
@@ -255,130 +234,73 @@ validationStrategy: {
255
234
 
256
235
  ### LLM-as-Judge
257
236
 
258
- Semantic evaluation using AI:
237
+ Semantic evaluation using AI (requires `GITHUB_TOKEN`):
259
238
 
260
239
  ```javascript
261
240
  validationStrategy: {
262
241
  llmJudge: {
263
242
  enabled: true,
264
- model: 'openai/gpt-4.1', // or 'gpt-4o'
243
+ model: 'openai/gpt-5',
265
244
  judgmentPrompt: `Evaluate if the code follows best practices...`,
266
245
  },
267
246
  }
268
247
  ```
269
248
 
270
- The LLM judge requires a `GITHUB_TOKEN` environment variable with access to GitHub Models API.
271
-
272
249
  ### ESLint Integration
273
250
 
251
+ ![WhatsApp Image 2026-01-24 at 2 09 11 PM](https://github.com/user-attachments/assets/12af93e8-ed7c-4153-a183-20601a925965)
252
+
253
+ *Figure 3: ESLint validator detecting code quality issues in generated code*
254
+
274
255
  Run ESLint on generated code:
275
256
 
276
257
  ```javascript
277
258
  validationStrategy: {
278
- eslint: {
279
- enabled: true,
280
- configPath: '.eslintrc.js', // optional
281
- },
259
+ eslint: { enabled: true, configPath: '.eslintrc.js' },
282
260
  }
283
261
  ```
284
262
 
285
263
  ## Scoring System
286
264
 
287
- The scoring system operates at three levels: per-validator scoring, per-scenario scoring, and summary scoring.
288
-
289
265
  ### Per-Validator Scoring
290
266
 
291
- Each validator (Pattern, LLM Judge, ESLint) independently evaluates the generated code and produces a score from 0.0 to 1.0:
292
-
293
- #### Pattern Validator
294
-
295
- Uses exponential decay based on weighted violations:
296
-
297
- ```
298
- score = e^(-totalWeight)
299
- ```
300
-
301
- Where `totalWeight` is the sum of violation weights:
302
- - **Critical violations**: 1.0 weight each
303
- - **Major violations**: 0.7 weight each
304
- - **Minor violations**: 0.3 weight each
305
-
306
- **Examples**:
307
- - 0 violations → score = 1.0 (perfect)
308
- - 1 critical violation → score ≈ 0.37
309
- - 1 major violation → score ≈ 0.50
310
- - 2 minor violations → score ≈ 0.55
311
-
312
- #### LLM Judge Validator
313
-
314
- The LLM (GPT-4 or other model) evaluates the code semantically and returns:
315
- - An `overallScore` from 0.0 to 1.0
316
- - A list of violations with explanations
317
- - Passed if: score ≥ 0.7 AND no violations
318
-
319
- The LLM judge provides semantic understanding beyond pattern matching, evaluating whether the code actually solves the problem correctly and follows best practices.
320
-
321
- #### ESLint Validator
267
+ Each validator independently evaluates generated code and produces a score from 0.0 to 1.0:
322
268
 
323
- This validator runs ESLint on the generated code and scores based on the number and severity of linting violations. Note that ESLint must be installed and configured in your project for this validator to work. If you don't have ESLint set up globally, disable this validator or provide a custom validator.
324
-
325
- Uses exponential decay with a dampening factor:
326
-
327
- ```
328
- score = e^(-totalWeight / 2)
329
- ```
330
-
331
- ESLint violations are mapped to severity:
332
- - ESLint error (severity 2) → **Major** violation (0.7 weight)
333
- - ESLint warning (severity 1) → **Minor** violation (0.3 weight)
334
-
335
- The `/2` dampening factor makes ESLint less punitive since projects often have many minor linting issues.
269
+ | Validator | Scoring Method | Notes |
270
+ |-----------|----------------|-------|
271
+ | **Pattern** | Exponential decay on weighted violations: `score = e^(-totalWeight)` | Critical: 1.0 weight, Major: 0.7 weight, Minor: 0.3 weight |
272
+ | **LLM Judge** | AI evaluates semantically, returns 0.0-1.0 score | Passes if score ≥ 0.7 AND no violations |
273
+ | **ESLint** | Exponential decay with a dampening factor: `score = e^(-totalWeight / 2)` | ESLint errors → Major (0.7), warnings → Minor (0.3) |
336
274
 
337
275
  ### Per-Scenario Scoring
338
276
 
339
- Each scenario receives an **overall score** calculated as:
277
+ Each scenario receives an **overall score** equal to the **average of all active validator scores**.
340
278
 
341
- ```
342
- overallScore = average of all active validator scores
343
- ```
344
-
345
- **Active validators** are those that:
346
- - Are configured in the scenario's `validationStrategy`
347
- - Successfully ran (did not return score = -1)
279
+ **Active validators** are those configured in `validationStrategy` that successfully ran (score ≠ -1).
348
280
 
349
281
  **Pass/Fail Criteria**:
350
282
  - ✅ **PASS**: `overallScore ≥ 0.8` AND `violations.length === 0`
351
283
  - ❌ **FAIL**: `overallScore < 0.8` OR `violations.length > 0`
352
284
  - ⚠️ **SKIP**: An error occurred during evaluation (timeout, adapter failure, etc.)
353
285
 
354
- **Example**: If Pattern validator returns 0.9, LLM Judge returns 0.8, and ESLint is skipped:
355
- ```
356
- overallScore = (0.9 + 0.8) / 2 = 0.85
357
- ```
286
+ **Example**: Pattern (0.9) + LLM Judge (0.8) + ESLint (skipped) → `overallScore = (0.9 + 0.8) / 2 = 0.85`
358
287
 
359
288
  ### Summary Scoring
360
289
 
361
- After evaluating all scenarios, the framework calculates summary statistics:
290
+ After evaluating all scenarios, the framework calculates:
362
291
 
363
292
  ```javascript
364
293
  {
365
- total: 10, // Total number of scenarios
366
- passed: 7, // Scenarios with overallScore ≥ 0.8 and no violations
367
- failed: 2, // Scenarios evaluated but didn't pass
368
- skipped: 1, // Scenarios that encountered errors
369
- averageScore: 0.78, // Average of all scenario overallScores
294
+ total: 10, // Total scenarios
295
+ passed: 7, // overallScore ≥ 0.8 and no violations
296
+ failed: 2, // Evaluated but didn't pass
297
+ skipped: 1, // Encountered errors
298
+ averageScore: 0.78, // Average of all scenario scores
370
299
  totalViolations: 8 // Sum of violations across all scenarios
371
300
  }
372
301
  ```
373
302
 
374
- **Average Score Calculation**:
375
- ```
376
- averageScore = (sum of all scenario scores) / total scenarios
377
- ```
378
-
379
- This includes scores from failed scenarios, providing an overall quality metric across your entire test suite.
380
-
381
- **Transparency**: When baselines are saved, the per-validator breakdown is included in the baseline file, allowing you to trace exactly which validator contributed what score. See [Baseline File Format](#baseline-file-format) for details.
303
+ **Transparency**: Baselines include per-validator breakdowns. See [Baseline File Format](#baseline-file-format) for details.
382
304
 
383
305
  ### Score Interpretation
384
306
 
@@ -398,11 +320,6 @@ When baseline tracking is enabled, you'll see delta metrics:
398
320
  ↑ +18.5% improvement from baseline
399
321
  ```
400
322
 
401
- The percentage is calculated as:
402
- ```
403
- percentage = (currentScore - baselineScore) / baselineScore * 100
404
- ```
405
-
406
323
  ## Baseline Tracking
407
324
 
408
325
  Track evaluation results over time by enabling baseline management in your config file:
@@ -430,42 +347,21 @@ When `compareBaseline` is enabled, the report will show score deltas and whether
430
347
 
431
348
  ### Baseline File Format
432
349
 
433
- Each baseline file contains complete transparency into how the score was calculated:
350
+ Path: `.benchmarks/baselines/{adapter}/{model}/{scenario-id}.json`
351
+
352
+ Each baseline file provides complete score traceability:
434
353
 
435
354
  ```json
436
355
  {
437
356
  "scenarioId": "typescript-no-any",
438
357
  "score": 0.85,
439
358
  "violations": [
440
- {
441
- "type": "pattern",
442
- "message": "Forbidden pattern found: :\\s*any\\b",
443
- "file": "src/types.ts",
444
- "line": 12,
445
- "severity": "critical",
446
- "details": "Matched: \"metadata: any\""
447
- }
359
+ { "type": "pattern", "message": "Forbidden pattern found: :\\s*any\\b", "file": "src/types.ts", ... }
448
360
  ],
449
361
  "validationResults": [
450
- {
451
- "passed": false,
452
- "score": 0.37,
453
- "violations": [...],
454
- "validatorType": "pattern"
455
- },
456
- {
457
- "passed": true,
458
- "score": 1.0,
459
- "violations": [],
460
- "validatorType": "llm-judge"
461
- },
462
- {
463
- "passed": true,
464
- "score": -1,
465
- "violations": [],
466
- "validatorType": "eslint",
467
- "error": "ESLint not found"
468
- }
362
+ { "passed": false, "score": 0.37, "validatorType": "pattern", "violations": [...] },
363
+ { "passed": true, "score": 1.0, "validatorType": "llm-judge", "violations": [] },
364
+ { "passed": true, "score": -1, "validatorType": "eslint", "error": "ESLint not found" }
469
365
  ],
470
366
  "timestamp": "2026-01-23T22:28:32.216Z",
471
367
  "adapter": "copilot",
@@ -474,15 +370,11 @@ Each baseline file contains complete transparency into how the score was calcula
474
370
  ```
475
371
 
476
372
  **Key fields**:
477
- - `score`: Overall scenario score (average of active validators)
478
- - `violations`: All violations from all validators combined
479
- - `validationResults`: Per-validator breakdown showing:
480
- - Individual validator score
481
- - Whether that validator passed
482
- - Violations specific to that validator
483
- - Any errors that occurred (`score: -1` means skipped)
373
+ - `score` - Overall scenario score (average of active validators)
374
+ - `violations` - All violations from all validators combined
375
+ - `validationResults` - Per-validator breakdown (score, passed, violations, errors)
484
376
 
485
- **Score Traceability**: With this format, you can always trace the overall score back to individual validator scores. For example, if you see `score: 0.067`, you can look at `validationResults` to see which validators contributed what scores (e.g., Pattern: 0.135, LLM Judge: 0.00).
377
+ **Traceability**: You can always trace the overall score back to individual validator scores (e.g., for `score: 0.067`, check `validationResults` to see Pattern: 0.135 and LLM Judge: 0.00).
486
378
 
487
379
  ## CLI Commands
488
380
 
@@ -490,24 +382,26 @@ Each baseline file contains complete transparency into how the score was calcula
490
382
 
491
383
  Run benchmark evaluations.
492
384
 
493
- **Options:**
494
- - `--scenario <pattern>`: Filter by scenario ID (supports wildcards like `typescript-*`)
495
- - `--category <categories>`: Filter by category (comma-separated)
496
- - `--tag <tags>`: Filter by tags (comma-separated)
497
- - `--adapter <type>`: Adapter to use (`copilot` or `claude-code`)
498
- - `--model <model>`: LLM model for judge (default: `openai/gpt-4.1`)
499
- - `--threshold <number>`: Minimum passing score (default: `0.8`)
500
- - `--verbose`: Show detailed output
501
- - `--output <file>`: Export JSON report
502
- - `--workspace-root <path>`: Workspace root directory
385
+ | Option | Description | Default/Example |
386
+ |--------|-------------|-----------------|
387
+ | `--scenario <pattern>` | Filter by scenario ID (supports wildcards) | `typescript-*` |
388
+ | `--category <categories>` | Filter by category (comma-separated) | `typescript,react` |
389
+ | `--tag <tags>` | Filter by tags (comma-separated) | `safety,types` |
390
+ | `--adapter <type>` | Adapter to use | `copilot` or `claude-code` |
391
+ | `--model <model>` | LLM model for judge | `openai/gpt-5` |
392
+ | `--threshold <number>` | Minimum passing score | `0.8` |
393
+ | `--verbose` | Show detailed output | - |
394
+ | `--output <file>` | Export JSON report | `report.json` |
395
+ | `--workspace-root <path>` | Workspace root directory | Current directory |
503
396
 
504
397
  ### `list`
505
398
 
506
399
  List available test scenarios.
507
400
 
508
- **Options:**
509
- - `--category <categories>`: Filter by category
510
- - `--tag <tags>`: Filter by tags
401
+ | Option | Description |
402
+ |--------|-------------|
403
+ | `--category <categories>` | Filter by category (comma-separated) |
404
+ | `--tag <tags>` | Filter by tags (comma-separated) |
511
405
 
512
406
  ### `check`
513
407
 
@@ -517,8 +411,9 @@ Check if coding agent CLIs are available.
517
411
 
518
412
  Test LLM judge with a custom prompt (for debugging).
519
413
 
520
- **Options:**
521
- - `--model <model>`: LLM model to use
414
+ | Option | Description |
415
+ |--------|-------------|
416
+ | `--model <model>` | LLM model to use |
522
417
 
523
418
  ## Understanding Output
524
419
 
@@ -567,26 +462,15 @@ import { Evaluator, loadConfig } from 'coding-agent-benchmarks';
567
462
  async function runEvaluation() {
568
463
  const { config, scenarios } = await loadConfig();
569
464
 
570
- // Create evaluator
571
465
  const evaluator = new Evaluator({
572
466
  adapter: 'copilot',
573
- model: 'openai/gpt-4.1',
467
+ model: 'openai/gpt-5',
574
468
  verbose: true,
575
469
  saveBaseline: config.saveBaseline,
576
470
  compareBaseline: config.compareBaseline,
577
471
  });
578
472
 
579
- // Check adapter availability
580
- const available = await evaluator.checkAdapterAvailability();
581
- if (!available) {
582
- throw new Error('Adapter not available');
583
- }
584
-
585
- // Run evaluation
586
473
  const report = await evaluator.evaluate(scenarios);
587
-
588
- console.log(`Passed: ${report.summary.passed}/${report.summary.total}`);
589
- console.log(`Average score: ${report.summary.averageScore.toFixed(2)}`);
590
474
  }
591
475
 
592
476
  runEvaluation();
@@ -597,106 +481,62 @@ runEvaluation();
597
481
  Implement the `CodeValidator` interface:
598
482
 
599
483
  ```typescript
600
- import { CodeValidator, ValidationResult, TestScenario } from 'coding-agent-benchmarks';
484
+ import { CodeValidator, ValidationResult, TestScenario } from 'coding-agent-benchmarks';
601
485
 
602
486
  export class CustomValidator implements CodeValidator {
603
487
  public readonly type = 'custom';
604
488
 
605
- async validate(
606
- files: readonly string[],
607
- scenario: TestScenario
608
- ): Promise<ValidationResult> {
489
+ async validate(files: readonly string[], scenario: TestScenario): Promise<ValidationResult> {
609
490
  // Your validation logic here
610
- return {
611
- passed: true,
612
- score: 1.0,
613
- violations: [],
614
- validatorType: 'custom',
615
- };
491
+ return { passed: true, score: 1.0, violations: [], validatorType: 'custom' };
616
492
  }
617
493
  }
618
494
  ```
619
495
 
496
+ See CONTRIBUTING.md for complete examples.
497
+
620
498
  ## Creating Custom Adapters
621
499
 
622
500
  Implement the `CodeGenerationAdapter` interface:
623
501
 
624
502
  ```typescript
625
- import { CodeGenerationAdapter, AdapterType } from 'coding-agent-benchmarks';
503
+ import { CodeGenerationAdapter } from 'coding-agent-benchmarks';
626
504
 
627
505
  export class CustomAdapter implements CodeGenerationAdapter {
628
- public readonly type: AdapterType = 'copilot'; // or extend the type
629
-
630
- async checkAvailability(): Promise<boolean> {
631
- // Check if CLI is available
632
- return true;
633
- }
506
+ public readonly type = 'custom-adapter';
634
507
 
635
- async generate(
636
- prompt: string,
637
- contextFiles?: readonly string[],
638
- timeout?: number
639
- ): Promise<string[]> {
640
- // Generate code and return changed files
641
- return ['path/to/generated/file.ts'];
642
- }
508
+ async checkAvailability(): Promise<boolean> { /* ... */ }
509
+ async generate(prompt: string, contextFiles?: readonly string[], timeout?: number): Promise<string[]> { /* ... */ }
643
510
  }
644
511
  ```
645
512
 
513
+ See CONTRIBUTING.md for complete examples.
514
+
646
515
  ## GitHub Authentication (for LLM Judge)
647
516
 
648
- LLM-as-judge validation requires GitHub authentication to access GitHub Models API. There are **two easy options** - no OAuth registration needed!
517
+ LLM-as-judge validation requires GitHub authentication to access GitHub Models API. Choose one option:
649
518
 
650
519
  ### Option 1: Personal Access Token (Recommended)
651
520
 
652
- 1. Create token at https://github.com/settings/tokens
653
- 2. Click "Generate new token (classic)"
654
- 3. Give it a name (e.g., "coding-agent-benchmarks")
655
- 4. Select scope: **`models:read`**
656
- 5. Generate and copy the token
657
- 6. Set environment variable:
658
- ```bash
659
- export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
660
- ```
521
+ Create a token at https://github.com/settings/tokens with the **`models:read`** scope, then set it as an environment variable:
522
+
523
+ ```bash
524
+ export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
525
+ ```
661
526
 
662
527
  ### Option 2: GitHub CLI (Automatic)
663
528
 
664
- If you have GitHub CLI installed, tokens are auto-detected:
529
+ If GitHub CLI is installed, tokens are auto-detected:
665
530
 
666
531
  ```bash
667
- # Install GitHub CLI
668
- brew install gh # macOS
669
- # or download from https://cli.github.com
670
-
671
- # Authenticate (one time)
532
+ brew install gh # or download from https://cli.github.com
672
533
  gh auth login
673
-
674
534
  # Token will be used automatically - no GITHUB_TOKEN needed!
675
535
  ```
676
536
 
677
- ### Check Authentication Status
678
-
679
- ```bash
680
- npx coding-agent-benchmarks check
681
- ```
682
-
683
- Output:
684
- ```
685
- Checking adapter availability...
686
- GitHub Copilot CLI: ✓ Available
687
- Claude Code CLI: ✗ Not found
688
-
689
- Checking GitHub authentication...
690
- ✓ Using token from GitHub CLI (gh auth token)
691
- ```
692
-
693
- ## How It Works
537
+ ### Check Authentication
694
538
 
695
- 1. **Code Generation**: The adapter spawns a coding agent CLI with a prompt
696
- 2. **File Tracking**: Git is used to detect which files were created/modified
697
- 3. **Validation**: Multiple validators check the generated code
698
- 4. **Scoring**: Results are aggregated and compared against thresholds
699
- 5. **Reporting**: Results are displayed in terminal and optionally exported as JSON
539
+ Run `npx coding-agent-benchmarks check` to verify authentication status.
700
540
 
701
541
  ## Requirements
702
542
 
@@ -1 +1 @@
1
- {"version":3,"file":"claudeCodeCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,2BAA2B;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,oBAAqB,YAAW,qBAAqB;IAChE,SAAgB,IAAI,EAAG,aAAa,CAAU;IAC9C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAsGrB"}
1
+ {"version":3,"file":"claudeCodeCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,2BAA2B;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,oBAAqB,YAAW,qBAAqB;IAChE,SAAgB,IAAI,EAAG,aAAa,CAAU;IAC9C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAuGrB"}
@@ -100,14 +100,15 @@ class ClaudeCodeCLIAdapter {
100
100
  * @param timeout Timeout in milliseconds, or null for no timeout
101
101
  */
102
102
  async generate(prompt, contextFiles, timeout) {
103
+ const isAvailable = await this.checkAvailability();
104
+ if (!isAvailable) {
105
+ throw new Error("Claude Code CLI is not available. Please install it first: https://docs.anthropic.com/en/docs/build-with-claude/claude-code");
106
+ }
103
107
  const fullPrompt = this.buildPrompt(prompt, contextFiles);
104
- // Capture git status before generation
105
108
  const statusBefore = (0, gitUtils_1.getGitStatusPorcelain)(this.workspaceRoot);
106
- // Write prompt to temp file and pipe via stdin (matches @copilot-evals pattern)
107
109
  return new Promise((resolve, reject) => {
108
110
  const tempFile = path.join(this.workspaceRoot, ".claude-eval-prompt.txt");
109
111
  fs.writeFileSync(tempFile, fullPrompt, "utf8");
110
- // Cleanup function
111
112
  const cleanup = () => {
112
113
  try {
113
114
  if (fs.existsSync(tempFile)) {
@@ -118,7 +119,6 @@ class ClaudeCodeCLIAdapter {
118
119
  // Ignore cleanup errors
119
120
  }
120
121
  };
121
- // Register cleanup on process termination
122
122
  const cleanupOnExit = () => {
123
123
  cleanup();
124
124
  };
@@ -138,7 +138,6 @@ class ClaudeCodeCLIAdapter {
138
138
  proc.stderr?.on("data", (data) => {
139
139
  stderr += data.toString();
140
140
  });
141
- // Set timeout only if specified (null/undefined = no timeout)
142
141
  let timeoutHandle = null;
143
142
  if (timeout !== null && timeout !== undefined) {
144
143
  timeoutHandle = setTimeout(() => {
@@ -160,7 +159,6 @@ class ClaudeCodeCLIAdapter {
160
159
  reject(new Error(`Claude Code CLI exited with code ${code}\nStderr: ${stderr}`));
161
160
  return;
162
161
  }
163
- // Get files changed during generation (diff before/after)
164
162
  try {
165
163
  const statusAfter = (0, gitUtils_1.getGitStatusPorcelain)(this.workspaceRoot);
166
164
  const changedFiles = (0, gitUtils_1.getChangedFilesDiff)(statusBefore, statusAfter, this.workspaceRoot);
@@ -1 +1 @@
1
- {"version":3,"file":"claudeCodeCLI.js","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAsC;AACtC,uCAAyB;AACzB,2CAA6B;AAE7B,gDAA+E;AAC/E,4DAGiC;AAOjC,MAAM,aAAa,GAAG,QAAQ,CAAC;AAE/B,MAAa,oBAAoB;IAK/B,YAAY,OAAqC;QAJjC,SAAI,GAAG,aAAsB,CAAC;QAK5C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QAClE,IAAI,CAAC,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE;gBACtC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,WAAW,CACjB,MAAc,EACd,YAAgC;QAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;YACpE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;gBAClC,MAAM,cAAc,GAAG,QAAQ;qBAC5B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC;oBAC5D,OAAO,OAAO,GAAG,CAAC,IAAI,WAAW,GAAG,KAAK,GAAG,CAAC,OAAO,UAAU,CAAC;gBACjE,CAAC,CAAC;qBACD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,KAAK,CAAC,IAAI,CACR,0GAA0G,CAC3G,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,YAAgC,EAChC,OAAuB;QAEvB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAE1D,uCAAuC;QACvC,MAAM,YAAY,GAAG,IAAA,gCAAqB
,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE/D,gFAAgF;QAChF,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,yBAAyB,CAAC,CAAC;YAC1E,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;YAE/C,mBAAmB;YACnB,MAAM,OAAO,GAAG,GAAS,EAAE;gBACzB,IAAI,CAAC;oBACH,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC5B,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;oBAC1B,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,wBAAwB;gBAC1B,CAAC;YACH,CAAC,CAAC;YAEF,0CAA0C;YAC1C,MAAM,aAAa,GAAG,GAAS,EAAE;gBAC/B,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;YAEvC,MAAM,OAAO,GAAG,QAAQ,QAAQ,sBAAsB,IAAI,CAAC,KAAK,yIAAyI,CAAC;YAC1M,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,IAAI,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE;gBACxC,GAAG,EAAE,IAAI,CAAC,aAAa;gBACvB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YAEH,6DAA6D;YAC7D,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,8DAA8D;YAC9D,IAAI,aAAa,GAA0B,IAAI,CAAC;YAChD,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;oBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;oBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;oBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,mCAAmC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACpE,CAAC,EAAE,OAAO,CAAC,CAAC;YACd,CAAC;YAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBAED,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBAEjD,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;oBACf,MAAM,CACJ,IAAI,KAAK,CACP,oCAAoC,IAAI,aAAa,MAAM,EAAE,CAC9D,CACF,CAAC;oBA
CF,OAAO;gBACT,CAAC;gBAED,0DAA0D;gBAC1D,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAA,gCAAqB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBAC9D,MAAM,YAAY,GAAG,IAAA,8BAAmB,EACtC,YAAY,EACZ,WAAW,EACX,IAAI,CAAC,aAAa,CACnB,CAAC;oBACF,OAAO,CAAC,YAAY,CAAC,CAAC;gBACxB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBACD,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAnLD,oDAmLC"}
1
+ {"version":3,"file":"claudeCodeCLI.js","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAsC;AACtC,uCAAyB;AACzB,2CAA6B;AAE7B,gDAA+E;AAC/E,4DAGiC;AAOjC,MAAM,aAAa,GAAG,QAAQ,CAAC;AAE/B,MAAa,oBAAoB;IAK/B,YAAY,OAAqC;QAJjC,SAAI,GAAG,aAAsB,CAAC;QAK5C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;QAClE,IAAI,CAAC,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,aAAa,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE;gBACtC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;OAEG;IACK,WAAW,CACjB,MAAc,EACd,YAAgC;QAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;YACpE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;gBAClC,MAAM,cAAc,GAAG,QAAQ;qBAC5B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC;oBAC5D,OAAO,OAAO,GAAG,CAAC,IAAI,WAAW,GAAG,KAAK,GAAG,CAAC,OAAO,UAAU,CAAC;gBACjE,CAAC,CAAC;qBACD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAChB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC3B,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnB,KAAK,CAAC,IAAI,CACR,0GAA0G,CAC3G,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,YAAgC,EAChC,OAAuB;QAEvB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACnD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CACb,6
HAA6H,CAC9H,CAAC;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAE1D,MAAM,YAAY,GAAG,IAAA,gCAAqB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE/D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,yBAAyB,CAAC,CAAC;YAC1E,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;YAE/C,MAAM,OAAO,GAAG,GAAS,EAAE;gBACzB,IAAI,CAAC;oBACH,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAC5B,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;oBAC1B,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,wBAAwB;gBAC1B,CAAC;YACH,CAAC,CAAC;YAEF,MAAM,aAAa,GAAG,GAAS,EAAE;gBAC/B,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;YAEvC,MAAM,OAAO,GAAG,QAAQ,QAAQ,sBAAsB,IAAI,CAAC,KAAK,yIAAyI,CAAC;YAC1M,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,IAAI,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE;gBACxC,GAAG,EAAE,IAAI,CAAC,aAAa;gBACvB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YAEH,6DAA6D;YAC7D,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,aAAa,GAA0B,IAAI,CAAC;YAChD,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;oBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;oBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;oBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,mCAAmC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACpE,CAAC,EAAE,OAAO,CAAC,CAAC;YACd,CAAC;YAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBAED,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBAEjD,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;oBACf
,MAAM,CACJ,IAAI,KAAK,CACP,oCAAoC,IAAI,aAAa,MAAM,EAAE,CAC9D,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBAED,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,IAAA,gCAAqB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBAC9D,MAAM,YAAY,GAAG,IAAA,8BAAmB,EACtC,YAAY,EACZ,WAAW,EACX,IAAI,CAAC,aAAa,CACnB,CAAC;oBACF,OAAO,CAAC,YAAY,CAAC,CAAC;gBACxB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBACD,OAAO,EAAE,CAAC;gBACV,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBAChD,OAAO,CAAC,cAAc,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;gBACjD,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AApLD,oDAoLC"}
@@ -11,9 +11,6 @@ export declare class CopilotCLIAdapter implements CodeGenerationAdapter {
11
11
  private workspaceRoot;
12
12
  private model;
13
13
  constructor(options?: CopilotCLIAdapterOptions);
14
- /**
15
- * Check if GitHub Copilot CLI is available
16
- */
17
14
  checkAvailability(): Promise<boolean>;
18
15
  /**
19
16
  * Get the model being used
@@ -1 +1 @@
1
- {"version":3,"file":"copilotCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/copilotCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,wBAAwB;IACvC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,iBAAkB,YAAW,qBAAqB;IAC7D,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,wBAAwB;IAK9C;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAwGrB"}
1
+ {"version":3,"file":"copilotCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/copilotCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAOjD,MAAM,WAAW,wBAAwB;IACvC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,qBAAa,iBAAkB,YAAW,qBAAqB;IAC7D,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAS;gBAEV,OAAO,CAAC,EAAE,wBAAwB;IAKxC,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;OAEG;IACH,QAAQ,IAAI,MAAM;IAIlB;;OAEG;IACH,OAAO,CAAC,WAAW;IA8BnB;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CAyGrB"}