coding-agent-benchmarks 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +474 -0
  3. package/dist/adapters/claudeCodeCLI.d.ts +19 -0
  4. package/dist/adapters/claudeCodeCLI.d.ts.map +1 -0
  5. package/dist/adapters/claudeCodeCLI.js +106 -0
  6. package/dist/adapters/claudeCodeCLI.js.map +1 -0
  7. package/dist/adapters/copilotCLI.d.ts +19 -0
  8. package/dist/adapters/copilotCLI.d.ts.map +1 -0
  9. package/dist/adapters/copilotCLI.js +104 -0
  10. package/dist/adapters/copilotCLI.js.map +1 -0
  11. package/dist/config/defaultScenarios.d.ts +6 -0
  12. package/dist/config/defaultScenarios.d.ts.map +1 -0
  13. package/dist/config/defaultScenarios.js +209 -0
  14. package/dist/config/defaultScenarios.js.map +1 -0
  15. package/dist/config/loader.d.ts +13 -0
  16. package/dist/config/loader.d.ts.map +1 -0
  17. package/dist/config/loader.js +153 -0
  18. package/dist/config/loader.js.map +1 -0
  19. package/dist/evaluator.d.ts +45 -0
  20. package/dist/evaluator.d.ts.map +1 -0
  21. package/dist/evaluator.js +226 -0
  22. package/dist/evaluator.js.map +1 -0
  23. package/dist/index.d.ts +13 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +38 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/runner.d.ts +6 -0
  28. package/dist/runner.d.ts.map +1 -0
  29. package/dist/runner.js +233 -0
  30. package/dist/runner.js.map +1 -0
  31. package/dist/types.d.ts +354 -0
  32. package/dist/types.d.ts.map +1 -0
  33. package/dist/types.js +6 -0
  34. package/dist/types.js.map +1 -0
  35. package/dist/utils/baselineManager.d.ts +53 -0
  36. package/dist/utils/baselineManager.d.ts.map +1 -0
  37. package/dist/utils/baselineManager.js +220 -0
  38. package/dist/utils/baselineManager.js.map +1 -0
  39. package/dist/utils/gitUtils.d.ts +39 -0
  40. package/dist/utils/gitUtils.d.ts.map +1 -0
  41. package/dist/utils/gitUtils.js +121 -0
  42. package/dist/utils/gitUtils.js.map +1 -0
  43. package/dist/utils/githubAuth.d.ts +22 -0
  44. package/dist/utils/githubAuth.d.ts.map +1 -0
  45. package/dist/utils/githubAuth.js +79 -0
  46. package/dist/utils/githubAuth.js.map +1 -0
  47. package/dist/utils/workspaceUtils.d.ts +32 -0
  48. package/dist/utils/workspaceUtils.d.ts.map +1 -0
  49. package/dist/utils/workspaceUtils.js +121 -0
  50. package/dist/utils/workspaceUtils.js.map +1 -0
  51. package/dist/validators/eslintValidator.d.ts +22 -0
  52. package/dist/validators/eslintValidator.d.ts.map +1 -0
  53. package/dist/validators/eslintValidator.js +217 -0
  54. package/dist/validators/eslintValidator.js.map +1 -0
  55. package/dist/validators/llmJudge.d.ts +28 -0
  56. package/dist/validators/llmJudge.d.ts.map +1 -0
  57. package/dist/validators/llmJudge.js +241 -0
  58. package/dist/validators/llmJudge.js.map +1 -0
  59. package/dist/validators/patternValidator.d.ts +27 -0
  60. package/dist/validators/patternValidator.d.ts.map +1 -0
  61. package/dist/validators/patternValidator.js +233 -0
  62. package/dist/validators/patternValidator.js.map +1 -0
  63. package/package.json +50 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 coding-agent-benchmarks contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,474 @@
1
+ # coding-agent-benchmarks
2
+
3
+ Open-source framework for evaluating whether AI coding assistants (like GitHub Copilot CLI or Claude Code) follow your coding standards. Here's the workflow:
4
+ 1. You give it a prompt → 2. AI generates code → 3. Library validates → 4. You get a score
5
+
6
+ ## Features
7
+
8
+ - **Multiple Adapters**: Built-in support for GitHub Copilot CLI and Claude Code CLI
9
+ - **Flexible Validation**: Pattern-based validation, LLM-as-judge semantic evaluation, and ESLint integration
10
+ - **Baseline Tracking**: Save and compare evaluation results over time
11
+ - **Extensible**: Easy to add custom scenarios, validators, and adapters
12
+ - **CLI & Programmatic API**: Use as a command-line tool or integrate into your workflow
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ npm install --save-dev coding-agent-benchmarks
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ### 1. Create a Configuration File
23
+
24
+ Create a `benchmarks.config.js` file in your project root with your scenarios (see Configuration section below).
25
+
26
+ ### 2. Check Adapter Availability
27
+
28
+ ```bash
29
+ npx coding-agent-benchmarks check
30
+ ```
31
+
32
+ This will show which coding agent CLIs are installed on your system.
33
+
34
+ ### 3. List Your Scenarios
35
+
36
+ ```bash
37
+ npx coding-agent-benchmarks list
38
+ ```
39
+
40
+ ### 4. Run Evaluations
41
+
42
+ ```bash
43
+ # Run all scenarios with default adapter (Copilot)
44
+ npx coding-agent-benchmarks evaluate
45
+
46
+ # Run with Claude Code
47
+ npx coding-agent-benchmarks evaluate --adapter claude-code
48
+
49
+ # Run specific scenarios
50
+ npx coding-agent-benchmarks evaluate --scenario typescript-no-any
51
+ npx coding-agent-benchmarks evaluate --category typescript
52
+ npx coding-agent-benchmarks evaluate --tag best-practices
53
+
54
+ # Save as baseline for future comparison
55
+ npx coding-agent-benchmarks evaluate --save-baseline
56
+
57
+ # Compare with baseline
58
+ npx coding-agent-benchmarks evaluate --compare-baseline
59
+
60
+ # Export report as JSON
61
+ npx coding-agent-benchmarks evaluate --output report.json
62
+ ```
63
+
64
+ ## Configuration
65
+
66
+ **Configuration is required.** Create a `benchmarks.config.js` (or `.ts`) file in your project root with your test scenarios:
67
+
68
+ ```javascript
69
+ module.exports = {
70
+ // Default adapter to use
71
+ defaultAdapter: 'copilot',
72
+
73
+ // Default LLM model for judge
74
+ defaultModel: 'openai/gpt-4.1',
75
+
76
+ // Default timeout for code generation (milliseconds)
77
+ // Individual scenarios can override this
78
+ // Default: 120000 (2 minutes)
79
+ defaultTimeout: 180000, // 3 minutes
80
+
81
+ // Workspace root (auto-detected if not specified)
82
+ workspaceRoot: process.cwd(),
83
+
84
+ // Define your test scenarios
85
+ scenarios: [
86
+ {
87
+ id: 'typescript-no-any',
88
+ category: 'typescript',
89
+ severity: 'critical',
90
+ tags: ['typescript', 'types', 'safety'],
91
+ description: 'Ensure TypeScript interfaces use explicit types instead of "any"',
92
+ prompt: 'Create a TypeScript interface called User with fields: id (number), name (string), email (string), and metadata (object with key-value pairs)',
93
+ validationStrategy: {
94
+ patterns: {
95
+ forbiddenPatterns: [/:\s*any\b/],
96
+ requiredPatterns: [/interface\s+User/],
97
+ },
98
+ },
99
+ timeout: 120000,
100
+ },
101
+ {
102
+ id: 'react-no-inline-styles',
103
+ category: 'react',
104
+ severity: 'major',
105
+ tags: ['react', 'styling', 'best-practices'],
106
+ description: 'Forbid inline style objects in React components',
107
+ prompt: 'Create a React functional component called Button that accepts a "label" prop and renders a styled button. Use CSS classes instead of inline styles.',
108
+ validationStrategy: {
109
+ patterns: {
110
+ forbiddenPatterns: [/style\s*=\s*\{\{/, /style\s*=\s*\{[^}]*\}/],
111
+ requiredPatterns: [/className/],
112
+ },
113
+ },
114
+ },
115
+ {
116
+ id: 'async-error-handling',
117
+ category: 'general',
118
+ severity: 'critical',
119
+ tags: ['async', 'error-handling', 'robustness'],
120
+ description: 'Ensure async functions have proper error handling',
121
+ prompt: 'Create an async function called fetchUserData that takes a userId parameter, makes an HTTP request to fetch user data, and returns the user object. Handle errors appropriately.',
122
+ validationStrategy: {
123
+ patterns: {
124
+ requiredPatterns: [/async\s+function\s+fetchUserData|const\s+fetchUserData.*async/, /try|catch|\.catch\(/],
125
+ },
126
+ llmJudge: {
127
+ enabled: true,
128
+ judgmentPrompt: 'Evaluate the error handling in this async function. Does it use try/catch or .catch()? Are errors logged or re-thrown appropriately?',
129
+ },
130
+ },
131
+ },
132
+ ],
133
+ };
134
+ ```
135
+
136
+ ## Timeout Configuration
137
+
138
+ You can configure timeouts at three levels (in order of precedence):
139
+
140
+ 1. **Per-scenario timeout**: Set `timeout` on individual scenarios (highest priority)
141
+ 2. **Global default**: Set `defaultTimeout` in your config file
142
+ 3. **Built-in default**: 120000ms (2 minutes) if nothing else is specified
143
+
144
+ ```javascript
145
+ // In benchmarks.config.js
146
+ module.exports = {
147
+ // Global default applies to all scenarios
148
+ defaultTimeout: 180000, // 3 minutes
149
+
150
+ scenarios: [
151
+ {
152
+ id: 'quick-check',
153
+ prompt: '...',
154
+ timeout: 60000, // Override: 1 minute for this scenario
155
+ // ...
156
+ },
157
+ {
158
+ id: 'complex-task',
159
+ prompt: '...',
160
+ // Will use defaultTimeout (3 minutes)
161
+ // ...
162
+ },
163
+ ],
164
+ };
165
+ ```
166
+
167
+ **Why configure timeouts?**
168
+ - Complex code generation tasks may need more time
169
+ - Simple checks can complete faster with shorter timeouts
170
+ - Different AI models may have different response times
171
+
172
+ ## Validation Strategies
173
+
174
+ ### Pattern Validation
175
+
176
+ Regex-based validation for forbidden/required patterns:
177
+
178
+ ```javascript
179
+ validationStrategy: {
180
+ patterns: {
181
+ // Patterns that should NOT appear
182
+ forbiddenPatterns: [/:\s*any\b/, /console\.log/],
183
+
184
+ // Patterns that MUST appear
185
+ requiredPatterns: [/interface\s+User/],
186
+
187
+ // Import statements that should NOT be present
188
+ forbiddenImports: ['from "lodash"'],
189
+
190
+ // Import statements that MUST be present
191
+ requiredImports: ['import React'],
192
+
193
+ // File name patterns that should NOT be created
194
+ forbiddenFileNamePatterns: [/\.test\.js$/],
195
+
196
+ // File name patterns that MUST be created
197
+ requiredFileNamePatterns: [/\.tsx?$/],
198
+ },
199
+ }
200
+ ```
201
+
202
+ ### LLM-as-Judge
203
+
204
+ Semantic evaluation using AI:
205
+
206
+ ```javascript
207
+ validationStrategy: {
208
+ llmJudge: {
209
+ enabled: true,
210
+ model: 'openai/gpt-4.1', // or 'gpt-4o'
211
+ judgmentPrompt: `Evaluate if the code follows best practices...`,
212
+ },
213
+ }
214
+ ```
215
+
216
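+ The LLM judge requires GitHub authentication with access to the GitHub Models API: either a `GITHUB_TOKEN` environment variable or a GitHub CLI login (see "GitHub Authentication (for LLM Judge)" below).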
217
+
218
+ ### ESLint Integration
219
+
220
+ Run ESLint on generated code:
221
+
222
+ ```javascript
223
+ validationStrategy: {
224
+ eslint: {
225
+ enabled: true,
226
+ configPath: '.eslintrc.js', // optional
227
+ },
228
+ }
229
+ ```
230
+
231
+ ## Scoring
232
+
233
+ Each scenario receives a score from 0.0 to 1.0:
234
+
235
+ - **1.0**: Perfect, no violations
236
+ - **0.8-0.99**: Minor issues
237
+ - **0.5-0.79**: Moderate issues
238
+ - **0.0-0.49**: Major issues or failed
239
+
240
+ Violations are weighted by severity:
241
+ - **Critical**: 1.0 weight
242
+ - **Major**: 0.7 weight
243
+ - **Minor**: 0.3 weight
244
+
245
+ ## Baseline Tracking
246
+
247
+ Save current results as a baseline:
248
+
249
+ ```bash
250
+ npx coding-agent-benchmarks evaluate --save-baseline
251
+ ```
252
+
253
+ Baselines are stored in `.benchmarks/baselines/{adapter}/{model}/{scenario-id}.json`
254
+
255
+ Compare future runs against the baseline:
256
+
257
+ ```bash
258
+ npx coding-agent-benchmarks evaluate --compare-baseline
259
+ ```
260
+
261
+ The report will show score deltas and whether results improved or regressed.
262
+
263
+ ## CLI Commands
264
+
265
+ ### `evaluate`
266
+
267
+ Run benchmark evaluations.
268
+
269
+ **Options:**
270
+ - `--scenario <pattern>`: Filter by scenario ID (supports wildcards like `typescript-*`)
271
+ - `--category <categories>`: Filter by category (comma-separated)
272
+ - `--tag <tags>`: Filter by tags (comma-separated)
273
+ - `--adapter <type>`: Adapter to use (`copilot` or `claude-code`)
274
+ - `--model <model>`: LLM model for judge (default: `openai/gpt-4.1`)
275
+ - `--threshold <number>`: Minimum passing score (default: `0.8`)
276
+ - `--verbose`: Show detailed output
277
+ - `--output <file>`: Export JSON report
278
+ - `--save-baseline`: Save results as baseline
279
+ - `--compare-baseline`: Compare with baseline
280
+ - `--workspace-root <path>`: Workspace root directory
281
+
282
+ ### `list`
283
+
284
+ List available test scenarios.
285
+
286
+ **Options:**
287
+ - `--category <categories>`: Filter by category
288
+ - `--tag <tags>`: Filter by tags
289
+
290
+ ### `check`
291
+
292
+ Check if coding agent CLIs are available.
293
+
294
+ ### `test-llm`
295
+
296
+ Test LLM judge with a custom prompt (for debugging).
297
+
298
+ **Options:**
299
+ - `--model <model>`: LLM model to use
300
+
301
+ ## Programmatic Usage
302
+
303
+ You can also use the framework programmatically:
304
+
305
+ ```typescript
306
+ import { Evaluator, loadConfig } from 'coding-agent-benchmarks';
307
+
308
+ async function runEvaluation() {
309
+ // Load configuration
310
+ const { scenarios } = await loadConfig();
311
+
312
+ // Create evaluator
313
+ const evaluator = new Evaluator({
314
+ adapter: 'copilot',
315
+ model: 'openai/gpt-4.1',
316
+ verbose: true,
317
+ });
318
+
319
+ // Check adapter availability
320
+ const available = await evaluator.checkAdapterAvailability();
321
+ if (!available) {
322
+ throw new Error('Adapter not available');
323
+ }
324
+
325
+ // Run evaluation
326
+ const report = await evaluator.evaluate(scenarios);
327
+
328
+ console.log(`Passed: ${report.summary.passed}/${report.summary.total}`);
329
+ console.log(`Average score: ${report.summary.averageScore.toFixed(2)}`);
330
+ }
331
+
332
+ runEvaluation();
333
+ ```
334
+
335
+ ## Creating Custom Validators
336
+
337
+ Implement the `CodeValidator` interface:
338
+
339
+ ```typescript
340
+ import { CodeValidator, ValidationResult, TestScenario } from 'coding-agent-benchmarks';
341
+
342
+ export class CustomValidator implements CodeValidator {
343
+ public readonly type = 'custom';
344
+
345
+ async validate(
346
+ files: readonly string[],
347
+ scenario: TestScenario
348
+ ): Promise<ValidationResult> {
349
+ // Your validation logic here
350
+ return {
351
+ passed: true,
352
+ score: 1.0,
353
+ violations: [],
354
+ validatorType: 'custom',
355
+ };
356
+ }
357
+ }
358
+ ```
359
+
360
+ ## Creating Custom Adapters
361
+
362
+ Implement the `CodeGenerationAdapter` interface:
363
+
364
+ ```typescript
365
+ import { CodeGenerationAdapter, AdapterType } from 'coding-agent-benchmarks';
366
+
367
+ export class CustomAdapter implements CodeGenerationAdapter {
368
+ public readonly type: AdapterType = 'copilot'; // or extend the type
369
+
370
+ async checkAvailability(): Promise<boolean> {
371
+ // Check if CLI is available
372
+ return true;
373
+ }
374
+
375
+ async generate(
376
+ prompt: string,
377
+ contextFiles?: readonly string[],
378
+ timeout?: number
379
+ ): Promise<string[]> {
380
+ // Generate code and return changed files
381
+ return ['path/to/generated/file.ts'];
382
+ }
383
+ }
384
+ ```
385
+
386
+ ## GitHub Authentication (for LLM Judge)
387
+
388
+ LLM-as-judge validation requires GitHub authentication to access GitHub Models API. There are **two easy options** - no OAuth registration needed!
389
+
390
+ ### Option 1: Personal Access Token (Recommended)
391
+
392
+ 1. Create token at https://github.com/settings/tokens
393
+ 2. Click "Generate new token (classic)"
394
+ 3. Give it a name (e.g., "coding-agent-benchmarks")
395
+ 4. Select scope: **`models:read`**
396
+ 5. Generate and copy the token
397
+ 6. Set environment variable:
398
+ ```bash
399
+ export GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
400
+ ```
401
+
402
+ ### Option 2: GitHub CLI (Automatic)
403
+
404
+ If you have GitHub CLI installed, tokens are auto-detected:
405
+
406
+ ```bash
407
+ # Install GitHub CLI
408
+ brew install gh # macOS
409
+ # or download from https://cli.github.com
410
+
411
+ # Authenticate (one time)
412
+ gh auth login
413
+
414
+ # Token will be used automatically - no GITHUB_TOKEN needed!
415
+ ```
416
+
417
+ ### Check Authentication Status
418
+
419
+ ```bash
420
+ npx coding-agent-benchmarks check
421
+ ```
422
+
423
+ Output:
424
+ ```
425
+ Checking adapter availability...
426
+ GitHub Copilot CLI: ✓ Available
427
+ Claude Code CLI: ✗ Not found
428
+
429
+ Checking GitHub authentication...
430
+ ✓ Using token from GitHub CLI (gh auth token)
431
+ ```
432
+
433
+ ## How It Works
434
+
435
+ 1. **Code Generation**: The adapter spawns a coding agent CLI with a prompt
436
+ 2. **File Tracking**: Git is used to detect which files were created/modified
437
+ 3. **Validation**: Multiple validators check the generated code
438
+ 4. **Scoring**: Results are aggregated and compared against thresholds
439
+ 5. **Reporting**: Results are displayed in terminal and optionally exported as JSON
440
+
441
+ ## Requirements
442
+
443
+ - Node.js >= 18.0.0
444
+ - Git repository (for file change tracking)
445
+ - At least one coding agent CLI installed (Copilot or Claude Code)
446
+ - (Optional) `GITHUB_TOKEN` or a GitHub CLI login (`gh auth login`) for LLM judge validation
447
+
448
+ ## Contributing
449
+
450
+ Contributions are welcome! Please:
451
+
452
+ 1. Fork the repository
453
+ 2. Create a feature branch
454
+ 3. Make your changes
455
+ 4. Add tests if applicable
456
+ 5. Submit a pull request
457
+
458
+ ## License
459
+
460
+ MIT License - see LICENSE file for details
461
+
462
+ ## Acknowledgments
463
+
464
+ Inspired by the need for systematic evaluation of AI coding assistants. Built to help teams ensure their AI tools follow coding standards and best practices.
465
+
466
+ ## Support
467
+
468
+ - Report issues: [GitHub Issues](https://github.com/yourusername/coding-agent-benchmarks/issues)
469
+ - Documentation: This README and inline JSDoc comments
470
+ - Examples: See `examples/` directory (coming soon)
471
+
472
+ ---
473
+
474
+ **Happy benchmarking!** 🚀
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Claude Code CLI Adapter
3
+ */
4
+ import { CodeGenerationAdapter } from '../types';
5
+ export declare class ClaudeCodeCLIAdapter implements CodeGenerationAdapter {
6
+ readonly type: "claude-code";
7
+ private workspaceRoot;
8
+ constructor(workspaceRoot?: string);
9
+ /**
10
+ * Check if Claude Code CLI is available (resolves to a boolean; never rejects on a missing CLI)
11
+ */
12
+ checkAvailability(): Promise<boolean>;
13
+ /**
14
+ * Generate code using Claude Code CLI; resolves with the list of changed files
15
+ * @param timeout Timeout in milliseconds; null or undefined (omitted) means no timeout
16
+ */
17
+ generate(prompt: string, contextFiles?: readonly string[], timeout?: number | null): Promise<string[]>;
18
+ }
19
+ //# sourceMappingURL=claudeCodeCLI.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claudeCodeCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,qBAAa,oBAAqB,YAAW,qBAAqB;IAChE,SAAgB,IAAI,EAAG,aAAa,CAAU;IAC9C,OAAO,CAAC,aAAa,CAAS;gBAElB,aAAa,CAAC,EAAE,MAAM;IAIlC;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CA8ErB"}
@@ -0,0 +1,106 @@
1
+ "use strict";
2
+ /**
3
+ * Claude Code CLI Adapter
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.ClaudeCodeCLIAdapter = void 0;
7
+ const child_process_1 = require("child_process");
8
+ const gitUtils_1 = require("../utils/gitUtils");
9
+ const workspaceUtils_1 = require("../utils/workspaceUtils");
10
+ class ClaudeCodeCLIAdapter {
11
+ constructor(workspaceRoot) {
12
+ this.type = 'claude-code';
13
+ this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
14
+ }
15
+ /**
16
+ * Check if Claude Code CLI is available
17
+ */
18
+ async checkAvailability() {
19
+ return new Promise((resolve) => {
20
+ const proc = (0, child_process_1.spawn)('which', ['claude'], {
21
+ stdio: 'pipe',
22
+ });
23
+ proc.on('close', (code) => {
24
+ resolve(code === 0);
25
+ });
26
+ proc.on('error', () => {
27
+ resolve(false);
28
+ });
29
+ });
30
+ }
31
+ /**
32
+ * Generate code using Claude Code CLI
33
+ * @param timeout Timeout in milliseconds, or null for no timeout
34
+ */
35
+ async generate(prompt, contextFiles, timeout) {
36
+ // Reset workspace to clean state before generation
37
+ try {
38
+ (0, gitUtils_1.resetGitWorkingDirectory)(this.workspaceRoot);
39
+ }
40
+ catch (error) {
41
+ console.warn('Warning: Could not reset git working directory:', error);
42
+ }
43
+ // Build the full prompt with context
44
+ let fullPrompt = prompt;
45
+ if (contextFiles && contextFiles.length > 0) {
46
+ const contexts = (0, workspaceUtils_1.readContextFiles)(this.workspaceRoot, contextFiles);
47
+ if (contexts.length > 0) {
48
+ const contextSection = contexts
49
+ .map(ctx => `\n\n### Context from ${ctx.path}:\n\`\`\`\n${ctx.content}\n\`\`\``)
50
+ .join('\n');
51
+ fullPrompt = `${prompt}${contextSection}`;
52
+ }
53
+ }
54
+ // Spawn the claude CLI process
55
+ // Note: Claude Code CLI may require different flags or approach
56
+ // This is a basic implementation that may need adjustment
57
+ return new Promise((resolve, reject) => {
58
+ const proc = (0, child_process_1.spawn)('claude', ['--non-interactive', fullPrompt], {
59
+ cwd: this.workspaceRoot,
60
+ stdio: ['pipe', 'pipe', 'pipe'],
61
+ shell: true,
62
+ });
63
+ let stdout = '';
64
+ let stderr = '';
65
+ proc.stdout?.on('data', (data) => {
66
+ stdout += data.toString();
67
+ });
68
+ proc.stderr?.on('data', (data) => {
69
+ stderr += data.toString();
70
+ });
71
+ // Set timeout only if specified (null/undefined = no timeout)
72
+ let timeoutHandle = null;
73
+ if (timeout !== null && timeout !== undefined) {
74
+ timeoutHandle = setTimeout(() => {
75
+ proc.kill('SIGTERM');
76
+ reject(new Error(`Claude Code CLI timed out after ${timeout}ms`));
77
+ }, timeout);
78
+ }
79
+ proc.on('close', (code) => {
80
+ if (timeoutHandle) {
81
+ clearTimeout(timeoutHandle);
82
+ }
83
+ if (code !== 0) {
84
+ reject(new Error(`Claude Code CLI exited with code ${code}\nStderr: ${stderr}`));
85
+ return;
86
+ }
87
+ // Get the list of changed files
88
+ try {
89
+ const changedFiles = (0, gitUtils_1.getChangedFiles)(this.workspaceRoot);
90
+ resolve(changedFiles);
91
+ }
92
+ catch (error) {
93
+ reject(new Error(`Failed to get changed files: ${error}`));
94
+ }
95
+ });
96
+ proc.on('error', (error) => {
97
+ if (timeoutHandle) {
98
+ clearTimeout(timeoutHandle);
99
+ }
100
+ reject(new Error(`Failed to spawn Claude Code CLI: ${error}`));
101
+ });
102
+ });
103
+ }
104
+ }
105
+ exports.ClaudeCodeCLIAdapter = ClaudeCodeCLIAdapter;
106
+ //# sourceMappingURL=claudeCodeCLI.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claudeCodeCLI.js","sourceRoot":"","sources":["../../src/adapters/claudeCodeCLI.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,iDAAsC;AAEtC,gDAA8E;AAC9E,4DAAiF;AAEjF,MAAa,oBAAoB;IAI/B,YAAY,aAAsB;QAHlB,SAAI,GAAG,aAAsB,CAAC;QAI5C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE;gBACtC,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACpB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,MAAc,EACd,YAAgC,EAChC,OAAuB;QAEvB,mDAAmD;QACnD,IAAI,CAAC;YACH,IAAA,mCAAwB,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC/C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,iDAAiD,EAAE,KAAK,CAAC,CAAC;QACzE,CAAC;QAED,qCAAqC;QACrC,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;YACpE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,MAAM,cAAc,GAAG,QAAQ;qBAC5B,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,wBAAwB,GAAG,CAAC,IAAI,cAAc,GAAG,CAAC,OAAO,UAAU,CAAC;qBAC/E,IAAI,CAAC,IAAI,CAAC,CAAC;gBACd,UAAU,GAAG,GAAG,MAAM,GAAG,cAAc,EAAE,CAAC;YAC5C,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,gEAAgE;QAChE,0DAA0D;QAC1D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,IAAI,GAAG,IAAA,qBAAK,EAAC,QAAQ,EAAE,CAAC,mBAAmB,EAAE,UAAU,CAAC,EAAE;gBAC9D,GAAG,EAAE,IAAI,CAAC,aAAa;gBACvB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;gBAC/B,KAAK,EAAE,IAAI;aACZ,CAAC,CAAC;YAEH,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE
,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,8DAA8D;YAC9D,IAAI,aAAa,GAA0B,IAAI,CAAC;YAChD,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,MAAM,CAAC,IAAI,KAAK,CAAC,mCAAmC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACpE,CAAC,EAAE,OAAO,CAAC,CAAC;YACd,CAAC;YAED,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBAED,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,IAAI,aAAa,MAAM,EAAE,CAAC,CAAC,CAAC;oBACjF,OAAO;gBACT,CAAC;gBAED,gCAAgC;gBAChC,IAAI,CAAC;oBACH,MAAM,YAAY,GAAG,IAAA,0BAAe,EAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBACzD,OAAO,CAAC,YAAY,CAAC,CAAC;gBACxB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzB,IAAI,aAAa,EAAE,CAAC;oBAClB,YAAY,CAAC,aAAa,CAAC,CAAC;gBAC9B,CAAC;gBACD,MAAM,CAAC,IAAI,KAAK,CAAC,oCAAoC,KAAK,EAAE,CAAC,CAAC,CAAC;YACjE,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAjHD,oDAiHC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * GitHub Copilot CLI Adapter
3
+ */
4
+ import { CodeGenerationAdapter } from '../types';
5
+ export declare class CopilotCLIAdapter implements CodeGenerationAdapter {
6
+ readonly type: "copilot";
7
+ private workspaceRoot;
8
+ constructor(workspaceRoot?: string);
9
+ /**
10
+ * Check if GitHub Copilot CLI is available (resolves to a boolean)
11
+ */
12
+ checkAvailability(): Promise<boolean>;
13
+ /**
14
+ * Generate code using GitHub Copilot CLI; resolves with an array of strings (presumably changed file paths — confirm against the implementation)
15
+ * @param timeout Timeout in milliseconds, or null for no timeout (optional parameter)
16
+ */
17
+ generate(prompt: string, contextFiles?: readonly string[], timeout?: number | null): Promise<string[]>;
18
+ }
19
+ //# sourceMappingURL=copilotCLI.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"copilotCLI.d.ts","sourceRoot":"","sources":["../../src/adapters/copilotCLI.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,qBAAa,iBAAkB,YAAW,qBAAqB;IAC7D,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;gBAElB,aAAa,CAAC,EAAE,MAAM;IAIlC;;OAEG;IACG,iBAAiB,IAAI,OAAO,CAAC,OAAO,CAAC;IAgB3C;;;OAGG;IACG,QAAQ,CACZ,MAAM,EAAE,MAAM,EACd,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,EAChC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC;CA4ErB"}