codeprobe 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. codeprobe/__init__.py +3 -0
  2. codeprobe/__main__.py +5 -0
  3. codeprobe/adapters/__init__.py +19 -0
  4. codeprobe/adapters/_base.py +192 -0
  5. codeprobe/adapters/aider.py +79 -0
  6. codeprobe/adapters/claude.py +109 -0
  7. codeprobe/adapters/codex.py +135 -0
  8. codeprobe/adapters/copilot.py +104 -0
  9. codeprobe/adapters/openai_compat.py +140 -0
  10. codeprobe/adapters/protocol.py +113 -0
  11. codeprobe/adapters/session.py +304 -0
  12. codeprobe/adapters/telemetry.py +352 -0
  13. codeprobe/analysis/__init__.py +46 -0
  14. codeprobe/analysis/ranking.py +106 -0
  15. codeprobe/analysis/report.py +558 -0
  16. codeprobe/analysis/stats.py +455 -0
  17. codeprobe/api.py +182 -0
  18. codeprobe/assess/__init__.py +27 -0
  19. codeprobe/assess/heuristics.py +544 -0
  20. codeprobe/cli/__init__.py +383 -0
  21. codeprobe/cli/assess_cmd.py +41 -0
  22. codeprobe/cli/experiment_cmd.py +417 -0
  23. codeprobe/cli/init_cmd.py +167 -0
  24. codeprobe/cli/interpret_cmd.py +75 -0
  25. codeprobe/cli/mine_cmd.py +948 -0
  26. codeprobe/cli/probe_cmd.py +116 -0
  27. codeprobe/cli/ratings_cmd.py +149 -0
  28. codeprobe/cli/run_cmd.py +175 -0
  29. codeprobe/cli/scaffold_cmd.py +97 -0
  30. codeprobe/cli/wizard.py +122 -0
  31. codeprobe/cli/yaml_writer.py +59 -0
  32. codeprobe/config/__init__.py +5 -0
  33. codeprobe/config/loader.py +234 -0
  34. codeprobe/contrib/__init__.py +17 -0
  35. codeprobe/contrib/_shared.py +25 -0
  36. codeprobe/contrib/adaptive.py +37 -0
  37. codeprobe/contrib/counterfactual.py +54 -0
  38. codeprobe/contrib/debate.py +73 -0
  39. codeprobe/contrib/decision_tree.py +51 -0
  40. codeprobe/contrib/elo.py +53 -0
  41. codeprobe/contrib/fingerprint.py +39 -0
  42. codeprobe/contrib/mutation.py +66 -0
  43. codeprobe/contrib/pareto.py +64 -0
  44. codeprobe/contrib/sprt.py +61 -0
  45. codeprobe/contrib/tournament.py +54 -0
  46. codeprobe/core/__init__.py +26 -0
  47. codeprobe/core/checkpoint.py +267 -0
  48. codeprobe/core/executor.py +639 -0
  49. codeprobe/core/experiment.py +289 -0
  50. codeprobe/core/isolation.py +128 -0
  51. codeprobe/core/llm.py +385 -0
  52. codeprobe/core/preamble.py +130 -0
  53. codeprobe/core/registry.py +82 -0
  54. codeprobe/core/sandbox.py +35 -0
  55. codeprobe/core/scoring.py +436 -0
  56. codeprobe/loaders/__init__.py +143 -0
  57. codeprobe/mining/__init__.py +27 -0
  58. codeprobe/mining/_lang.py +42 -0
  59. codeprobe/mining/curator.py +279 -0
  60. codeprobe/mining/curator_backends.py +441 -0
  61. codeprobe/mining/curator_tiers.py +276 -0
  62. codeprobe/mining/extractor.py +795 -0
  63. codeprobe/mining/org_scale.py +612 -0
  64. codeprobe/mining/org_scale_families.py +255 -0
  65. codeprobe/mining/org_scale_oracle.py +332 -0
  66. codeprobe/mining/org_scale_scanner.py +689 -0
  67. codeprobe/mining/org_scale_validate.py +152 -0
  68. codeprobe/mining/sources.py +118 -0
  69. codeprobe/mining/writer.py +436 -0
  70. codeprobe/models/__init__.py +16 -0
  71. codeprobe/models/evalrc.py +18 -0
  72. codeprobe/models/experiment.py +58 -0
  73. codeprobe/models/preamble.py +28 -0
  74. codeprobe/models/task.py +72 -0
  75. codeprobe/preambles/__init__.py +52 -0
  76. codeprobe/preambles/sourcegraph.md +32 -0
  77. codeprobe/probe/__init__.py +1 -0
  78. codeprobe/probe/generator.py +623 -0
  79. codeprobe/probe/writer.py +178 -0
  80. codeprobe/ratings/__init__.py +1 -0
  81. codeprobe/ratings/collector.py +263 -0
  82. codeprobe/scaffold/__init__.py +1 -0
  83. codeprobe/scaffold/writer.py +180 -0
  84. codeprobe/templates/__init__.py +1 -0
  85. codeprobe/templates/evalrc-mcp-comparison.yaml +28 -0
  86. codeprobe/templates/evalrc-model-comparison.yaml +18 -0
  87. codeprobe/templates/evalrc-prompt-comparison.yaml +18 -0
  88. codeprobe-0.1.0.dist-info/METADATA +131 -0
  89. codeprobe-0.1.0.dist-info/RECORD +93 -0
  90. codeprobe-0.1.0.dist-info/WHEEL +5 -0
  91. codeprobe-0.1.0.dist-info/entry_points.txt +14 -0
  92. codeprobe-0.1.0.dist-info/licenses/LICENSE +190 -0
  93. codeprobe-0.1.0.dist-info/top_level.txt +1 -0
codeprobe/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """codeprobe — Benchmark AI coding agents against your own codebase."""
2
+
3
# Keep in sync with the distribution version (this wheel is published as 0.1.0).
__version__ = "0.1.0"
codeprobe/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow ``python -m codeprobe`` and ``pipx run codeprobe``."""
2
+
3
+ from codeprobe.cli import main
4
+
5
+ main()
@@ -0,0 +1,19 @@
1
+ """Agent adapters — Protocol + built-in implementations."""
2
+
3
+ from codeprobe.adapters.protocol import (
4
+ AdapterError,
5
+ AdapterExecutionError,
6
+ AdapterSetupError,
7
+ AgentAdapter,
8
+ AgentConfig,
9
+ AgentOutput,
10
+ )
11
+
12
+ __all__ = [
13
+ "AdapterError",
14
+ "AdapterExecutionError",
15
+ "AdapterSetupError",
16
+ "AgentAdapter",
17
+ "AgentConfig",
18
+ "AgentOutput",
19
+ ]
@@ -0,0 +1,192 @@
1
+ """Shared base for agent adapters — eliminates duplicated run/preflight logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import shutil
8
+ import subprocess
9
+ import tempfile
10
+ import time
11
+ from abc import abstractmethod
12
+
13
+ from codeprobe.adapters.protocol import (
14
+ AdapterSetupError,
15
+ AgentConfig,
16
+ AgentOutput,
17
+ )
18
+
19
+ # Only these env vars are forwarded to agent subprocesses.
20
+ # Keeps other secrets (AWS_SECRET_*, database credentials, etc.) out of the child
21
+ # unless explicitly listed here.
22
_ADAPTER_ENV_WHITELIST: frozenset[str] = frozenset(
    (
        # System essentials
        "PATH",
        "HOME",
        "LANG",
        "TERM",
        "TMPDIR",
        "LC_ALL",
        # Codeprobe sandbox signal (eval harness sets this)
        "CODEPROBE_SANDBOX",
        # Agent-specific API keys (required by the adapters)
        "ANTHROPIC_API_KEY",
        "CLAUDE_CONFIG_DIR",
        "GITHUB_TOKEN",
        "OPENAI_API_KEY",
        "COPILOT_API_KEY",
        # Python toolchain
        "VIRTUAL_ENV",
        "PYTHONPATH",
        # Node/npm (for copilot CLI)
        "NODE_PATH",
        "NPM_CONFIG_PREFIX",
        # Go toolchain
        "GOPATH",
        "GOROOT",
        # Rust toolchain
        "CARGO_HOME",
        "RUSTUP_HOME",
    )
)


def _adapter_safe_env(extra: dict[str, str] | None = None) -> dict[str, str]:
    """Return the environment handed to agent subprocesses.

    Starts from the parent environment restricted to the names in
    ``_ADAPTER_ENV_WHITELIST``; entries in *extra* (if any) are then applied
    on top, so they can add new variables or override whitelisted ones.
    """
    filtered: dict[str, str] = {}
    for key, value in os.environ.items():
        if key in _ADAPTER_ENV_WHITELIST:
            filtered[key] = value
    if extra:
        filtered.update(extra)
    return filtered
64
+
65
+
66
class BaseAdapter:
    """Base class for CLI-based agent adapters.

    Subclasses set ``_binary_name`` and ``_install_hint``, then implement
    ``build_command``. The Protocol requires ``name``, ``preflight``, and
    ``run``; ``find_binary`` and ``build_command`` are BaseAdapter helpers.
    """

    # Executable name looked up on PATH; also reported as the adapter name.
    _binary_name: str
    # Message returned by ``preflight`` when the executable is missing.
    _install_hint: str

    # CLI flags whose following argument may be a temp MCP config file.
    _MCP_CONFIG_FLAGS = ("--mcp-config", "--additional-mcp-config")

    @property
    def name(self) -> str:
        """Adapter name — identical to the CLI binary name."""
        return self._binary_name

    def find_binary(self) -> str | None:
        """Return the binary's path as resolved by ``shutil.which``, or None."""
        return shutil.which(self._binary_name)

    def _require_binary(self) -> str:
        """Return binary path or raise AdapterSetupError."""
        binary = self.find_binary()
        if binary is None:
            raise AdapterSetupError(f"{self._binary_name} CLI not found")
        return binary

    def preflight(self, config: AgentConfig) -> list[str]:
        """Return a list of human-readable setup problems (empty when OK)."""
        issues: list[str] = []
        if self.find_binary() is None:
            issues.append(self._install_hint)
        return issues

    def isolate_session(self, slot_id: int) -> dict[str, str]:
        """Default: no session isolation env overrides."""
        return {}

    # NOTE: BaseAdapter does not use ABCMeta, so this decorator documents
    # intent but is not enforced at instantiation time.
    @abstractmethod
    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Return the argv list used to invoke the agent CLI."""
        ...

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Convert subprocess result to AgentOutput.

        Subclasses override to extract tokens, cost, etc. from agent output.
        """
        return AgentOutput(
            stdout=result.stdout,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
        )

    def _write_mcp_config(self, config: AgentConfig) -> str | None:
        """Write MCP config to a temp file if present. Returns path or None.

        Expands ``${VAR}`` references in string values from the environment
        so experiment.json can reference secrets without hardcoding them.
        The file is created with ``delete=False``; ``run`` removes it after
        the subprocess finishes.
        """
        if not config.mcp_config:
            return None
        expanded = json.loads(os.path.expandvars(json.dumps(config.mcp_config)))
        # The context manager guarantees the handle is closed even if the
        # write fails part-way.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", prefix="codeprobe-mcp-", delete=False
        ) as tmp:
            json.dump(expanded, tmp)
        return tmp.name

    @staticmethod
    def _find_mcp_tmpfile(cmd: list[str]) -> str | None:
        """Return the temp MCP config path referenced by *cmd*, or None.

        Looks at the argument following each known MCP flag. A leading ``@``
        (used by adapters that pass ``@<path>``, e.g. the Copilot adapter's
        ``--additional-mcp-config``) is stripped before the path is compared
        against the temp directory, so those files are cleaned up as well.
        When several flags match, the last one wins.
        """
        tmp_root = tempfile.gettempdir()
        found: str | None = None
        for flag in BaseAdapter._MCP_CONFIG_FLAGS:
            if flag not in cmd:
                continue
            idx = cmd.index(flag)
            if idx + 1 >= len(cmd):
                continue
            path = cmd[idx + 1]
            if path.startswith("@"):
                path = path[1:]
            if path.startswith(tmp_root):
                found = path
        return found

    def run(
        self,
        prompt: str,
        config: AgentConfig,
        session_env: dict[str, str] | None = None,
    ) -> AgentOutput:
        """Run the agent CLI once and return its parsed output.

        A timeout is reported as an ``AgentOutput`` with ``exit_code=-1`` and
        an ``error`` message rather than raised. A missing binary at launch
        raises ``AdapterSetupError``. If ``parse_output`` raises, the raw
        process output is returned with the failure recorded in ``error``.
        """
        cmd = self.build_command(prompt, config)
        # Track the temp MCP config (if any) so it can be removed afterwards.
        mcp_tmpfile = self._find_mcp_tmpfile(cmd)

        start = time.monotonic()

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=config.timeout_seconds,
                cwd=config.cwd,
                env=_adapter_safe_env(session_env),
            )
        except subprocess.TimeoutExpired as exc:
            duration = time.monotonic() - start
            # Partial output on the exception is only forwarded when it is
            # already text; anything else is replaced by an empty value.
            return AgentOutput(
                stdout=exc.stdout if isinstance(exc.stdout, str) else "",
                stderr=exc.stderr if isinstance(exc.stderr, str) else None,
                exit_code=-1,
                duration_seconds=duration,
                error=f"Agent timed out after {config.timeout_seconds}s",
            )
        except FileNotFoundError as exc:
            raise AdapterSetupError(f"Binary not found at runtime: {exc}") from exc
        finally:
            # Best-effort cleanup: a file that is already gone is not an error.
            if mcp_tmpfile:
                try:
                    os.unlink(mcp_tmpfile)
                except OSError:
                    pass

        duration = time.monotonic() - start

        try:
            return self.parse_output(result, duration)
        except Exception as exc:
            # Never lose the raw output because telemetry parsing failed.
            return AgentOutput(
                stdout=result.stdout,
                stderr=result.stderr or None,
                exit_code=result.returncode,
                duration_seconds=duration,
                error=f"Output parse failed: {exc}",
            )
@@ -0,0 +1,79 @@
1
+ """Aider CLI agent adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import subprocess
7
+
8
+ from codeprobe.adapters._base import BaseAdapter
9
+ from codeprobe.adapters.protocol import AgentConfig, AgentOutput
10
+
11
+ # Matches: "Tokens: 1.2k sent, 856 received. Cost: $0.0034 message, $0.0034 session."
12
+ # Token counts may be plain integers or k-suffixed floats (e.g., 45.3k).
13
+ _TOKEN_RE = re.compile(r"Tokens:\s*([\d.]+k?)\s*sent,\s*([\d.]+k?)\s*received")
14
+ _COST_RE = re.compile(r"Cost:\s*\$([\d.]+)\s*message")
15
+
16
+
17
+ def _parse_token_value(raw: str) -> int:
18
+ """Parse a token count string like '1.2k' or '856' into an integer."""
19
+ raw = raw.strip().lower()
20
+ if raw.endswith("k"):
21
+ return int(float(raw[:-1]) * 1000)
22
+ return int(float(raw))
23
+
24
+
25
+ class AiderAdapter(BaseAdapter):
26
+ """Adapter for Aider CLI (aider --message)."""
27
+
28
+ _binary_name = "aider"
29
+ _install_hint = "Aider CLI not found. Install with: pip install aider-chat"
30
+
31
+ def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
32
+ binary = self._require_binary()
33
+ cmd = [binary, "--message", prompt, "--yes-always", "--no-git"]
34
+
35
+ if config.model:
36
+ cmd.extend(["--model", config.model])
37
+
38
+ return cmd
39
+
40
+ def parse_output(
41
+ self, result: subprocess.CompletedProcess[str], duration: float
42
+ ) -> AgentOutput:
43
+ """Parse Aider CLI output for token counts and cost.
44
+
45
+ Aider prints a summary line like:
46
+ Tokens: 1.2k sent, 856 received. Cost: $0.0034 message, $0.0034 session.
47
+
48
+ This may appear in stdout or stderr. We search both.
49
+ """
50
+ combined = (result.stdout or "") + "\n" + (result.stderr or "")
51
+
52
+ input_tokens: int | None = None
53
+ output_tokens: int | None = None
54
+ cost_usd: float | None = None
55
+ cost_model = "unknown"
56
+ cost_source = "unavailable"
57
+
58
+ token_match = _TOKEN_RE.search(combined)
59
+ if token_match:
60
+ input_tokens = _parse_token_value(token_match.group(1))
61
+ output_tokens = _parse_token_value(token_match.group(2))
62
+
63
+ cost_match = _COST_RE.search(combined)
64
+ if cost_match:
65
+ cost_usd = float(cost_match.group(1))
66
+ cost_model = "per_token"
67
+ cost_source = "log_parsed"
68
+
69
+ return AgentOutput(
70
+ stdout=result.stdout,
71
+ stderr=result.stderr or None,
72
+ exit_code=result.returncode,
73
+ duration_seconds=duration,
74
+ input_tokens=input_tokens,
75
+ output_tokens=output_tokens,
76
+ cost_usd=cost_usd,
77
+ cost_model=cost_model,
78
+ cost_source=cost_source,
79
+ )
@@ -0,0 +1,109 @@
1
+ """Claude Code agent adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import subprocess
8
+ import tempfile
9
+ from pathlib import Path
10
+
11
+ from codeprobe.adapters._base import BaseAdapter
12
+ from codeprobe.adapters.protocol import (
13
+ ALLOWED_PERMISSION_MODES,
14
+ AgentConfig,
15
+ AgentOutput,
16
+ )
17
+ from codeprobe.adapters.telemetry import JsonStdoutCollector
18
+ from codeprobe.core.sandbox import is_sandboxed
19
+
20
+ # Claude CLI accepts aliases (sonnet, opus, haiku) or short model IDs
21
+ # (claude-sonnet-4-6) but NOT full API model IDs with date suffixes
22
+ # (claude-sonnet-4-6-20250514). Strip the date suffix when present.
23
_API_MODEL_DATE_SUFFIX = re.compile(r"(-\d{8})$")


def _normalize_model_for_cli(model: str) -> str:
    """Return *model* in a form the Claude CLI accepts.

    A trailing ``-YYYYMMDD``-style suffix (dash plus eight digits) is removed;
    identifiers without one, such as the aliases 'sonnet' or 'haiku', are
    returned unchanged.
    """
    suffix = _API_MODEL_DATE_SUFFIX.search(model)
    if suffix is None:
        return model
    # Cut out exactly the matched span, keeping anything on either side.
    return model[: suffix.start()] + model[suffix.end() :]
33
+
34
+
35
class ClaudeAdapter(BaseAdapter):
    """Adapter for Claude Code CLI (claude -p)."""

    _binary_name = "claude"
    _install_hint = "Claude CLI not found. Install from https://claude.ai/download"

    def __init__(self) -> None:
        # Extracts usage/cost telemetry from the CLI's JSON stdout.
        self._collector = JsonStdoutCollector()

    def preflight(self, config: AgentConfig) -> list[str]:
        """Return setup problems; adds a sandbox check for ``dangerously_skip``."""
        issues = super().preflight(config)
        if config.permission_mode == "dangerously_skip" and not is_sandboxed():
            issues.append(
                "permission_mode='dangerously_skip' requires a sandboxed environment "
                "(Docker container or CODEPROBE_SANDBOX=1)"
            )
        return issues

    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Build the ``claude -p`` argv for *prompt*.

        Raises:
            AdapterSetupError: if the binary is not found (via ``_require_binary``).
            ValueError: if ``config.permission_mode`` is neither "default",
                "dangerously_skip", nor a member of ``ALLOWED_PERMISSION_MODES``.
        """
        binary = self._require_binary()
        cmd = [binary, "-p", prompt, "--output-format", "json"]

        if config.model:
            cmd.extend(["--model", _normalize_model_for_cli(config.model)])

        if config.permission_mode == "dangerously_skip":
            cmd.append("--dangerously-skip-permissions")
        elif config.permission_mode != "default":
            # Reject anything outside the allow-list before it reaches the CLI.
            if config.permission_mode not in ALLOWED_PERMISSION_MODES:
                raise ValueError(
                    f"Unsafe permission_mode: {config.permission_mode!r}. "
                    f"Allowed: {', '.join(sorted(ALLOWED_PERMISSION_MODES))}"
                )
            cmd.extend(["--permission-mode", config.permission_mode])

        mcp_path = self._write_mcp_config(config)
        if mcp_path:
            cmd.extend(["--mcp-config", mcp_path])

        return cmd

    def isolate_session(self, slot_id: int) -> dict[str, str]:
        """Return a per-slot CLAUDE_CONFIG_DIR for session isolation.

        The directory lives under the system temp dir and is created if
        missing.
        """
        config_dir = (
            Path(tempfile.gettempdir()) / "codeprobe-claude" / f"slot-{slot_id}"
        )
        config_dir.mkdir(parents=True, exist_ok=True)
        return {"CLAUDE_CONFIG_DIR": str(config_dir)}

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Parse Claude CLI JSON envelope into AgentOutput.

        The envelope's ``result`` field becomes ``stdout``. If stdout is not
        valid JSON, or is valid JSON but not an object, the raw stdout is
        used unchanged.
        """
        usage = self._collector.collect(result.stdout)

        # Extract content text from the JSON envelope.
        stdout_text = result.stdout
        try:
            envelope = json.loads(result.stdout)
        except (json.JSONDecodeError, ValueError):
            envelope = None
        # Only a JSON object can carry a "result" key; a list or scalar
        # payload would otherwise raise AttributeError on ``.get``.
        if isinstance(envelope, dict):
            stdout_text = envelope.get("result", result.stdout)

        return AgentOutput(
            stdout=stdout_text,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            cost_usd=usage.cost_usd,
            cost_model=usage.cost_model,
            cost_source=usage.cost_source,
            error=usage.error,
        )
@@ -0,0 +1,135 @@
1
+ """OpenAI Codex agent adapter — API-based (no CLI subprocess)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import time
8
+
9
+ from codeprobe.adapters.protocol import (
10
+ AdapterExecutionError,
11
+ AdapterSetupError,
12
+ AgentConfig,
13
+ AgentOutput,
14
+ )
15
+ from codeprobe.adapters.telemetry import ApiResponseCollector
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def _usage_fields(
21
+ usage: object | None, input_attr: str, output_attr: str
22
+ ) -> tuple[int | None, int | None]:
23
+ """Extract input/output token counts from an API usage object."""
24
+ if usage is None:
25
+ return None, None
26
+ return getattr(usage, input_attr, None), getattr(usage, output_attr, None)
27
+
28
+
29
+ class CodexAdapter:
30
+ """Adapter for OpenAI Codex API.
31
+
32
+ Tries the Responses API (responses.create) first. If the model is not
33
+ available on that endpoint (NotFoundError), falls back to the Chat
34
+ Completions API (chat.completions.create).
35
+ """
36
+
37
+ def __init__(self) -> None:
38
+ self._collector = ApiResponseCollector()
39
+
40
+ @property
41
+ def name(self) -> str:
42
+ return "codex"
43
+
44
+ def preflight(self, config: AgentConfig) -> list[str]:
45
+ issues: list[str] = []
46
+ try:
47
+ import openai # noqa: F401
48
+ except ImportError:
49
+ issues.append(
50
+ "openai SDK not found. Install with: pip install codeprobe[codex]"
51
+ )
52
+ return issues
53
+ if not os.environ.get("OPENAI_API_KEY"):
54
+ issues.append("OPENAI_API_KEY environment variable not set")
55
+ return issues
56
+
57
+ def isolate_session(self, slot_id: int) -> dict[str, str]:
58
+ """Codex uses API calls — no session-level isolation needed."""
59
+ return {}
60
+
61
+ def run(
62
+ self,
63
+ prompt: str,
64
+ config: AgentConfig,
65
+ session_env: dict[str, str] | None = None,
66
+ ) -> AgentOutput:
67
+ try:
68
+ import openai
69
+ except ImportError:
70
+ raise AdapterSetupError(
71
+ "openai SDK not installed. Run: pip install codeprobe[codex]"
72
+ )
73
+
74
+ client = openai.OpenAI()
75
+ model = config.model or "codex-mini-latest"
76
+ start = time.monotonic()
77
+
78
+ try:
79
+ try:
80
+ response = client.responses.create(model=model, input=prompt)
81
+ stdout = response.output_text or ""
82
+ input_tokens, output_tokens = _usage_fields(
83
+ response.usage, "input_tokens", "output_tokens"
84
+ )
85
+ except openai.NotFoundError:
86
+ logger.info(
87
+ "Model %s not found on Responses API, falling back to "
88
+ "Chat Completions API",
89
+ model,
90
+ )
91
+ try:
92
+ response = client.chat.completions.create(
93
+ model=model,
94
+ messages=[{"role": "user", "content": prompt}],
95
+ )
96
+ except openai.NotFoundError as exc:
97
+ raise AdapterExecutionError(
98
+ f"Model {model!r} not available on Responses or "
99
+ f"Chat Completions API: {exc}"
100
+ ) from exc
101
+ content = (
102
+ response.choices[0].message.content if response.choices else None
103
+ )
104
+ stdout = content or ""
105
+ input_tokens, output_tokens = _usage_fields(
106
+ response.usage, "prompt_tokens", "completion_tokens"
107
+ )
108
+ except openai.AuthenticationError as exc:
109
+ raise AdapterSetupError(f"OPENAI_API_KEY invalid: {exc}") from exc
110
+ except openai.RateLimitError as exc:
111
+ raise AdapterExecutionError(f"Rate limited: {exc}") from exc
112
+ except openai.APIError as exc:
113
+ raise AdapterExecutionError(f"OpenAI API error: {exc}") from exc
114
+
115
+ duration = time.monotonic() - start
116
+
117
+ usage = self._collector.collect(
118
+ stdout,
119
+ input_tokens=input_tokens,
120
+ output_tokens=output_tokens,
121
+ model=model,
122
+ )
123
+
124
+ return AgentOutput(
125
+ stdout=stdout,
126
+ stderr=None,
127
+ exit_code=0,
128
+ duration_seconds=duration,
129
+ input_tokens=usage.input_tokens,
130
+ output_tokens=usage.output_tokens,
131
+ cost_usd=usage.cost_usd,
132
+ cost_model=usage.cost_model,
133
+ cost_source=usage.cost_source,
134
+ error=usage.error,
135
+ )
@@ -0,0 +1,104 @@
1
+ """GitHub Copilot CLI agent adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import subprocess
8
+
9
+ from codeprobe.adapters._base import BaseAdapter
10
+ from codeprobe.adapters.protocol import AgentConfig, AgentOutput
11
+ from codeprobe.adapters.telemetry import NdjsonStreamCollector
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class CopilotAdapter(BaseAdapter):
    """Adapter for GitHub Copilot CLI."""

    _binary_name = "copilot"
    _install_hint = (
        "Copilot CLI not found. Install from https://github.com/github/copilot-cli"
    )

    # NDJSON event types whose ``data.content`` is collected as output text.
    _CONTENT_EVENT_TYPES = ("assistant.message", "result")

    def __init__(self) -> None:
        # Extracts usage telemetry from the NDJSON stream.
        self._collector = NdjsonStreamCollector()

    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Build the copilot argv for *prompt*.

        Raises AdapterSetupError (via ``_require_binary``) if the binary is
        not found.
        """
        binary = self._require_binary()
        cmd = [binary, "--prompt", prompt, "--output-format", "json"]

        # Non-interactive mode requires --allow-all-tools for tool auto-approval
        cmd.append("--allow-all-tools")

        if config.model:
            cmd.extend(["--model", config.model])

        mcp_path = self._write_mcp_config(config)
        if mcp_path:
            # The config file is passed in ``@<path>`` form.
            cmd.extend(["--additional-mcp-config", f"@{mcp_path}"])

        return cmd

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Parse Copilot CLI NDJSON output for token data.

        Requires Copilot CLI 1.0.4+ with --output-format json which emits
        NDJSON lines containing "assistant.message" events with outputTokens.

        Output text is the newline-joined ``data.content`` of every
        "assistant.message" and "result" event. If any line is not valid
        JSON, or no content is found, the raw stdout is used instead and the
        fallback is recorded in the ``error`` field. Lines that are valid
        JSON but not objects, and events whose ``data``/``content`` have an
        unexpected type, are skipped.
        """
        raw = result.stdout or ""
        usage = self._collector.collect(raw)

        # Extract content text from NDJSON events.
        # On JSON parse failure, the except clause resets to empty,
        # and the fallback below uses raw output.
        result_text_parts: list[str] = []
        try:
            for line in raw.strip().splitlines():
                if not line.strip():
                    continue
                obj = json.loads(line)
                # A JSON scalar or array is not an event; ignore it rather
                # than fail on ``.get``.
                if not isinstance(obj, dict):
                    continue
                if obj.get("type", "") not in self._CONTENT_EVENT_TYPES:
                    continue
                data = obj.get("data", {})
                content = data.get("content", "") if isinstance(data, dict) else ""
                # Only text can be joined into the final output.
                if isinstance(content, str) and content:
                    result_text_parts.append(content)
        except json.JSONDecodeError:
            logger.warning(
                "ndjson_parse_fallback: failed to parse Copilot NDJSON output, "
                "falling back to raw stdout"
            )
            result_text_parts = []
        stdout_text = "\n".join(result_text_parts) if result_text_parts else raw

        # When NDJSON parsing fell back to raw output, surface the fallback
        # in the error field so callers can detect degraded telemetry.
        error = usage.error
        if not result_text_parts and raw:
            fallback_msg = "ndjson_parse_fallback: raw stdout used as output"
            error = f"{error}; {fallback_msg}" if error else fallback_msg

        return AgentOutput(
            stdout=stdout_text,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cost_usd=usage.cost_usd,
            cost_model=usage.cost_model,
            cost_source=usage.cost_source,
            error=error,
        )