codeprobe 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeprobe/__init__.py +3 -0
- codeprobe/__main__.py +5 -0
- codeprobe/adapters/__init__.py +19 -0
- codeprobe/adapters/_base.py +192 -0
- codeprobe/adapters/aider.py +79 -0
- codeprobe/adapters/claude.py +109 -0
- codeprobe/adapters/codex.py +135 -0
- codeprobe/adapters/copilot.py +104 -0
- codeprobe/adapters/openai_compat.py +140 -0
- codeprobe/adapters/protocol.py +113 -0
- codeprobe/adapters/session.py +304 -0
- codeprobe/adapters/telemetry.py +352 -0
- codeprobe/analysis/__init__.py +46 -0
- codeprobe/analysis/ranking.py +106 -0
- codeprobe/analysis/report.py +558 -0
- codeprobe/analysis/stats.py +455 -0
- codeprobe/api.py +182 -0
- codeprobe/assess/__init__.py +27 -0
- codeprobe/assess/heuristics.py +544 -0
- codeprobe/cli/__init__.py +383 -0
- codeprobe/cli/assess_cmd.py +41 -0
- codeprobe/cli/experiment_cmd.py +417 -0
- codeprobe/cli/init_cmd.py +167 -0
- codeprobe/cli/interpret_cmd.py +75 -0
- codeprobe/cli/mine_cmd.py +948 -0
- codeprobe/cli/probe_cmd.py +116 -0
- codeprobe/cli/ratings_cmd.py +149 -0
- codeprobe/cli/run_cmd.py +175 -0
- codeprobe/cli/scaffold_cmd.py +97 -0
- codeprobe/cli/wizard.py +122 -0
- codeprobe/cli/yaml_writer.py +59 -0
- codeprobe/config/__init__.py +5 -0
- codeprobe/config/loader.py +234 -0
- codeprobe/contrib/__init__.py +17 -0
- codeprobe/contrib/_shared.py +25 -0
- codeprobe/contrib/adaptive.py +37 -0
- codeprobe/contrib/counterfactual.py +54 -0
- codeprobe/contrib/debate.py +73 -0
- codeprobe/contrib/decision_tree.py +51 -0
- codeprobe/contrib/elo.py +53 -0
- codeprobe/contrib/fingerprint.py +39 -0
- codeprobe/contrib/mutation.py +66 -0
- codeprobe/contrib/pareto.py +64 -0
- codeprobe/contrib/sprt.py +61 -0
- codeprobe/contrib/tournament.py +54 -0
- codeprobe/core/__init__.py +26 -0
- codeprobe/core/checkpoint.py +267 -0
- codeprobe/core/executor.py +639 -0
- codeprobe/core/experiment.py +289 -0
- codeprobe/core/isolation.py +128 -0
- codeprobe/core/llm.py +385 -0
- codeprobe/core/preamble.py +130 -0
- codeprobe/core/registry.py +82 -0
- codeprobe/core/sandbox.py +35 -0
- codeprobe/core/scoring.py +436 -0
- codeprobe/loaders/__init__.py +143 -0
- codeprobe/mining/__init__.py +27 -0
- codeprobe/mining/_lang.py +42 -0
- codeprobe/mining/curator.py +279 -0
- codeprobe/mining/curator_backends.py +441 -0
- codeprobe/mining/curator_tiers.py +276 -0
- codeprobe/mining/extractor.py +795 -0
- codeprobe/mining/org_scale.py +612 -0
- codeprobe/mining/org_scale_families.py +255 -0
- codeprobe/mining/org_scale_oracle.py +332 -0
- codeprobe/mining/org_scale_scanner.py +689 -0
- codeprobe/mining/org_scale_validate.py +152 -0
- codeprobe/mining/sources.py +118 -0
- codeprobe/mining/writer.py +436 -0
- codeprobe/models/__init__.py +16 -0
- codeprobe/models/evalrc.py +18 -0
- codeprobe/models/experiment.py +58 -0
- codeprobe/models/preamble.py +28 -0
- codeprobe/models/task.py +72 -0
- codeprobe/preambles/__init__.py +52 -0
- codeprobe/preambles/sourcegraph.md +32 -0
- codeprobe/probe/__init__.py +1 -0
- codeprobe/probe/generator.py +623 -0
- codeprobe/probe/writer.py +178 -0
- codeprobe/ratings/__init__.py +1 -0
- codeprobe/ratings/collector.py +263 -0
- codeprobe/scaffold/__init__.py +1 -0
- codeprobe/scaffold/writer.py +180 -0
- codeprobe/templates/__init__.py +1 -0
- codeprobe/templates/evalrc-mcp-comparison.yaml +28 -0
- codeprobe/templates/evalrc-model-comparison.yaml +18 -0
- codeprobe/templates/evalrc-prompt-comparison.yaml +18 -0
- codeprobe-0.1.0.dist-info/METADATA +131 -0
- codeprobe-0.1.0.dist-info/RECORD +93 -0
- codeprobe-0.1.0.dist-info/WHEEL +5 -0
- codeprobe-0.1.0.dist-info/entry_points.txt +14 -0
- codeprobe-0.1.0.dist-info/licenses/LICENSE +190 -0
- codeprobe-0.1.0.dist-info/top_level.txt +1 -0
codeprobe/__init__.py
ADDED
codeprobe/__main__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Agent adapters — Protocol + built-in implementations."""
|
|
2
|
+
|
|
3
|
+
from codeprobe.adapters.protocol import (
|
|
4
|
+
AdapterError,
|
|
5
|
+
AdapterExecutionError,
|
|
6
|
+
AdapterSetupError,
|
|
7
|
+
AgentAdapter,
|
|
8
|
+
AgentConfig,
|
|
9
|
+
AgentOutput,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AdapterError",
|
|
14
|
+
"AdapterExecutionError",
|
|
15
|
+
"AdapterSetupError",
|
|
16
|
+
"AgentAdapter",
|
|
17
|
+
"AgentConfig",
|
|
18
|
+
"AgentOutput",
|
|
19
|
+
]
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Shared base for agent adapters — eliminates duplicated run/preflight logic."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
import tempfile
|
|
10
|
+
import time
|
|
11
|
+
from abc import abstractmethod
|
|
12
|
+
|
|
13
|
+
from codeprobe.adapters.protocol import (
|
|
14
|
+
AdapterSetupError,
|
|
15
|
+
AgentConfig,
|
|
16
|
+
AgentOutput,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Only these env vars are forwarded to agent subprocesses.
# Everything else in the parent environment (AWS_SECRET_*, database
# credentials, etc.) is withheld from the child. The agent API keys listed
# below are forwarded on purpose because the adapters need them.
_ADAPTER_ENV_WHITELIST: frozenset[str] = frozenset(
    {
        # System essentials
        "PATH",
        "HOME",
        "LANG",
        "TERM",
        "TMPDIR",
        "LC_ALL",
        # Codeprobe sandbox signal (eval harness sets this)
        "CODEPROBE_SANDBOX",
        # Agent-specific API keys (required by the adapters)
        "ANTHROPIC_API_KEY",
        "CLAUDE_CONFIG_DIR",
        "GITHUB_TOKEN",
        "OPENAI_API_KEY",
        "COPILOT_API_KEY",
        # Python toolchain
        "VIRTUAL_ENV",
        "PYTHONPATH",
        # Node/npm (for copilot CLI)
        "NODE_PATH",
        "NPM_CONFIG_PREFIX",
        # Go toolchain
        "GOPATH",
        "GOROOT",
        # Rust toolchain
        "CARGO_HOME",
        "RUSTUP_HOME",
    }
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _adapter_safe_env(extra: dict[str, str] | None = None) -> dict[str, str]:
    """Return the environment mapping handed to an agent subprocess.

    The parent environment is restricted to the names in
    ``_ADAPTER_ENV_WHITELIST``; ``extra`` is then layered on top, so its
    entries are always present and override inherited values.
    """
    child_env = {}
    for key, value in os.environ.items():
        # Anything not explicitly whitelisted never reaches the child.
        if key in _ADAPTER_ENV_WHITELIST:
            child_env[key] = value
    if extra:
        child_env.update(extra)
    return child_env
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class BaseAdapter:
    """Base class for CLI-based agent adapters.

    Subclasses set ``_binary_name`` and ``_install_hint``, then implement
    ``build_command``. The Protocol requires ``name``, ``preflight``, and
    ``run``; ``find_binary`` and ``build_command`` are BaseAdapter helpers.
    """

    _binary_name: str
    _install_hint: str

    # Command-line flags whose following argument may name a temp MCP config.
    _MCP_CONFIG_FLAGS = ("--mcp-config", "--additional-mcp-config")

    @property
    def name(self) -> str:
        """Adapter name; identical to the CLI binary name."""
        return self._binary_name

    def find_binary(self) -> str | None:
        """Return the binary's path as resolved by ``shutil.which``, or None."""
        return shutil.which(self._binary_name)

    def _require_binary(self) -> str:
        """Return binary path or raise AdapterSetupError."""
        binary = self.find_binary()
        if binary is None:
            raise AdapterSetupError(f"{self._binary_name} CLI not found")
        return binary

    def preflight(self, config: AgentConfig) -> list[str]:
        """Return a list of setup problems (empty when the binary is found)."""
        issues: list[str] = []
        if self.find_binary() is None:
            issues.append(self._install_hint)
        return issues

    def isolate_session(self, slot_id: int) -> dict[str, str]:
        """Default: no session isolation env overrides."""
        return {}

    # NOTE: BaseAdapter does not derive from ABC, so this decorator marks
    # intent only; it is not enforced at instantiation time.
    @abstractmethod
    def build_command(self, prompt: str, config: AgentConfig) -> list[str]: ...

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Convert subprocess result to AgentOutput.

        Subclasses override to extract tokens, cost, etc. from agent output.
        """
        return AgentOutput(
            stdout=result.stdout,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
        )

    @classmethod
    def _expand_env_refs(cls, value):
        """Recursively expand ``$VAR`` / ``${VAR}`` in a JSON-like structure.

        Expansion is applied to each string (dict keys included) rather than
        to serialized JSON text, so an environment value that contains quotes
        or backslashes cannot corrupt the resulting document.
        """
        if isinstance(value, str):
            return os.path.expandvars(value)
        if isinstance(value, dict):
            return {
                (os.path.expandvars(key) if isinstance(key, str) else key): (
                    cls._expand_env_refs(item)
                )
                for key, item in value.items()
            }
        if isinstance(value, (list, tuple)):
            return [cls._expand_env_refs(item) for item in value]
        return value

    def _write_mcp_config(self, config: AgentConfig) -> str | None:
        """Write MCP config to a temp file if present. Returns path or None.

        Expands ``${VAR}`` references in string values from the environment
        so experiment.json can reference secrets without hardcoding them.
        """
        if not config.mcp_config:
            return None
        # Serialize before creating the file so a serialization error cannot
        # leave an orphaned temp file behind.
        payload = json.dumps(self._expand_env_refs(config.mcp_config))
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", prefix="codeprobe-mcp-", delete=False
        ) as tmp:
            tmp.write(payload)
        return tmp.name

    @classmethod
    def _find_mcp_tmpfile(cls, cmd: list[str]) -> str | None:
        """Return the temp-dir MCP config path referenced by *cmd*, if any.

        A leading ``@`` on the argument (the form CopilotAdapter passes) is
        stripped before the temp-directory check so that file is cleaned up
        as well. When several flags match, the last one wins.
        """
        tmp_root = tempfile.gettempdir()
        found: str | None = None
        for flag in cls._MCP_CONFIG_FLAGS:
            if flag not in cmd:
                continue
            idx = cmd.index(flag)
            if idx + 1 >= len(cmd):
                continue
            path = cmd[idx + 1]
            if path.startswith("@"):
                path = path[1:]
            if path.startswith(tmp_root):
                found = path
        return found

    @staticmethod
    def _as_text(stream: str | bytes | None) -> str | None:
        """Return *stream* as text, decoding bytes with the locale encoding.

        ``TimeoutExpired`` can carry bytes even when the process was started
        in text mode; decoding keeps the partial output instead of dropping it.
        """
        if isinstance(stream, bytes):
            import locale

            return stream.decode(locale.getpreferredencoding(False), errors="replace")
        return stream

    def run(
        self,
        prompt: str,
        config: AgentConfig,
        session_env: dict[str, str] | None = None,
    ) -> AgentOutput:
        """Run the agent CLI once and return its parsed output.

        A timeout is reported as an ``AgentOutput`` with ``exit_code=-1``
        and an ``error`` message rather than raised. A parse failure is also
        reported through ``error``, with the raw streams preserved.

        Raises:
            AdapterSetupError: if the binary disappears between lookup and
                execution (``FileNotFoundError`` from ``subprocess.run``).
        """
        cmd = self.build_command(prompt, config)
        # Track the temp MCP config (if build_command wrote one) for cleanup.
        mcp_tmpfile = self._find_mcp_tmpfile(cmd)

        start = time.monotonic()

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=config.timeout_seconds,
                cwd=config.cwd,
                env=_adapter_safe_env(session_env),
            )
        except subprocess.TimeoutExpired as exc:
            duration = time.monotonic() - start
            return AgentOutput(
                stdout=self._as_text(exc.stdout) or "",
                stderr=self._as_text(exc.stderr),
                exit_code=-1,
                duration_seconds=duration,
                error=f"Agent timed out after {config.timeout_seconds}s",
            )
        except FileNotFoundError as exc:
            raise AdapterSetupError(f"Binary not found at runtime: {exc}") from exc
        finally:
            if mcp_tmpfile:
                try:
                    os.unlink(mcp_tmpfile)
                except OSError:
                    # Best-effort cleanup; a missing file is not an error.
                    pass

        duration = time.monotonic() - start

        try:
            return self.parse_output(result, duration)
        except Exception as exc:
            # Boundary: a subclass parser bug must not lose the raw output.
            return AgentOutput(
                stdout=result.stdout,
                stderr=result.stderr or None,
                exit_code=result.returncode,
                duration_seconds=duration,
                error=f"Output parse failed: {exc}",
            )
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Aider CLI agent adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
from codeprobe.adapters._base import BaseAdapter
|
|
9
|
+
from codeprobe.adapters.protocol import AgentConfig, AgentOutput
|
|
10
|
+
|
|
11
|
+
# Matches: "Tokens: 1.2k sent, 856 received. Cost: $0.0034 message, $0.0034 session."
# Token counts may be plain integers or k-suffixed floats (e.g., 45.3k).
# Group 1 is the "sent" count and group 2 the "received" count, as raw text.
_TOKEN_RE = re.compile(r"Tokens:\s*([\d.]+k?)\s*sent,\s*([\d.]+k?)\s*received")
# Group 1 is the dollar amount that precedes "message" (not the session total).
_COST_RE = re.compile(r"Cost:\s*\$([\d.]+)\s*message")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _parse_token_value(raw: str) -> int:
|
|
18
|
+
"""Parse a token count string like '1.2k' or '856' into an integer."""
|
|
19
|
+
raw = raw.strip().lower()
|
|
20
|
+
if raw.endswith("k"):
|
|
21
|
+
return int(float(raw[:-1]) * 1000)
|
|
22
|
+
return int(float(raw))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AiderAdapter(BaseAdapter):
    """Agent adapter that drives the ``aider`` CLI via ``--message``."""

    _binary_name = "aider"
    _install_hint = "Aider CLI not found. Install with: pip install aider-chat"

    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Assemble the aider argv for *prompt*.

        ``--model`` is appended only when ``config.model`` is set.
        """
        argv = [
            self._require_binary(),
            "--message",
            prompt,
            "--yes-always",
            "--no-git",
        ]
        if config.model:
            argv += ["--model", config.model]
        return argv

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Build an AgentOutput, pulling token counts and cost from the logs.

        Aider prints a summary line like:
            Tokens: 1.2k sent, 856 received. Cost: $0.0034 message, $0.0034 session.

        The line may land on either stream, so stdout and stderr are
        searched together. Only the first match of each pattern is used.
        """
        haystack = "\n".join((result.stdout or "", result.stderr or ""))

        tokens_found = _TOKEN_RE.search(haystack)
        if tokens_found is None:
            sent = received = None
        else:
            sent = _parse_token_value(tokens_found.group(1))
            received = _parse_token_value(tokens_found.group(2))

        cost_found = _COST_RE.search(haystack)
        if cost_found is None:
            cost, model_label, source_label = None, "unknown", "unavailable"
        else:
            cost = float(cost_found.group(1))
            model_label, source_label = "per_token", "log_parsed"

        return AgentOutput(
            stdout=result.stdout,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
            input_tokens=sent,
            output_tokens=received,
            cost_usd=cost,
            cost_model=model_label,
            cost_source=source_label,
        )
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Claude Code agent adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import subprocess
|
|
8
|
+
import tempfile
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from codeprobe.adapters._base import BaseAdapter
|
|
12
|
+
from codeprobe.adapters.protocol import (
|
|
13
|
+
ALLOWED_PERMISSION_MODES,
|
|
14
|
+
AgentConfig,
|
|
15
|
+
AgentOutput,
|
|
16
|
+
)
|
|
17
|
+
from codeprobe.adapters.telemetry import JsonStdoutCollector
|
|
18
|
+
from codeprobe.core.sandbox import is_sandboxed
|
|
19
|
+
|
|
20
|
+
# Claude CLI accepts aliases (sonnet, opus, haiku) or short model IDs
|
|
21
|
+
# (claude-sonnet-4-6) but NOT full API model IDs with date suffixes
|
|
22
|
+
# (claude-sonnet-4-6-20250514). Strip the date suffix when present.
|
|
23
|
+
_API_MODEL_DATE_SUFFIX = re.compile(r"(-\d{8})$")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _normalize_model_for_cli(model: str) -> str:
|
|
27
|
+
"""Normalize a model identifier for the Claude CLI.
|
|
28
|
+
|
|
29
|
+
Strips date suffixes from full API model IDs so the CLI can resolve them.
|
|
30
|
+
Aliases like 'sonnet' or 'haiku' pass through unchanged.
|
|
31
|
+
"""
|
|
32
|
+
return _API_MODEL_DATE_SUFFIX.sub("", model)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ClaudeAdapter(BaseAdapter):
    """Adapter for Claude Code CLI (claude -p)."""

    _binary_name = "claude"
    _install_hint = "Claude CLI not found. Install from https://claude.ai/download"

    def __init__(self) -> None:
        self._collector = JsonStdoutCollector()

    def preflight(self, config: AgentConfig) -> list[str]:
        """Return setup problems, including an unsandboxed 'dangerously_skip'."""
        issues = super().preflight(config)
        if config.permission_mode == "dangerously_skip" and not is_sandboxed():
            issues.append(
                "permission_mode='dangerously_skip' requires a sandboxed environment "
                "(Docker container or CODEPROBE_SANDBOX=1)"
            )
        return issues

    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Assemble the ``claude -p`` argv for *prompt*.

        Raises:
            ValueError: if ``config.permission_mode`` is neither "default",
                "dangerously_skip", nor in ``ALLOWED_PERMISSION_MODES``.
        """
        binary = self._require_binary()
        cmd = [binary, "-p", prompt, "--output-format", "json"]

        if config.model:
            cmd.extend(["--model", _normalize_model_for_cli(config.model)])

        if config.permission_mode == "dangerously_skip":
            cmd.append("--dangerously-skip-permissions")
        elif config.permission_mode != "default":
            if config.permission_mode not in ALLOWED_PERMISSION_MODES:
                raise ValueError(
                    f"Unsafe permission_mode: {config.permission_mode!r}. "
                    f"Allowed: {', '.join(sorted(ALLOWED_PERMISSION_MODES))}"
                )
            cmd.extend(["--permission-mode", config.permission_mode])

        # Written last so a validation error above cannot orphan a temp file.
        mcp_path = self._write_mcp_config(config)
        if mcp_path:
            cmd.extend(["--mcp-config", mcp_path])

        return cmd

    def isolate_session(self, slot_id: int) -> dict[str, str]:
        """Return a per-slot CLAUDE_CONFIG_DIR for session isolation."""
        config_dir = (
            Path(tempfile.gettempdir()) / "codeprobe-claude" / f"slot-{slot_id}"
        )
        config_dir.mkdir(parents=True, exist_ok=True)
        return {"CLAUDE_CONFIG_DIR": str(config_dir)}

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Parse Claude CLI JSON envelope into AgentOutput.

        ``stdout`` of the returned object is the envelope's ``result`` text
        when stdout is a JSON object with a string ``result``. In every other
        case (invalid JSON, a non-object document, a missing or non-string
        ``result``) the raw stdout is passed through unchanged.
        """
        usage = self._collector.collect(result.stdout)

        # Extract content text from the JSON envelope
        stdout_text = result.stdout
        try:
            envelope = json.loads(result.stdout)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            envelope = None
        # json.loads may return any JSON type; only an object has "result".
        if isinstance(envelope, dict):
            result_text = envelope.get("result")
            if isinstance(result_text, str):
                stdout_text = result_text

        return AgentOutput(
            stdout=stdout_text,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            cost_usd=usage.cost_usd,
            cost_model=usage.cost_model,
            cost_source=usage.cost_source,
            error=usage.error,
        )
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""OpenAI Codex agent adapter — API-based (no CLI subprocess)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
from codeprobe.adapters.protocol import (
|
|
10
|
+
AdapterExecutionError,
|
|
11
|
+
AdapterSetupError,
|
|
12
|
+
AgentConfig,
|
|
13
|
+
AgentOutput,
|
|
14
|
+
)
|
|
15
|
+
from codeprobe.adapters.telemetry import ApiResponseCollector
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _usage_fields(
|
|
21
|
+
usage: object | None, input_attr: str, output_attr: str
|
|
22
|
+
) -> tuple[int | None, int | None]:
|
|
23
|
+
"""Extract input/output token counts from an API usage object."""
|
|
24
|
+
if usage is None:
|
|
25
|
+
return None, None
|
|
26
|
+
return getattr(usage, input_attr, None), getattr(usage, output_attr, None)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CodexAdapter:
    """Adapter for OpenAI Codex API.

    Tries the Responses API (responses.create) first. If the model is not
    available on that endpoint (NotFoundError), falls back to the Chat
    Completions API (chat.completions.create).
    """

    def __init__(self) -> None:
        self._collector = ApiResponseCollector()

    @property
    def name(self) -> str:
        return "codex"

    def preflight(self, config: AgentConfig) -> list[str]:
        """Return setup problems: missing openai SDK or unset OPENAI_API_KEY."""
        issues: list[str] = []
        try:
            import openai  # noqa: F401
        except ImportError:
            issues.append(
                "openai SDK not found. Install with: pip install codeprobe[codex]"
            )
            # Without the SDK the API-key check is moot; report and stop.
            return issues
        if not os.environ.get("OPENAI_API_KEY"):
            issues.append("OPENAI_API_KEY environment variable not set")
        return issues

    def isolate_session(self, slot_id: int) -> dict[str, str]:
        """Codex uses API calls — no session-level isolation needed."""
        return {}

    def run(
        self,
        prompt: str,
        config: AgentConfig,
        session_env: dict[str, str] | None = None,
    ) -> AgentOutput:
        """Send *prompt* to the OpenAI API and return the reply as AgentOutput.

        ``config.timeout_seconds`` is applied as the client's request timeout,
        in line with the subprocess timeout used by the CLI adapters. The
        limit is per request. ``session_env`` is accepted for interface
        compatibility and is not used.

        Raises:
            AdapterSetupError: openai SDK missing, or the API key is rejected.
            AdapterExecutionError: rate limiting, a model unavailable on both
                endpoints, or any other OpenAI API error (timeouts included).
        """
        try:
            import openai
        except ImportError as exc:
            raise AdapterSetupError(
                "openai SDK not installed. Run: pip install codeprobe[codex]"
            ) from exc

        client = openai.OpenAI(timeout=config.timeout_seconds)
        model = config.model or "codex-mini-latest"
        start = time.monotonic()

        try:
            try:
                response = client.responses.create(model=model, input=prompt)
                stdout = response.output_text or ""
                input_tokens, output_tokens = _usage_fields(
                    response.usage, "input_tokens", "output_tokens"
                )
            except openai.NotFoundError:
                logger.info(
                    "Model %s not found on Responses API, falling back to "
                    "Chat Completions API",
                    model,
                )
                try:
                    response = client.chat.completions.create(
                        model=model,
                        messages=[{"role": "user", "content": prompt}],
                    )
                except openai.NotFoundError as exc:
                    raise AdapterExecutionError(
                        f"Model {model!r} not available on Responses or "
                        f"Chat Completions API: {exc}"
                    ) from exc
                content = (
                    response.choices[0].message.content if response.choices else None
                )
                stdout = content or ""
                # Chat Completions names its usage fields differently.
                input_tokens, output_tokens = _usage_fields(
                    response.usage, "prompt_tokens", "completion_tokens"
                )
        except openai.AuthenticationError as exc:
            raise AdapterSetupError(f"OPENAI_API_KEY invalid: {exc}") from exc
        except openai.RateLimitError as exc:
            raise AdapterExecutionError(f"Rate limited: {exc}") from exc
        except openai.APIError as exc:
            raise AdapterExecutionError(f"OpenAI API error: {exc}") from exc

        duration = time.monotonic() - start

        usage = self._collector.collect(
            stdout,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            model=model,
        )

        return AgentOutput(
            stdout=stdout,
            stderr=None,
            exit_code=0,
            duration_seconds=duration,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cost_usd=usage.cost_usd,
            cost_model=usage.cost_model,
            cost_source=usage.cost_source,
            error=usage.error,
        )
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""GitHub Copilot CLI agent adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import subprocess
|
|
8
|
+
|
|
9
|
+
from codeprobe.adapters._base import BaseAdapter
|
|
10
|
+
from codeprobe.adapters.protocol import AgentConfig, AgentOutput
|
|
11
|
+
from codeprobe.adapters.telemetry import NdjsonStreamCollector
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CopilotAdapter(BaseAdapter):
    """Adapter for GitHub Copilot CLI."""

    _binary_name = "copilot"
    _install_hint = (
        "Copilot CLI not found. Install from https://github.com/github/copilot-cli"
    )

    # NDJSON event types whose ``data.content`` contributes to the output.
    _CONTENT_EVENT_TYPES = ("assistant.message", "result")

    def __init__(self) -> None:
        self._collector = NdjsonStreamCollector()

    def preflight(self, config: AgentConfig) -> list[str]:
        return super().preflight(config)

    def build_command(self, prompt: str, config: AgentConfig) -> list[str]:
        """Assemble the copilot argv for *prompt* (JSON output, tools allowed)."""
        binary = self._require_binary()
        cmd = [binary, "--prompt", prompt, "--output-format", "json"]

        # Non-interactive mode requires --allow-all-tools for tool auto-approval
        cmd.append("--allow-all-tools")

        if config.model:
            cmd.extend(["--model", config.model])

        mcp_path = self._write_mcp_config(config)
        if mcp_path:
            # The config file path is passed with a leading "@".
            cmd.extend(["--additional-mcp-config", f"@{mcp_path}"])

        return cmd

    def parse_output(
        self, result: subprocess.CompletedProcess[str], duration: float
    ) -> AgentOutput:
        """Parse Copilot CLI NDJSON output for token data.

        Requires Copilot CLI 1.0.4+ with --output-format json which emits
        NDJSON lines containing "assistant.message" events with outputTokens.

        Token and cost figures come from the telemetry collector. The text
        output is the newline-joined ``data.content`` of every
        "assistant.message" and "result" event. Lines that decode to
        something other than an object, events whose ``data`` is not an
        object, and non-string content are skipped. If any line is not valid
        JSON, or no content is found, the raw stdout is used instead and the
        fallback is recorded in ``error``.
        """
        raw = result.stdout or ""
        usage = self._collector.collect(raw)

        # Extract content text from NDJSON events.
        # On JSON parse failure, the except clause resets to empty,
        # and the fallback below uses raw output — matching original behavior.
        result_text_parts: list[str] = []
        try:
            for line in raw.strip().splitlines():
                if not line.strip():
                    continue
                event = json.loads(line)
                if not isinstance(event, dict):
                    continue
                if event.get("type", "") not in self._CONTENT_EVENT_TYPES:
                    continue
                data = event.get("data")
                if not isinstance(data, dict):
                    continue
                content = data.get("content", "")
                if isinstance(content, str) and content:
                    result_text_parts.append(content)
        except json.JSONDecodeError:
            logger.warning(
                "ndjson_parse_fallback: failed to parse Copilot NDJSON output, "
                "falling back to raw stdout"
            )
            result_text_parts = []
        stdout_text = "\n".join(result_text_parts) if result_text_parts else raw

        # When NDJSON parsing fell back to raw output, surface the fallback
        # in the error field so callers can detect degraded telemetry.
        error = usage.error
        if not result_text_parts and raw:
            fallback_msg = "ndjson_parse_fallback: raw stdout used as output"
            error = f"{error}; {fallback_msg}" if error else fallback_msg

        return AgentOutput(
            stdout=stdout_text,
            stderr=result.stderr or None,
            exit_code=result.returncode,
            duration_seconds=duration,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cost_usd=usage.cost_usd,
            cost_model=usage.cost_model,
            cost_source=usage.cost_source,
            error=error,
        )
|