codeprobe 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeprobe-0.2.4 → codeprobe-0.2.6}/PKG-INFO +1 -1
- {codeprobe-0.2.4 → codeprobe-0.2.6}/pyproject.toml +1 -1
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/__init__.py +1 -1
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/run_cmd.py +19 -5
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/PKG-INFO +1 -1
- {codeprobe-0.2.4 → codeprobe-0.2.6}/LICENSE +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/README.md +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/setup.cfg +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/__main__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/_base.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/aider.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/claude.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/codex.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/copilot.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/openai_compat.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/protocol.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/session.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/adapters/telemetry.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/analysis/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/analysis/ranking.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/analysis/report.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/analysis/stats.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/api.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/assess/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/assess/heuristics.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/assess_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/experiment_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/init_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/interpret_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/mine_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/probe_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/ratings_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/scaffold_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/wizard.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/cli/yaml_writer.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/config/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/config/loader.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/_shared.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/adaptive.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/counterfactual.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/debate.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/decision_tree.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/elo.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/fingerprint.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/mutation.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/pareto.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/sprt.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/contrib/tournament.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/checkpoint.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/executor.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/experiment.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/isolation.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/llm.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/preamble.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/registry.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/sandbox.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/core/scoring.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/loaders/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/_lang.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/curator.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/curator_backends.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/curator_tiers.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/extractor.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/org_scale.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/org_scale_families.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/org_scale_oracle.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/org_scale_scanner.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/org_scale_validate.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/sg_ground_truth.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/sources.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/mining/writer.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/models/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/models/evalrc.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/models/experiment.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/models/preamble.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/models/task.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/preambles/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/preambles/github.md +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/preambles/sourcegraph.md +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/probe/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/probe/generator.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/probe/writer.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/ratings/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/ratings/collector.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/scaffold/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/scaffold/writer.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/templates/__init__.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/SOURCES.txt +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/dependency_links.txt +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/entry_points.txt +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/requires.txt +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/src/codeprobe.egg-info/top_level.txt +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_adapters.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_analysis.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_api.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_assess.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_changed_symbols.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_checkpoint.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_cli.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_config_loader.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_contrib.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_curator_backends.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_curator_core.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_curator_integration.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_curator_tiers.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_executor.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_experiment_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_experiment_core.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_init_wizard.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_llm.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_loaders.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_mcp_families_mining.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_mcp_validate.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_mining.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_models.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_new_families.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_openai_compat.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_oracle_types.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_org_scale.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_pipeline_integration.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_preamble.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_probe.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_ratings.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_ratings_cmd.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_registry.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_scaffold.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_scanner_refactor.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_scoring.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_session.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_sg_ground_truth.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_telemetry.py +0 -0
- {codeprobe-0.2.4 → codeprobe-0.2.6}/tests/test_weighted_f1.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
import subprocess
|
|
6
7
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
|
|
@@ -106,8 +107,22 @@ def run_eval(
|
|
|
106
107
|
click.echo(f"Error: {exc}", err=True)
|
|
107
108
|
raise SystemExit(1)
|
|
108
109
|
|
|
110
|
+
# Resolve to the git repo root — `path` may be an experiment subdir.
|
|
111
|
+
try:
|
|
112
|
+
repo_root = Path(
|
|
113
|
+
subprocess.run(
|
|
114
|
+
["git", "rev-parse", "--show-toplevel"],
|
|
115
|
+
cwd=Path(path).resolve(),
|
|
116
|
+
capture_output=True,
|
|
117
|
+
text=True,
|
|
118
|
+
check=True,
|
|
119
|
+
).stdout.strip()
|
|
120
|
+
)
|
|
121
|
+
except (subprocess.CalledProcessError, OSError):
|
|
122
|
+
repo_root = Path(path).resolve()
|
|
123
|
+
|
|
109
124
|
tasks_dir = exp_dir / experiment.tasks_dir
|
|
110
|
-
repo_tasks =
|
|
125
|
+
repo_tasks = repo_root / ".codeprobe" / experiment.tasks_dir
|
|
111
126
|
|
|
112
127
|
task_dirs = _find_tasks(tasks_dir, task_ids=experiment.task_ids)
|
|
113
128
|
if not task_dirs and repo_tasks != tasks_dir:
|
|
@@ -132,7 +147,7 @@ def run_eval(
|
|
|
132
147
|
configs_count=len(configs_to_run),
|
|
133
148
|
repeats=repeats,
|
|
134
149
|
parallel=parallel,
|
|
135
|
-
repo_path=
|
|
150
|
+
repo_path=repo_root,
|
|
136
151
|
)
|
|
137
152
|
_print_dry_run(estimate)
|
|
138
153
|
return
|
|
@@ -165,7 +180,7 @@ def run_eval(
|
|
|
165
180
|
permission_mode=perm,
|
|
166
181
|
timeout_seconds=timeout,
|
|
167
182
|
mcp_config=exp_config.mcp_config,
|
|
168
|
-
cwd=str(
|
|
183
|
+
cwd=str(repo_root),
|
|
169
184
|
)
|
|
170
185
|
|
|
171
186
|
issues = config_adapter.preflight(agent_config)
|
|
@@ -186,9 +201,8 @@ def run_eval(
|
|
|
186
201
|
# Compute directories to exclude from git clean between sequential
|
|
187
202
|
# tasks so the experiment dir (untracked) isn't deleted.
|
|
188
203
|
_clean_excludes: tuple[str, ...] = ()
|
|
189
|
-
resolved_repo = Path(path).resolve()
|
|
190
204
|
try:
|
|
191
|
-
rel = exp_dir.resolve().relative_to(resolved_repo)
|
|
205
|
+
rel = exp_dir.resolve().relative_to(repo_root)
|
|
192
206
|
top_dir = str(rel).split("/")[0]
|
|
193
207
|
if top_dir and top_dir != ".":
|
|
194
208
|
_clean_excludes = (top_dir,)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|