codeprobe 0.2.7__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeprobe-0.2.7 → codeprobe-0.2.8}/PKG-INFO +1 -1
- {codeprobe-0.2.7 → codeprobe-0.2.8}/pyproject.toml +1 -1
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/__init__.py +1 -1
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/executor.py +7 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/writer.py +2 -2
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/PKG-INFO +1 -1
- {codeprobe-0.2.7 → codeprobe-0.2.8}/LICENSE +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/README.md +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/setup.cfg +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/__main__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/_base.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/aider.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/claude.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/codex.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/copilot.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/openai_compat.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/protocol.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/session.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/telemetry.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/ranking.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/report.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/stats.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/api.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/assess/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/assess/heuristics.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/assess_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/experiment_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/init_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/interpret_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/mine_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/probe_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/ratings_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/run_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/scaffold_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/wizard.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/yaml_writer.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/config/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/config/loader.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/_shared.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/adaptive.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/counterfactual.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/debate.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/decision_tree.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/elo.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/fingerprint.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/mutation.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/pareto.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/sprt.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/tournament.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/checkpoint.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/experiment.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/isolation.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/llm.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/preamble.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/registry.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/sandbox.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/scoring.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/loaders/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/_lang.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator_backends.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator_tiers.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/extractor.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_families.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_oracle.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_scanner.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_validate.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/sg_ground_truth.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/sources.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/evalrc.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/experiment.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/preamble.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/task.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/github.md +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/sourcegraph.md +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/generator.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/writer.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/ratings/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/ratings/collector.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/scaffold/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/scaffold/writer.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/__init__.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/SOURCES.txt +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/dependency_links.txt +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/entry_points.txt +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/requires.txt +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/top_level.txt +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_adapters.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_analysis.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_api.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_assess.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_changed_symbols.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_checkpoint.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_cli.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_config_loader.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_contrib.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_backends.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_core.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_integration.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_tiers.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_executor.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_experiment_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_experiment_core.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_init_wizard.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_llm.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_loaders.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mcp_families_mining.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mcp_validate.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mining.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_models.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_new_families.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_openai_compat.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_oracle_types.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_org_scale.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_pipeline_integration.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_preamble.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_probe.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_ratings.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_ratings_cmd.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_registry.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scaffold.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scanner_refactor.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scoring.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_session.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_sg_ground_truth.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_telemetry.py +0 -0
- {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_weighted_f1.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -191,6 +191,13 @@ def execute_task(
|
|
|
191
191
|
if task_rt and task_rt != "binary":
|
|
192
192
|
reward_type = task_rt
|
|
193
193
|
|
|
194
|
+
# Remove stale answer.txt / reward.txt from prior runs so they don't
|
|
195
|
+
# leak into this task's scoring sandbox.
|
|
196
|
+
for stale in ("answer.txt", "reward.txt"):
|
|
197
|
+
stale_path = task_dir / stale
|
|
198
|
+
if stale_path.is_file():
|
|
199
|
+
stale_path.unlink(missing_ok=True)
|
|
200
|
+
|
|
194
201
|
def _error_result(error: str, error_category: str | None = None) -> TaskResult:
|
|
195
202
|
return TaskResult(
|
|
196
203
|
completed=CompletedTask(
|
|
@@ -467,8 +467,8 @@ def _write_oracle_task(
|
|
|
467
467
|
f"# Oracle verification for org-scale task {safe_id}\n"
|
|
468
468
|
'SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"\n'
|
|
469
469
|
'TASK_DIR="$(dirname "$SCRIPT_DIR")"\n\n'
|
|
470
|
-
"#
|
|
471
|
-
'if [ -n "${AGENT_OUTPUT:-}" ] && [ -f "$AGENT_OUTPUT" ]; then\n'
|
|
470
|
+
"# Fallback: if agent wrote to stdout instead of answer.txt, use $AGENT_OUTPUT\n"
|
|
471
|
+
'if [ ! -f "$TASK_DIR/answer.txt" ] && [ -n "${AGENT_OUTPUT:-}" ] && [ -f "$AGENT_OUTPUT" ]; then\n'
|
|
472
472
|
' cp "$AGENT_OUTPUT" "$TASK_DIR/answer.txt"\n'
|
|
473
473
|
"fi\n\n"
|
|
474
474
|
"# Self-contained oracle check — no codeprobe install required\n"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|