codeprobe 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeprobe-0.2.1 → codeprobe-0.2.2}/PKG-INFO +1 -1
- {codeprobe-0.2.1 → codeprobe-0.2.2}/pyproject.toml +1 -1
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/executor.py +19 -3
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/PKG-INFO +1 -1
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_executor.py +1 -1
- {codeprobe-0.2.1 → codeprobe-0.2.2}/LICENSE +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/README.md +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/setup.cfg +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/__main__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/_base.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/aider.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/claude.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/codex.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/copilot.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/openai_compat.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/protocol.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/session.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/telemetry.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/ranking.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/report.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/stats.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/api.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/assess/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/assess/heuristics.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/assess_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/experiment_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/init_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/interpret_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/mine_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/probe_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/ratings_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/run_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/scaffold_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/wizard.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/yaml_writer.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/config/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/config/loader.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/_shared.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/adaptive.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/counterfactual.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/debate.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/decision_tree.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/elo.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/fingerprint.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/mutation.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/pareto.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/sprt.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/tournament.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/checkpoint.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/experiment.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/isolation.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/llm.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/preamble.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/registry.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/sandbox.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/scoring.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/loaders/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/_lang.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator_backends.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator_tiers.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/extractor.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_families.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_oracle.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_scanner.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_validate.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/sg_ground_truth.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/sources.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/writer.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/evalrc.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/experiment.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/preamble.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/task.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/github.md +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/sourcegraph.md +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/generator.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/writer.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/ratings/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/ratings/collector.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/scaffold/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/scaffold/writer.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/__init__.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/SOURCES.txt +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/dependency_links.txt +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/entry_points.txt +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/requires.txt +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/top_level.txt +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_adapters.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_analysis.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_api.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_assess.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_changed_symbols.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_checkpoint.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_cli.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_config_loader.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_contrib.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_backends.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_core.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_integration.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_tiers.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_experiment_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_experiment_core.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_init_wizard.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_llm.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_loaders.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mcp_families_mining.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mcp_validate.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mining.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_models.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_new_families.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_openai_compat.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_oracle_types.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_org_scale.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_pipeline_integration.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_preamble.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_probe.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_ratings.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_ratings_cmd.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_registry.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scaffold.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scanner_refactor.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scoring.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_session.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_sg_ground_truth.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_telemetry.py +0 -0
- {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_weighted_f1.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -334,14 +334,16 @@ def execute_task(
|
|
|
334
334
|
_BILLABLE_COST_MODELS = frozenset({"per_token"})
|
|
335
335
|
|
|
336
336
|
|
|
337
|
-
def _git_reset_workdir(repo_path: Path) -> None:
|
|
337
|
+
def _git_reset_workdir(
|
|
338
|
+
repo_path: Path, *, extra_excludes: tuple[str, ...] = ()
|
|
339
|
+
) -> None:
|
|
338
340
|
"""Reset the working directory to a clean state between sequential tasks.
|
|
339
341
|
|
|
340
342
|
Runs ``git restore .`` and ``git clean -fd`` to discard modifications
|
|
341
343
|
and remove untracked files so task N's leftovers don't corrupt task N+1.
|
|
342
344
|
"""
|
|
343
345
|
try:
|
|
344
|
-
git_restore_clean(repo_path)
|
|
346
|
+
git_restore_clean(repo_path, extra_excludes=extra_excludes)
|
|
345
347
|
except subprocess.CalledProcessError as exc:
|
|
346
348
|
logger.warning(
|
|
347
349
|
"Git reset failed (exit %d): %s",
|
|
@@ -458,6 +460,20 @@ def execute_config(
|
|
|
458
460
|
results are returned. Tasks with ``unknown`` or ``subscription``
|
|
459
461
|
cost models are skipped in accumulation.
|
|
460
462
|
"""
|
|
463
|
+
# Compute directories to exclude from git clean so that experiment
|
|
464
|
+
# artifacts (runs/, tasks/, experiment.json) survive between tasks.
|
|
465
|
+
# runs_dir is e.g. <repo>/mcp-comparison/runs/baseline — walk up to
|
|
466
|
+
# find the experiment root relative to repo_path.
|
|
467
|
+
_clean_excludes: tuple[str, ...] = ()
|
|
468
|
+
if runs_dir is not None:
|
|
469
|
+
try:
|
|
470
|
+
exp_root = runs_dir.resolve().parent.parent # runs/<label> → exp dir
|
|
471
|
+
rel = exp_root.relative_to(repo_path.resolve())
|
|
472
|
+
# Exclude the top-level experiment directory name
|
|
473
|
+
_clean_excludes = (str(rel).split("/")[0],)
|
|
474
|
+
except ValueError:
|
|
475
|
+
pass # experiment dir is outside the repo — nothing to exclude
|
|
476
|
+
|
|
461
477
|
checkpointed_ids, results = _restore_checkpointed(checkpoint_store)
|
|
462
478
|
|
|
463
479
|
# Filter checkpointed results to only include tasks in the current
|
|
@@ -562,7 +578,7 @@ def execute_config(
|
|
|
562
578
|
# Reset working directory between tasks so leftovers from
|
|
563
579
|
# task N don't corrupt task N+1's results.
|
|
564
580
|
if idx > 0:
|
|
565
|
-
_git_reset_workdir(repo_path)
|
|
581
|
+
_git_reset_workdir(repo_path, extra_excludes=_clean_excludes)
|
|
566
582
|
task_result = _run_one(task_dir, repeat_index=repeat_index)
|
|
567
583
|
_handle_result(task_result)
|
|
568
584
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeprobe
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
|
|
5
5
|
Author: codeprobe contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -551,7 +551,7 @@ def test_execute_config_resets_workdir_between_sequential_tasks(tmp_path: Path):
|
|
|
551
551
|
)
|
|
552
552
|
# Reset should be called between tasks (not before first), so 2 times for 3 tasks
|
|
553
553
|
assert mock_reset.call_count == 2
|
|
554
|
-
mock_reset.assert_any_call(Path("/repo"))
|
|
554
|
+
mock_reset.assert_any_call(Path("/repo"), extra_excludes=())
|
|
555
555
|
|
|
556
556
|
|
|
557
557
|
def test_execute_config_no_reset_in_parallel_mode(tmp_path: Path):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|