codeprobe 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. {codeprobe-0.2.1 → codeprobe-0.2.2}/PKG-INFO +1 -1
  2. {codeprobe-0.2.1 → codeprobe-0.2.2}/pyproject.toml +1 -1
  3. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/executor.py +19 -3
  4. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/PKG-INFO +1 -1
  5. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_executor.py +1 -1
  6. {codeprobe-0.2.1 → codeprobe-0.2.2}/LICENSE +0 -0
  7. {codeprobe-0.2.1 → codeprobe-0.2.2}/README.md +0 -0
  8. {codeprobe-0.2.1 → codeprobe-0.2.2}/setup.cfg +0 -0
  9. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/__init__.py +0 -0
  10. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/__main__.py +0 -0
  11. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/__init__.py +0 -0
  12. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/_base.py +0 -0
  13. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/aider.py +0 -0
  14. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/claude.py +0 -0
  15. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/codex.py +0 -0
  16. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/copilot.py +0 -0
  17. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/openai_compat.py +0 -0
  18. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/protocol.py +0 -0
  19. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/session.py +0 -0
  20. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/adapters/telemetry.py +0 -0
  21. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/__init__.py +0 -0
  22. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/ranking.py +0 -0
  23. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/report.py +0 -0
  24. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/analysis/stats.py +0 -0
  25. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/api.py +0 -0
  26. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/assess/__init__.py +0 -0
  27. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/assess/heuristics.py +0 -0
  28. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/__init__.py +0 -0
  29. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/assess_cmd.py +0 -0
  30. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/experiment_cmd.py +0 -0
  31. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/init_cmd.py +0 -0
  32. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/interpret_cmd.py +0 -0
  33. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/mine_cmd.py +0 -0
  34. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/probe_cmd.py +0 -0
  35. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/ratings_cmd.py +0 -0
  36. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/run_cmd.py +0 -0
  37. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  38. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/wizard.py +0 -0
  39. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/cli/yaml_writer.py +0 -0
  40. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/config/__init__.py +0 -0
  41. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/config/loader.py +0 -0
  42. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/__init__.py +0 -0
  43. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/_shared.py +0 -0
  44. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/adaptive.py +0 -0
  45. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/counterfactual.py +0 -0
  46. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/debate.py +0 -0
  47. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/decision_tree.py +0 -0
  48. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/elo.py +0 -0
  49. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/fingerprint.py +0 -0
  50. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/mutation.py +0 -0
  51. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/pareto.py +0 -0
  52. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/sprt.py +0 -0
  53. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/contrib/tournament.py +0 -0
  54. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/__init__.py +0 -0
  55. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/checkpoint.py +0 -0
  56. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/experiment.py +0 -0
  57. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/isolation.py +0 -0
  58. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/llm.py +0 -0
  59. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/preamble.py +0 -0
  60. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/registry.py +0 -0
  61. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/sandbox.py +0 -0
  62. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/core/scoring.py +0 -0
  63. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/loaders/__init__.py +0 -0
  64. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/__init__.py +0 -0
  65. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/_lang.py +0 -0
  66. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator.py +0 -0
  67. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator_backends.py +0 -0
  68. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/curator_tiers.py +0 -0
  69. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/extractor.py +0 -0
  70. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale.py +0 -0
  71. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_families.py +0 -0
  72. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  73. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  74. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/org_scale_validate.py +0 -0
  75. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  76. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/sources.py +0 -0
  77. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/mining/writer.py +0 -0
  78. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/__init__.py +0 -0
  79. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/evalrc.py +0 -0
  80. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/experiment.py +0 -0
  81. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/preamble.py +0 -0
  82. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/models/task.py +0 -0
  83. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/__init__.py +0 -0
  84. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/github.md +0 -0
  85. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/preambles/sourcegraph.md +0 -0
  86. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/__init__.py +0 -0
  87. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/generator.py +0 -0
  88. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/probe/writer.py +0 -0
  89. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/ratings/__init__.py +0 -0
  90. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/ratings/collector.py +0 -0
  91. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/scaffold/__init__.py +0 -0
  92. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/scaffold/writer.py +0 -0
  93. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/__init__.py +0 -0
  94. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  95. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  96. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  97. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  98. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  99. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/entry_points.txt +0 -0
  100. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/requires.txt +0 -0
  101. {codeprobe-0.2.1 → codeprobe-0.2.2}/src/codeprobe.egg-info/top_level.txt +0 -0
  102. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_adapters.py +0 -0
  103. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_analysis.py +0 -0
  104. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_api.py +0 -0
  105. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_assess.py +0 -0
  106. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_changed_symbols.py +0 -0
  107. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_checkpoint.py +0 -0
  108. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_cli.py +0 -0
  109. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_config_loader.py +0 -0
  110. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_contrib.py +0 -0
  111. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_backends.py +0 -0
  112. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_core.py +0 -0
  113. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_integration.py +0 -0
  114. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_curator_tiers.py +0 -0
  115. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_experiment_cmd.py +0 -0
  116. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_experiment_core.py +0 -0
  117. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_init_wizard.py +0 -0
  118. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_llm.py +0 -0
  119. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_loaders.py +0 -0
  120. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mcp_families_mining.py +0 -0
  121. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mcp_validate.py +0 -0
  122. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_mining.py +0 -0
  123. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_models.py +0 -0
  124. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_new_families.py +0 -0
  125. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_openai_compat.py +0 -0
  126. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_oracle_types.py +0 -0
  127. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_org_scale.py +0 -0
  128. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_pipeline_integration.py +0 -0
  129. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_preamble.py +0 -0
  130. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_probe.py +0 -0
  131. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_ratings.py +0 -0
  132. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_ratings_cmd.py +0 -0
  133. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_registry.py +0 -0
  134. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scaffold.py +0 -0
  135. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scanner_refactor.py +0 -0
  136. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_scoring.py +0 -0
  137. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_session.py +0 -0
  138. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_sg_ground_truth.py +0 -0
  139. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_telemetry.py +0 -0
  140. {codeprobe-0.2.1 → codeprobe-0.2.2}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -334,14 +334,16 @@ def execute_task(
334
334
  _BILLABLE_COST_MODELS = frozenset({"per_token"})
335
335
 
336
336
 
337
- def _git_reset_workdir(repo_path: Path) -> None:
337
+ def _git_reset_workdir(
338
+ repo_path: Path, *, extra_excludes: tuple[str, ...] = ()
339
+ ) -> None:
338
340
  """Reset the working directory to a clean state between sequential tasks.
339
341
 
340
342
  Runs ``git restore .`` and ``git clean -fd`` to discard modifications
341
343
  and remove untracked files so task N's leftovers don't corrupt task N+1.
342
344
  """
343
345
  try:
344
- git_restore_clean(repo_path)
346
+ git_restore_clean(repo_path, extra_excludes=extra_excludes)
345
347
  except subprocess.CalledProcessError as exc:
346
348
  logger.warning(
347
349
  "Git reset failed (exit %d): %s",
@@ -458,6 +460,20 @@ def execute_config(
458
460
  results are returned. Tasks with ``unknown`` or ``subscription``
459
461
  cost models are skipped in accumulation.
460
462
  """
463
+ # Compute directories to exclude from git clean so that experiment
464
+ # artifacts (runs/, tasks/, experiment.json) survive between tasks.
465
+ # runs_dir is e.g. <repo>/mcp-comparison/runs/baseline — walk up to
466
+ # find the experiment root relative to repo_path.
467
+ _clean_excludes: tuple[str, ...] = ()
468
+ if runs_dir is not None:
469
+ try:
470
+ exp_root = runs_dir.resolve().parent.parent # runs/<label> → exp dir
471
+ rel = exp_root.relative_to(repo_path.resolve())
472
+ # Exclude the top-level experiment directory name
473
+ _clean_excludes = (str(rel).split("/")[0],)
474
+ except ValueError:
475
+ pass # experiment dir is outside the repo — nothing to exclude
476
+
461
477
  checkpointed_ids, results = _restore_checkpointed(checkpoint_store)
462
478
 
463
479
  # Filter checkpointed results to only include tasks in the current
@@ -562,7 +578,7 @@ def execute_config(
562
578
  # Reset working directory between tasks so leftovers from
563
579
  # task N don't corrupt task N+1's results.
564
580
  if idx > 0:
565
- _git_reset_workdir(repo_path)
581
+ _git_reset_workdir(repo_path, extra_excludes=_clean_excludes)
566
582
  task_result = _run_one(task_dir, repeat_index=repeat_index)
567
583
  _handle_result(task_result)
568
584
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -551,7 +551,7 @@ def test_execute_config_resets_workdir_between_sequential_tasks(tmp_path: Path):
551
551
  )
552
552
  # Reset should be called between tasks (not before first), so 2 times for 3 tasks
553
553
  assert mock_reset.call_count == 2
554
- mock_reset.assert_any_call(Path("/repo"))
554
+ mock_reset.assert_any_call(Path("/repo"), extra_excludes=())
555
555
 
556
556
 
557
557
  def test_execute_config_no_reset_in_parallel_mode(tmp_path: Path):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes