codeprobe 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. {codeprobe-0.3.1 → codeprobe-0.3.3}/PKG-INFO +1 -1
  2. {codeprobe-0.3.1 → codeprobe-0.3.3}/pyproject.toml +1 -1
  3. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/executor.py +3 -1
  4. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/isolation.py +59 -11
  5. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/PKG-INFO +1 -1
  6. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_executor.py +16 -8
  7. {codeprobe-0.3.1 → codeprobe-0.3.3}/LICENSE +0 -0
  8. {codeprobe-0.3.1 → codeprobe-0.3.3}/README.md +0 -0
  9. {codeprobe-0.3.1 → codeprobe-0.3.3}/setup.cfg +0 -0
  10. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/__init__.py +0 -0
  11. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/__main__.py +0 -0
  12. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/__init__.py +0 -0
  13. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/_base.py +0 -0
  14. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/claude.py +0 -0
  15. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/codex.py +0 -0
  16. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/copilot.py +0 -0
  17. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/openai_compat.py +0 -0
  18. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/protocol.py +0 -0
  19. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/session.py +0 -0
  20. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/adapters/telemetry.py +0 -0
  21. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/analysis/__init__.py +0 -0
  22. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/analysis/ranking.py +0 -0
  23. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/analysis/report.py +0 -0
  24. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/analysis/stats.py +0 -0
  25. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/api.py +0 -0
  26. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/assess/__init__.py +0 -0
  27. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/assess/heuristics.py +0 -0
  28. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/__init__.py +0 -0
  29. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/assess_cmd.py +0 -0
  30. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/doctor_cmd.py +0 -0
  31. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/experiment_cmd.py +0 -0
  32. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/init_cmd.py +0 -0
  33. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/interpret_cmd.py +0 -0
  34. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/json_display.py +0 -0
  35. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/mine_cmd.py +0 -0
  36. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/preamble_cmd.py +0 -0
  37. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/probe_cmd.py +0 -0
  38. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/ratings_cmd.py +0 -0
  39. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/rich_display.py +0 -0
  40. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/run_cmd.py +0 -0
  41. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  42. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/validate_cmd.py +0 -0
  43. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/wizard.py +0 -0
  44. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/cli/yaml_writer.py +0 -0
  45. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/config/__init__.py +0 -0
  46. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/config/loader.py +0 -0
  47. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/__init__.py +0 -0
  48. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/_shared.py +0 -0
  49. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/adaptive.py +0 -0
  50. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/counterfactual.py +0 -0
  51. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/debate.py +0 -0
  52. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/decision_tree.py +0 -0
  53. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/elo.py +0 -0
  54. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/fingerprint.py +0 -0
  55. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/mutation.py +0 -0
  56. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/pareto.py +0 -0
  57. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/sprt.py +0 -0
  58. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/contrib/tournament.py +0 -0
  59. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/__init__.py +0 -0
  60. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/__main__.py +0 -0
  61. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/checkpoint.py +0 -0
  62. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/events.py +0 -0
  63. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/experiment.py +0 -0
  64. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/llm.py +0 -0
  65. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/mcp_discovery.py +0 -0
  66. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/preamble.py +0 -0
  67. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/registry.py +0 -0
  68. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/sandbox.py +0 -0
  69. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/core/scoring.py +0 -0
  70. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/loaders/__init__.py +0 -0
  71. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/loaders/suite.py +0 -0
  72. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/__init__.py +0 -0
  73. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/_graph.py +0 -0
  74. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/_lang.py +0 -0
  75. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/comprehension.py +0 -0
  76. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/comprehension_writer.py +0 -0
  77. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/curator.py +0 -0
  78. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/curator_backends.py +0 -0
  79. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/curator_tiers.py +0 -0
  80. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/extractor.py +0 -0
  81. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/org_scale.py +0 -0
  82. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/org_scale_families.py +0 -0
  83. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  84. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  85. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/org_scale_validate.py +0 -0
  86. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  87. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/sources.py +0 -0
  88. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/mining/writer.py +0 -0
  89. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/__init__.py +0 -0
  90. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/evalrc.py +0 -0
  91. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/experiment.py +0 -0
  92. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/preamble.py +0 -0
  93. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/suite.py +0 -0
  94. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/models/task.py +0 -0
  95. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/preambles/__init__.py +0 -0
  96. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/preambles/github.md +0 -0
  97. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/preambles/sourcegraph.md +0 -0
  98. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/probe/__init__.py +0 -0
  99. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/probe/adapter.py +0 -0
  100. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/probe/generator.py +0 -0
  101. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/probe/writer.py +0 -0
  102. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/ratings/__init__.py +0 -0
  103. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/ratings/collector.py +0 -0
  104. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/scaffold/__init__.py +0 -0
  105. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/scaffold/writer.py +0 -0
  106. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/templates/__init__.py +0 -0
  107. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  108. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  109. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  110. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  111. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  112. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/entry_points.txt +0 -0
  113. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/requires.txt +0 -0
  114. {codeprobe-0.3.1 → codeprobe-0.3.3}/src/codeprobe.egg-info/top_level.txt +0 -0
  115. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_adapter_contracts.py +0 -0
  116. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_adapters.py +0 -0
  117. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_analysis.py +0 -0
  118. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_api.py +0 -0
  119. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_artifact_scorer.py +0 -0
  120. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_assess.py +0 -0
  121. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_changed_symbols.py +0 -0
  122. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_checkpoint.py +0 -0
  123. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_checkpoint_scoring.py +0 -0
  124. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_cli.py +0 -0
  125. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_comprehension.py +0 -0
  126. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_config_loader.py +0 -0
  127. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_contrib.py +0 -0
  128. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_ctrlc_integration.py +0 -0
  129. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_curator_backends.py +0 -0
  130. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_curator_core.py +0 -0
  131. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_curator_integration.py +0 -0
  132. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_curator_tiers.py +0 -0
  133. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_doctor_cmd.py +0 -0
  134. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_events.py +0 -0
  135. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_executor_events.py +0 -0
  136. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_experiment_cmd.py +0 -0
  137. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_experiment_core.py +0 -0
  138. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_init_wizard.py +0 -0
  139. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_json_display.py +0 -0
  140. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_llm.py +0 -0
  141. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_loaders.py +0 -0
  142. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mcp_families_mining.py +0 -0
  143. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mcp_validate.py +0 -0
  144. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mine_goals.py +0 -0
  145. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mine_presets.py +0 -0
  146. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mine_profiles.py +0 -0
  147. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_mining.py +0 -0
  148. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_models.py +0 -0
  149. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_new_families.py +0 -0
  150. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_openai_compat.py +0 -0
  151. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_oracle_types.py +0 -0
  152. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_org_scale.py +0 -0
  153. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_pipeline_integration.py +0 -0
  154. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_preamble.py +0 -0
  155. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_preamble_cmd.py +0 -0
  156. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_probe.py +0 -0
  157. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_probe_adapter.py +0 -0
  158. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_ratings.py +0 -0
  159. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_ratings_cmd.py +0 -0
  160. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_registry.py +0 -0
  161. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_run_config_resolution.py +0 -0
  162. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_scaffold.py +0 -0
  163. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_scanner_refactor.py +0 -0
  164. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_scoring.py +0 -0
  165. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_session.py +0 -0
  166. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_sg_ground_truth.py +0 -0
  167. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_shell_shim.py +0 -0
  168. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_show_prompt.py +0 -0
  169. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_suite.py +0 -0
  170. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_telemetry.py +0 -0
  171. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_validate_cmd.py +0 -0
  172. {codeprobe-0.3.1 → codeprobe-0.3.3}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.3.1"
3
+ version = "0.3.3"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -700,7 +700,9 @@ def execute_config(
700
700
  owns_isolation = False
701
701
  active_isolation = isolation
702
702
  if active_isolation is None:
703
- active_isolation = WorktreeIsolation(repo_path, pool_size=workers)
703
+ active_isolation = WorktreeIsolation(
704
+ repo_path, pool_size=workers, namespace=experiment_config.label
705
+ )
704
706
  owns_isolation = True
705
707
 
706
708
  def _run_isolated(task_dir: Path, repeat_index: int) -> TaskResult:
@@ -53,7 +53,7 @@ def git_restore_clean(workdir: Path, *, extra_excludes: tuple[str, ...] = ()) ->
53
53
  "-e",
54
54
  ".codeprobe",
55
55
  "-e",
56
- ".codeprobe-worktrees",
56
+ ".codeprobe-worktrees*",
57
57
  ]
58
58
  # Auto-discover experiment directories inside the repo
59
59
  for exp_dir in _discover_experiment_dirs(workdir):
@@ -91,12 +91,15 @@ class WorktreeIsolation:
91
91
  slot is free, ``release()`` resets and returns the slot to the pool.
92
92
  """
93
93
 
94
- def __init__(self, repo_path: Path, pool_size: int) -> None:
94
+ def __init__(self, repo_path: Path, pool_size: int, namespace: str = "") -> None:
95
95
  if pool_size < 1:
96
96
  raise ValueError(f"pool_size must be >= 1, got {pool_size}")
97
97
  self._repo_path = repo_path.resolve()
98
98
  self._pool_size = pool_size
99
- self._base_dir = self._repo_path / ".codeprobe-worktrees"
99
+ base_name = ".codeprobe-worktrees"
100
+ if namespace:
101
+ base_name = f"{base_name}-{namespace}"
102
+ self._base_dir = self._repo_path / base_name
100
103
  self._available: queue.Queue[Path] = queue.Queue()
101
104
  self._all_paths: list[Path] = []
102
105
  self._lock = threading.Lock()
@@ -107,20 +110,49 @@ class WorktreeIsolation:
107
110
  with self._lock:
108
111
  if self._created:
109
112
  return
113
+ # Prune stale worktree records left by previous interrupted runs
114
+ subprocess.run(
115
+ ["git", "worktree", "prune"],
116
+ cwd=self._repo_path,
117
+ capture_output=True,
118
+ )
110
119
  self._base_dir.mkdir(parents=True, exist_ok=True)
111
120
  for i in range(self._pool_size):
112
121
  wt_path = self._base_dir / f"slot-{i}"
113
122
  if not wt_path.exists():
114
- subprocess.run(
115
- ["git", "worktree", "add", "--detach", str(wt_path)],
116
- cwd=self._repo_path,
117
- check=True,
118
- capture_output=True,
119
- )
123
+ self._add_worktree(wt_path)
120
124
  self._all_paths.append(wt_path)
121
125
  self._available.put(wt_path)
122
126
  self._created = True
123
127
 
128
+ def _add_worktree(self, wt_path: Path) -> None:
129
+ """Add a detached worktree, recovering from stale git state."""
130
+ try:
131
+ subprocess.run(
132
+ ["git", "worktree", "add", "--detach", str(wt_path)],
133
+ cwd=self._repo_path,
134
+ check=True,
135
+ capture_output=True,
136
+ )
137
+ except subprocess.CalledProcessError:
138
+ # Stale record may remain even after prune — force-remove and retry
139
+ subprocess.run(
140
+ ["git", "worktree", "remove", "--force", str(wt_path)],
141
+ cwd=self._repo_path,
142
+ capture_output=True,
143
+ )
144
+ subprocess.run(
145
+ ["git", "worktree", "prune"],
146
+ cwd=self._repo_path,
147
+ capture_output=True,
148
+ )
149
+ subprocess.run(
150
+ ["git", "worktree", "add", "--detach", str(wt_path)],
151
+ cwd=self._repo_path,
152
+ check=True,
153
+ capture_output=True,
154
+ )
155
+
124
156
  def acquire(self) -> Path:
125
157
  """Get a worktree from the pool (blocks until available)."""
126
158
  if not self._created:
@@ -148,6 +180,8 @@ class WorktreeIsolation:
148
180
 
149
181
  def cleanup(self) -> None:
150
182
  """Remove all managed worktrees."""
183
+ import shutil
184
+
151
185
  for wt_path in self._all_paths:
152
186
  try:
153
187
  subprocess.run(
@@ -156,9 +190,23 @@ class WorktreeIsolation:
156
190
  check=True,
157
191
  capture_output=True,
158
192
  )
159
- except (subprocess.CalledProcessError, OSError) as exc:
160
- logger.warning("Failed to remove worktree %s: %s", wt_path, exc)
193
+ except (subprocess.CalledProcessError, OSError):
194
+ # Force-remove failed — delete the directory and let prune
195
+ # clean up git's internal records.
196
+ try:
197
+ if wt_path.exists():
198
+ shutil.rmtree(wt_path)
199
+ except OSError as rm_exc:
200
+ logger.warning(
201
+ "Failed to remove worktree dir %s: %s", wt_path, rm_exc
202
+ )
161
203
  self._all_paths.clear()
204
+ # Prune any stale records so future runs start clean
205
+ subprocess.run(
206
+ ["git", "worktree", "prune"],
207
+ cwd=self._repo_path,
208
+ capture_output=True,
209
+ )
162
210
  # Clean up base directory if empty
163
211
  try:
164
212
  if self._base_dir.exists() and not any(self._base_dir.iterdir()):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -562,11 +562,16 @@ def test_execute_config_no_reset_in_parallel_mode(tmp_path: Path):
562
562
  exp_config = ExperimentConfig(label="baseline")
563
563
  agent_config = AgentConfig()
564
564
 
565
- with patch("codeprobe.core.executor._git_reset_workdir") as mock_reset:
565
+ fake_iso = MagicMock()
566
+ fake_iso.acquire.return_value = tmp_path
567
+ with (
568
+ patch("codeprobe.core.executor._git_reset_workdir") as mock_reset,
569
+ patch("codeprobe.core.executor.WorktreeIsolation", return_value=fake_iso),
570
+ ):
566
571
  execute_config(
567
572
  adapter=adapter,
568
573
  task_dirs=tasks,
569
- repo_path=Path("/repo"),
574
+ repo_path=tmp_path,
570
575
  experiment_config=exp_config,
571
576
  agent_config=agent_config,
572
577
  parallel=3,
@@ -604,9 +609,10 @@ class TestWorktreeIsolation:
604
609
  # Force pool creation by acquiring
605
610
  iso._base_dir.mkdir(parents=True, exist_ok=True)
606
611
  iso._create_pool()
607
- # Should call git worktree add twice
608
- assert mock_run.call_count == 2
609
- for c in mock_run.call_args_list:
612
+ # Should call git worktree prune once + git worktree add twice
613
+ assert mock_run.call_count == 3
614
+ assert mock_run.call_args_list[0][0][0] == ["git", "worktree", "prune"]
615
+ for c in mock_run.call_args_list[1:]:
610
616
  assert c[0][0][0:3] == ["git", "worktree", "add"]
611
617
 
612
618
  def test_acquire_returns_path(self, tmp_path: Path) -> None:
@@ -635,7 +641,7 @@ class TestWorktreeIsolation:
635
641
  "-e",
636
642
  ".codeprobe",
637
643
  "-e",
638
- ".codeprobe-worktrees",
644
+ ".codeprobe-worktrees*",
639
645
  ]
640
646
 
641
647
  def test_release_resets_and_returns_to_pool(self, tmp_path: Path) -> None:
@@ -656,9 +662,11 @@ class TestWorktreeIsolation:
656
662
  iso._create_pool()
657
663
  with patch("subprocess.run") as mock_run:
658
664
  iso.cleanup()
659
- assert mock_run.call_count == 2
660
- for c in mock_run.call_args_list:
665
+ # 2 worktree removes + 1 prune
666
+ assert mock_run.call_count == 3
667
+ for c in mock_run.call_args_list[:2]:
661
668
  assert c[0][0][0:3] == ["git", "worktree", "remove"]
669
+ assert mock_run.call_args_list[2][0][0] == ["git", "worktree", "prune"]
662
670
 
663
671
  def test_pool_size_validation(self) -> None:
664
672
  """pool_size must be >= 1."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes