codeprobe 0.2.1__tar.gz → 0.2.3__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. {codeprobe-0.2.1 → codeprobe-0.2.3}/PKG-INFO +1 -1
  2. {codeprobe-0.2.1 → codeprobe-0.2.3}/pyproject.toml +1 -1
  3. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/run_cmd.py +14 -1
  4. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/executor.py +6 -3
  5. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/PKG-INFO +1 -1
  6. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_executor.py +2 -1
  7. {codeprobe-0.2.1 → codeprobe-0.2.3}/LICENSE +0 -0
  8. {codeprobe-0.2.1 → codeprobe-0.2.3}/README.md +0 -0
  9. {codeprobe-0.2.1 → codeprobe-0.2.3}/setup.cfg +0 -0
  10. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/__init__.py +0 -0
  11. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/__main__.py +0 -0
  12. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/__init__.py +0 -0
  13. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/_base.py +0 -0
  14. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/aider.py +0 -0
  15. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/claude.py +0 -0
  16. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/codex.py +0 -0
  17. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/copilot.py +0 -0
  18. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/openai_compat.py +0 -0
  19. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/protocol.py +0 -0
  20. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/session.py +0 -0
  21. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/adapters/telemetry.py +0 -0
  22. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/analysis/__init__.py +0 -0
  23. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/analysis/ranking.py +0 -0
  24. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/analysis/report.py +0 -0
  25. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/analysis/stats.py +0 -0
  26. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/api.py +0 -0
  27. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/assess/__init__.py +0 -0
  28. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/assess/heuristics.py +0 -0
  29. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/__init__.py +0 -0
  30. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/assess_cmd.py +0 -0
  31. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/experiment_cmd.py +0 -0
  32. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/init_cmd.py +0 -0
  33. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/interpret_cmd.py +0 -0
  34. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/mine_cmd.py +0 -0
  35. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/probe_cmd.py +0 -0
  36. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/ratings_cmd.py +0 -0
  37. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  38. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/wizard.py +0 -0
  39. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/cli/yaml_writer.py +0 -0
  40. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/config/__init__.py +0 -0
  41. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/config/loader.py +0 -0
  42. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/__init__.py +0 -0
  43. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/_shared.py +0 -0
  44. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/adaptive.py +0 -0
  45. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/counterfactual.py +0 -0
  46. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/debate.py +0 -0
  47. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/decision_tree.py +0 -0
  48. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/elo.py +0 -0
  49. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/fingerprint.py +0 -0
  50. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/mutation.py +0 -0
  51. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/pareto.py +0 -0
  52. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/sprt.py +0 -0
  53. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/contrib/tournament.py +0 -0
  54. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/__init__.py +0 -0
  55. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/checkpoint.py +0 -0
  56. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/experiment.py +0 -0
  57. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/isolation.py +0 -0
  58. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/llm.py +0 -0
  59. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/preamble.py +0 -0
  60. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/registry.py +0 -0
  61. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/sandbox.py +0 -0
  62. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/core/scoring.py +0 -0
  63. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/loaders/__init__.py +0 -0
  64. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/__init__.py +0 -0
  65. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/_lang.py +0 -0
  66. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/curator.py +0 -0
  67. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/curator_backends.py +0 -0
  68. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/curator_tiers.py +0 -0
  69. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/extractor.py +0 -0
  70. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/org_scale.py +0 -0
  71. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/org_scale_families.py +0 -0
  72. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  73. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  74. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/org_scale_validate.py +0 -0
  75. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  76. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/sources.py +0 -0
  77. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/mining/writer.py +0 -0
  78. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/models/__init__.py +0 -0
  79. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/models/evalrc.py +0 -0
  80. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/models/experiment.py +0 -0
  81. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/models/preamble.py +0 -0
  82. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/models/task.py +0 -0
  83. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/preambles/__init__.py +0 -0
  84. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/preambles/github.md +0 -0
  85. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/preambles/sourcegraph.md +0 -0
  86. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/probe/__init__.py +0 -0
  87. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/probe/generator.py +0 -0
  88. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/probe/writer.py +0 -0
  89. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/ratings/__init__.py +0 -0
  90. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/ratings/collector.py +0 -0
  91. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/scaffold/__init__.py +0 -0
  92. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/scaffold/writer.py +0 -0
  93. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/templates/__init__.py +0 -0
  94. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  95. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  96. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  97. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  98. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  99. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/entry_points.txt +0 -0
  100. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/requires.txt +0 -0
  101. {codeprobe-0.2.1 → codeprobe-0.2.3}/src/codeprobe.egg-info/top_level.txt +0 -0
  102. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_adapters.py +0 -0
  103. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_analysis.py +0 -0
  104. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_api.py +0 -0
  105. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_assess.py +0 -0
  106. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_changed_symbols.py +0 -0
  107. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_checkpoint.py +0 -0
  108. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_cli.py +0 -0
  109. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_config_loader.py +0 -0
  110. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_contrib.py +0 -0
  111. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_curator_backends.py +0 -0
  112. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_curator_core.py +0 -0
  113. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_curator_integration.py +0 -0
  114. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_curator_tiers.py +0 -0
  115. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_experiment_cmd.py +0 -0
  116. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_experiment_core.py +0 -0
  117. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_init_wizard.py +0 -0
  118. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_llm.py +0 -0
  119. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_loaders.py +0 -0
  120. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_mcp_families_mining.py +0 -0
  121. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_mcp_validate.py +0 -0
  122. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_mining.py +0 -0
  123. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_models.py +0 -0
  124. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_new_families.py +0 -0
  125. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_openai_compat.py +0 -0
  126. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_oracle_types.py +0 -0
  127. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_org_scale.py +0 -0
  128. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_pipeline_integration.py +0 -0
  129. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_preamble.py +0 -0
  130. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_probe.py +0 -0
  131. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_ratings.py +0 -0
  132. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_ratings_cmd.py +0 -0
  133. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_registry.py +0 -0
  134. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_scaffold.py +0 -0
  135. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_scanner_refactor.py +0 -0
  136. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_scoring.py +0 -0
  137. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_session.py +0 -0
  138. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_sg_ground_truth.py +0 -0
  139. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_telemetry.py +0 -0
  140. {codeprobe-0.2.1 → codeprobe-0.2.3}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -183,10 +183,22 @@ def run_eval(
183
183
 
184
184
  click.echo(f"\nRunning config: {exp_config.label} ({len(task_dirs)} tasks)")
185
185
 
186
+ # Compute directories to exclude from git clean between sequential
187
+ # tasks so the experiment dir (untracked) isn't deleted.
188
+ _clean_excludes: tuple[str, ...] = ()
189
+ resolved_repo = Path(path).resolve()
190
+ try:
191
+ rel = exp_dir.resolve().relative_to(resolved_repo)
192
+ top_dir = str(rel).split("/")[0]
193
+ if top_dir and top_dir != ".":
194
+ _clean_excludes = (top_dir,)
195
+ except ValueError:
196
+ pass # experiment dir is outside the repo
197
+
186
198
  results = execute_config(
187
199
  adapter=config_adapter,
188
200
  task_dirs=task_dirs,
189
- repo_path=Path(path).resolve(),
201
+ repo_path=resolved_repo,
190
202
  experiment_config=exp_config,
191
203
  agent_config=agent_config,
192
204
  checkpoint_store=checkpoint_store,
@@ -195,6 +207,7 @@ def run_eval(
195
207
  max_cost_usd=max_cost_usd,
196
208
  parallel=parallel,
197
209
  repeats=repeats,
210
+ clean_excludes=_clean_excludes,
198
211
  )
199
212
 
200
213
  if owns_sandbox:
@@ -334,14 +334,16 @@ def execute_task(
334
334
  _BILLABLE_COST_MODELS = frozenset({"per_token"})
335
335
 
336
336
 
337
- def _git_reset_workdir(repo_path: Path) -> None:
337
+ def _git_reset_workdir(
338
+ repo_path: Path, *, extra_excludes: tuple[str, ...] = ()
339
+ ) -> None:
338
340
  """Reset the working directory to a clean state between sequential tasks.
339
341
 
340
342
  Runs ``git restore .`` and ``git clean -fd`` to discard modifications
341
343
  and remove untracked files so task N's leftovers don't corrupt task N+1.
342
344
  """
343
345
  try:
344
- git_restore_clean(repo_path)
346
+ git_restore_clean(repo_path, extra_excludes=extra_excludes)
345
347
  except subprocess.CalledProcessError as exc:
346
348
  logger.warning(
347
349
  "Git reset failed (exit %d): %s",
@@ -441,6 +443,7 @@ def execute_config(
441
443
  parallel: int = 1,
442
444
  isolation: IsolationStrategy | None = None,
443
445
  repeats: int = 1,
446
+ clean_excludes: tuple[str, ...] = (),
444
447
  ) -> list[CompletedTask]:
445
448
  """Execute all tasks for a single experiment configuration.
446
449
 
@@ -562,7 +565,7 @@ def execute_config(
562
565
  # Reset working directory between tasks so leftovers from
563
566
  # task N don't corrupt task N+1's results.
564
567
  if idx > 0:
565
- _git_reset_workdir(repo_path)
568
+ _git_reset_workdir(repo_path, extra_excludes=clean_excludes)
566
569
  task_result = _run_one(task_dir, repeat_index=repeat_index)
567
570
  _handle_result(task_result)
568
571
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -551,7 +551,8 @@ def test_execute_config_resets_workdir_between_sequential_tasks(tmp_path: Path):
551
551
  )
552
552
  # Reset should be called between tasks (not before first), so 2 times for 3 tasks
553
553
  assert mock_reset.call_count == 2
554
- mock_reset.assert_any_call(Path("/repo"))
554
+ # First positional arg should be repo_path
555
+ assert mock_reset.call_args_list[0][0][0] == Path("/repo")
555
556
 
556
557
 
557
558
  def test_execute_config_no_reset_in_parallel_mode(tmp_path: Path):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes