codeprobe 0.2.2__tar.gz → 0.2.4__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (140)
  1. {codeprobe-0.2.2 → codeprobe-0.2.4}/PKG-INFO +1 -1
  2. {codeprobe-0.2.2 → codeprobe-0.2.4}/pyproject.toml +1 -1
  3. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/run_cmd.py +14 -1
  4. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/executor.py +2 -15
  5. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/isolation.py +21 -2
  6. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/PKG-INFO +1 -1
  7. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_executor.py +2 -1
  8. {codeprobe-0.2.2 → codeprobe-0.2.4}/LICENSE +0 -0
  9. {codeprobe-0.2.2 → codeprobe-0.2.4}/README.md +0 -0
  10. {codeprobe-0.2.2 → codeprobe-0.2.4}/setup.cfg +0 -0
  11. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/__init__.py +0 -0
  12. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/__main__.py +0 -0
  13. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/__init__.py +0 -0
  14. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/_base.py +0 -0
  15. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/aider.py +0 -0
  16. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/claude.py +0 -0
  17. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/codex.py +0 -0
  18. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/copilot.py +0 -0
  19. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/openai_compat.py +0 -0
  20. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/protocol.py +0 -0
  21. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/session.py +0 -0
  22. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/adapters/telemetry.py +0 -0
  23. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/analysis/__init__.py +0 -0
  24. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/analysis/ranking.py +0 -0
  25. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/analysis/report.py +0 -0
  26. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/analysis/stats.py +0 -0
  27. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/api.py +0 -0
  28. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/assess/__init__.py +0 -0
  29. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/assess/heuristics.py +0 -0
  30. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/__init__.py +0 -0
  31. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/assess_cmd.py +0 -0
  32. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/experiment_cmd.py +0 -0
  33. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/init_cmd.py +0 -0
  34. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/interpret_cmd.py +0 -0
  35. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/mine_cmd.py +0 -0
  36. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/probe_cmd.py +0 -0
  37. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/ratings_cmd.py +0 -0
  38. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  39. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/wizard.py +0 -0
  40. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/cli/yaml_writer.py +0 -0
  41. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/config/__init__.py +0 -0
  42. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/config/loader.py +0 -0
  43. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/__init__.py +0 -0
  44. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/_shared.py +0 -0
  45. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/adaptive.py +0 -0
  46. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/counterfactual.py +0 -0
  47. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/debate.py +0 -0
  48. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/decision_tree.py +0 -0
  49. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/elo.py +0 -0
  50. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/fingerprint.py +0 -0
  51. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/mutation.py +0 -0
  52. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/pareto.py +0 -0
  53. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/sprt.py +0 -0
  54. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/contrib/tournament.py +0 -0
  55. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/__init__.py +0 -0
  56. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/checkpoint.py +0 -0
  57. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/experiment.py +0 -0
  58. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/llm.py +0 -0
  59. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/preamble.py +0 -0
  60. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/registry.py +0 -0
  61. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/sandbox.py +0 -0
  62. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/core/scoring.py +0 -0
  63. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/loaders/__init__.py +0 -0
  64. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/__init__.py +0 -0
  65. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/_lang.py +0 -0
  66. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/curator.py +0 -0
  67. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/curator_backends.py +0 -0
  68. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/curator_tiers.py +0 -0
  69. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/extractor.py +0 -0
  70. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/org_scale.py +0 -0
  71. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/org_scale_families.py +0 -0
  72. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  73. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  74. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/org_scale_validate.py +0 -0
  75. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  76. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/sources.py +0 -0
  77. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/mining/writer.py +0 -0
  78. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/models/__init__.py +0 -0
  79. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/models/evalrc.py +0 -0
  80. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/models/experiment.py +0 -0
  81. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/models/preamble.py +0 -0
  82. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/models/task.py +0 -0
  83. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/preambles/__init__.py +0 -0
  84. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/preambles/github.md +0 -0
  85. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/preambles/sourcegraph.md +0 -0
  86. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/probe/__init__.py +0 -0
  87. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/probe/generator.py +0 -0
  88. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/probe/writer.py +0 -0
  89. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/ratings/__init__.py +0 -0
  90. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/ratings/collector.py +0 -0
  91. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/scaffold/__init__.py +0 -0
  92. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/scaffold/writer.py +0 -0
  93. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/templates/__init__.py +0 -0
  94. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  95. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  96. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  97. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  98. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  99. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/entry_points.txt +0 -0
  100. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/requires.txt +0 -0
  101. {codeprobe-0.2.2 → codeprobe-0.2.4}/src/codeprobe.egg-info/top_level.txt +0 -0
  102. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_adapters.py +0 -0
  103. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_analysis.py +0 -0
  104. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_api.py +0 -0
  105. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_assess.py +0 -0
  106. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_changed_symbols.py +0 -0
  107. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_checkpoint.py +0 -0
  108. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_cli.py +0 -0
  109. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_config_loader.py +0 -0
  110. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_contrib.py +0 -0
  111. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_curator_backends.py +0 -0
  112. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_curator_core.py +0 -0
  113. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_curator_integration.py +0 -0
  114. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_curator_tiers.py +0 -0
  115. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_experiment_cmd.py +0 -0
  116. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_experiment_core.py +0 -0
  117. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_init_wizard.py +0 -0
  118. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_llm.py +0 -0
  119. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_loaders.py +0 -0
  120. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_mcp_families_mining.py +0 -0
  121. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_mcp_validate.py +0 -0
  122. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_mining.py +0 -0
  123. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_models.py +0 -0
  124. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_new_families.py +0 -0
  125. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_openai_compat.py +0 -0
  126. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_oracle_types.py +0 -0
  127. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_org_scale.py +0 -0
  128. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_pipeline_integration.py +0 -0
  129. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_preamble.py +0 -0
  130. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_probe.py +0 -0
  131. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_ratings.py +0 -0
  132. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_ratings_cmd.py +0 -0
  133. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_registry.py +0 -0
  134. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_scaffold.py +0 -0
  135. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_scanner_refactor.py +0 -0
  136. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_scoring.py +0 -0
  137. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_session.py +0 -0
  138. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_sg_ground_truth.py +0 -0
  139. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_telemetry.py +0 -0
  140. {codeprobe-0.2.2 → codeprobe-0.2.4}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.2.2"
3
+ version = "0.2.4"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -183,10 +183,22 @@ def run_eval(
183
183
 
184
184
  click.echo(f"\nRunning config: {exp_config.label} ({len(task_dirs)} tasks)")
185
185
 
186
+ # Compute directories to exclude from git clean between sequential
187
+ # tasks so the experiment dir (untracked) isn't deleted.
188
+ _clean_excludes: tuple[str, ...] = ()
189
+ resolved_repo = Path(path).resolve()
190
+ try:
191
+ rel = exp_dir.resolve().relative_to(resolved_repo)
192
+ top_dir = str(rel).split("/")[0]
193
+ if top_dir and top_dir != ".":
194
+ _clean_excludes = (top_dir,)
195
+ except ValueError:
196
+ pass # experiment dir is outside the repo
197
+
186
198
  results = execute_config(
187
199
  adapter=config_adapter,
188
200
  task_dirs=task_dirs,
189
- repo_path=Path(path).resolve(),
201
+ repo_path=resolved_repo,
190
202
  experiment_config=exp_config,
191
203
  agent_config=agent_config,
192
204
  checkpoint_store=checkpoint_store,
@@ -195,6 +207,7 @@ def run_eval(
195
207
  max_cost_usd=max_cost_usd,
196
208
  parallel=parallel,
197
209
  repeats=repeats,
210
+ clean_excludes=_clean_excludes,
198
211
  )
199
212
 
200
213
  if owns_sandbox:
@@ -443,6 +443,7 @@ def execute_config(
443
443
  parallel: int = 1,
444
444
  isolation: IsolationStrategy | None = None,
445
445
  repeats: int = 1,
446
+ clean_excludes: tuple[str, ...] = (),
446
447
  ) -> list[CompletedTask]:
447
448
  """Execute all tasks for a single experiment configuration.
448
449
 
@@ -460,20 +461,6 @@ def execute_config(
460
461
  results are returned. Tasks with ``unknown`` or ``subscription``
461
462
  cost models are skipped in accumulation.
462
463
  """
463
- # Compute directories to exclude from git clean so that experiment
464
- # artifacts (runs/, tasks/, experiment.json) survive between tasks.
465
- # runs_dir is e.g. <repo>/mcp-comparison/runs/baseline — walk up to
466
- # find the experiment root relative to repo_path.
467
- _clean_excludes: tuple[str, ...] = ()
468
- if runs_dir is not None:
469
- try:
470
- exp_root = runs_dir.resolve().parent.parent # runs/<label> → exp dir
471
- rel = exp_root.relative_to(repo_path.resolve())
472
- # Exclude the top-level experiment directory name
473
- _clean_excludes = (str(rel).split("/")[0],)
474
- except ValueError:
475
- pass # experiment dir is outside the repo — nothing to exclude
476
-
477
464
  checkpointed_ids, results = _restore_checkpointed(checkpoint_store)
478
465
 
479
466
  # Filter checkpointed results to only include tasks in the current
@@ -578,7 +565,7 @@ def execute_config(
578
565
  # Reset working directory between tasks so leftovers from
579
566
  # task N don't corrupt task N+1's results.
580
567
  if idx > 0:
581
- _git_reset_workdir(repo_path, extra_excludes=_clean_excludes)
568
+ _git_reset_workdir(repo_path, extra_excludes=clean_excludes)
582
569
  task_result = _run_one(task_dir, repeat_index=repeat_index)
583
570
  _handle_result(task_result)
584
571
  else:
@@ -12,12 +12,28 @@ from typing import Protocol, runtime_checkable
12
12
  logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
+ def _discover_experiment_dirs(workdir: Path) -> list[str]:
16
+ """Find top-level directories that contain an experiment.json.
17
+
18
+ These are codeprobe experiment directories that must survive git clean.
19
+ """
20
+ excludes: list[str] = []
21
+ try:
22
+ for entry in workdir.iterdir():
23
+ if entry.is_dir() and (entry / "experiment.json").is_file():
24
+ excludes.append(entry.name)
25
+ except OSError:
26
+ pass
27
+ return excludes
28
+
29
+
15
30
  def git_restore_clean(workdir: Path, *, extra_excludes: tuple[str, ...] = ()) -> None:
16
31
  """Restore tracked files and remove untracked files in *workdir*.
17
32
 
18
33
  Uses ``git restore .`` (tolerant of empty diffs) followed by
19
- ``git clean -fd``. Always excludes ``.codeprobe`` and
20
- ``.codeprobe-worktrees``; pass *extra_excludes* for more.
34
+ ``git clean -fd``. Always excludes ``.codeprobe``,
35
+ ``.codeprobe-worktrees``, and any directories containing
36
+ ``experiment.json`` (codeprobe experiment dirs).
21
37
  """
22
38
  result = subprocess.run(
23
39
  ["git", "restore", "."],
@@ -39,6 +55,9 @@ def git_restore_clean(workdir: Path, *, extra_excludes: tuple[str, ...] = ()) ->
39
55
  "-e",
40
56
  ".codeprobe-worktrees",
41
57
  ]
58
+ # Auto-discover experiment directories inside the repo
59
+ for exp_dir in _discover_experiment_dirs(workdir):
60
+ clean_cmd += ["-e", exp_dir]
42
61
  for exc in extra_excludes:
43
62
  clean_cmd += ["-e", exc]
44
63
  subprocess.run(clean_cmd, cwd=workdir, check=True, capture_output=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -551,7 +551,8 @@ def test_execute_config_resets_workdir_between_sequential_tasks(tmp_path: Path):
551
551
  )
552
552
  # Reset should be called between tasks (not before first), so 2 times for 3 tasks
553
553
  assert mock_reset.call_count == 2
554
- mock_reset.assert_any_call(Path("/repo"), extra_excludes=())
554
+ # First positional arg should be repo_path
555
+ assert mock_reset.call_args_list[0][0][0] == Path("/repo")
555
556
 
556
557
 
557
558
  def test_execute_config_no_reset_in_parallel_mode(tmp_path: Path):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes