codeprobe 0.2.7__tar.gz → 0.2.8__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. {codeprobe-0.2.7 → codeprobe-0.2.8}/PKG-INFO +1 -1
  2. {codeprobe-0.2.7 → codeprobe-0.2.8}/pyproject.toml +1 -1
  3. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/__init__.py +1 -1
  4. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/executor.py +7 -0
  5. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/writer.py +2 -2
  6. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/PKG-INFO +1 -1
  7. {codeprobe-0.2.7 → codeprobe-0.2.8}/LICENSE +0 -0
  8. {codeprobe-0.2.7 → codeprobe-0.2.8}/README.md +0 -0
  9. {codeprobe-0.2.7 → codeprobe-0.2.8}/setup.cfg +0 -0
  10. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/__main__.py +0 -0
  11. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/__init__.py +0 -0
  12. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/_base.py +0 -0
  13. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/aider.py +0 -0
  14. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/claude.py +0 -0
  15. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/codex.py +0 -0
  16. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/copilot.py +0 -0
  17. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/openai_compat.py +0 -0
  18. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/protocol.py +0 -0
  19. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/session.py +0 -0
  20. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/adapters/telemetry.py +0 -0
  21. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/__init__.py +0 -0
  22. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/ranking.py +0 -0
  23. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/report.py +0 -0
  24. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/analysis/stats.py +0 -0
  25. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/api.py +0 -0
  26. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/assess/__init__.py +0 -0
  27. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/assess/heuristics.py +0 -0
  28. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/__init__.py +0 -0
  29. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/assess_cmd.py +0 -0
  30. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/experiment_cmd.py +0 -0
  31. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/init_cmd.py +0 -0
  32. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/interpret_cmd.py +0 -0
  33. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/mine_cmd.py +0 -0
  34. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/probe_cmd.py +0 -0
  35. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/ratings_cmd.py +0 -0
  36. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/run_cmd.py +0 -0
  37. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  38. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/wizard.py +0 -0
  39. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/cli/yaml_writer.py +0 -0
  40. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/config/__init__.py +0 -0
  41. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/config/loader.py +0 -0
  42. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/__init__.py +0 -0
  43. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/_shared.py +0 -0
  44. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/adaptive.py +0 -0
  45. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/counterfactual.py +0 -0
  46. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/debate.py +0 -0
  47. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/decision_tree.py +0 -0
  48. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/elo.py +0 -0
  49. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/fingerprint.py +0 -0
  50. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/mutation.py +0 -0
  51. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/pareto.py +0 -0
  52. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/sprt.py +0 -0
  53. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/contrib/tournament.py +0 -0
  54. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/__init__.py +0 -0
  55. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/checkpoint.py +0 -0
  56. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/experiment.py +0 -0
  57. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/isolation.py +0 -0
  58. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/llm.py +0 -0
  59. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/preamble.py +0 -0
  60. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/registry.py +0 -0
  61. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/sandbox.py +0 -0
  62. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/core/scoring.py +0 -0
  63. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/loaders/__init__.py +0 -0
  64. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/__init__.py +0 -0
  65. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/_lang.py +0 -0
  66. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator.py +0 -0
  67. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator_backends.py +0 -0
  68. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/curator_tiers.py +0 -0
  69. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/extractor.py +0 -0
  70. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale.py +0 -0
  71. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_families.py +0 -0
  72. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  73. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  74. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/org_scale_validate.py +0 -0
  75. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  76. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/mining/sources.py +0 -0
  77. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/__init__.py +0 -0
  78. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/evalrc.py +0 -0
  79. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/experiment.py +0 -0
  80. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/preamble.py +0 -0
  81. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/models/task.py +0 -0
  82. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/__init__.py +0 -0
  83. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/github.md +0 -0
  84. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/preambles/sourcegraph.md +0 -0
  85. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/__init__.py +0 -0
  86. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/generator.py +0 -0
  87. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/probe/writer.py +0 -0
  88. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/ratings/__init__.py +0 -0
  89. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/ratings/collector.py +0 -0
  90. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/scaffold/__init__.py +0 -0
  91. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/scaffold/writer.py +0 -0
  92. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/__init__.py +0 -0
  93. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  94. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  95. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  96. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  97. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  98. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/entry_points.txt +0 -0
  99. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/requires.txt +0 -0
  100. {codeprobe-0.2.7 → codeprobe-0.2.8}/src/codeprobe.egg-info/top_level.txt +0 -0
  101. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_adapters.py +0 -0
  102. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_analysis.py +0 -0
  103. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_api.py +0 -0
  104. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_assess.py +0 -0
  105. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_changed_symbols.py +0 -0
  106. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_checkpoint.py +0 -0
  107. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_cli.py +0 -0
  108. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_config_loader.py +0 -0
  109. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_contrib.py +0 -0
  110. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_backends.py +0 -0
  111. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_core.py +0 -0
  112. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_integration.py +0 -0
  113. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_curator_tiers.py +0 -0
  114. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_executor.py +0 -0
  115. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_experiment_cmd.py +0 -0
  116. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_experiment_core.py +0 -0
  117. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_init_wizard.py +0 -0
  118. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_llm.py +0 -0
  119. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_loaders.py +0 -0
  120. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mcp_families_mining.py +0 -0
  121. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mcp_validate.py +0 -0
  122. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_mining.py +0 -0
  123. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_models.py +0 -0
  124. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_new_families.py +0 -0
  125. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_openai_compat.py +0 -0
  126. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_oracle_types.py +0 -0
  127. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_org_scale.py +0 -0
  128. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_pipeline_integration.py +0 -0
  129. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_preamble.py +0 -0
  130. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_probe.py +0 -0
  131. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_ratings.py +0 -0
  132. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_ratings_cmd.py +0 -0
  133. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_registry.py +0 -0
  134. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scaffold.py +0 -0
  135. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scanner_refactor.py +0 -0
  136. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_scoring.py +0 -0
  137. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_session.py +0 -0
  138. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_sg_ground_truth.py +0 -0
  139. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_telemetry.py +0 -0
  140. {codeprobe-0.2.7 → codeprobe-0.2.8}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.2.7"
3
+ version = "0.2.8"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -1,3 +1,3 @@
1
1
  """codeprobe — Benchmark AI coding agents against your own codebase."""
2
2
 
3
- __version__ = "0.2.7"
3
+ __version__ = "0.2.8"
@@ -191,6 +191,13 @@ def execute_task(
191
191
  if task_rt and task_rt != "binary":
192
192
  reward_type = task_rt
193
193
 
194
+ # Remove stale answer.txt / reward.txt from prior runs so they don't
195
+ # leak into this task's scoring sandbox.
196
+ for stale in ("answer.txt", "reward.txt"):
197
+ stale_path = task_dir / stale
198
+ if stale_path.is_file():
199
+ stale_path.unlink(missing_ok=True)
200
+
194
201
  def _error_result(error: str, error_category: str | None = None) -> TaskResult:
195
202
  return TaskResult(
196
203
  completed=CompletedTask(
@@ -467,8 +467,8 @@ def _write_oracle_task(
467
467
  f"# Oracle verification for org-scale task {safe_id}\n"
468
468
  'SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"\n'
469
469
  'TASK_DIR="$(dirname "$SCRIPT_DIR")"\n\n'
470
- "# Read agent output from $AGENT_OUTPUT (sandbox) or answer.txt\n"
471
- 'if [ -n "${AGENT_OUTPUT:-}" ] && [ -f "$AGENT_OUTPUT" ]; then\n'
470
+ "# Fallback: if agent wrote to stdout instead of answer.txt, use $AGENT_OUTPUT\n"
471
+ 'if [ ! -f "$TASK_DIR/answer.txt" ] && [ -n "${AGENT_OUTPUT:-}" ] && [ -f "$AGENT_OUTPUT" ]; then\n'
472
472
  ' cp "$AGENT_OUTPUT" "$TASK_DIR/answer.txt"\n'
473
473
  "fi\n\n"
474
474
  "# Self-contained oracle check — no codeprobe install required\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes