codeprobe 0.4.1__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. {codeprobe-0.4.1 → codeprobe-0.5.2}/PKG-INFO +6 -3
  2. {codeprobe-0.4.1 → codeprobe-0.5.2}/README.md +2 -0
  3. {codeprobe-0.4.1 → codeprobe-0.5.2}/pyproject.toml +7 -3
  4. codeprobe-0.5.2/src/codeprobe/acceptance_compiler.py +355 -0
  5. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/_base.py +12 -6
  6. codeprobe-0.5.2/src/codeprobe/adapters/claude.py +316 -0
  7. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/protocol.py +1 -1
  8. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/telemetry.py +52 -2
  9. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/analysis/report.py +90 -13
  10. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/analysis/stats.py +84 -5
  11. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/api.py +5 -4
  12. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/assess/__init__.py +14 -2
  13. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/assess/heuristics.py +1 -1
  14. codeprobe-0.5.2/src/codeprobe/assess/oracle_diff.py +517 -0
  15. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/__init__.py +106 -9
  16. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/experiment_cmd.py +49 -3
  17. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/init_cmd.py +5 -0
  18. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/interpret_cmd.py +10 -1
  19. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/json_display.py +1 -1
  20. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/mine_cmd.py +686 -203
  21. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/rich_display.py +2 -2
  22. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/run_cmd.py +73 -65
  23. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/validate_cmd.py +56 -1
  24. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/config/loader.py +47 -21
  25. codeprobe-0.5.2/src/codeprobe/config/redact.py +123 -0
  26. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/checkpoint.py +2 -2
  27. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/events.py +2 -2
  28. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/executor.py +26 -0
  29. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/experiment.py +7 -0
  30. codeprobe-0.5.2/src/codeprobe/core/repo_hygiene.py +51 -0
  31. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/scoring.py +26 -7
  32. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/comprehension.py +15 -10
  33. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/comprehension_writer.py +1 -1
  34. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/curator.py +1 -1
  35. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/curator_backends.py +11 -3
  36. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/extractor.py +385 -74
  37. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/org_scale.py +88 -25
  38. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/org_scale_oracle.py +70 -11
  39. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/org_scale_scanner.py +171 -10
  40. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/sg_auth.py +28 -6
  41. codeprobe-0.5.2/src/codeprobe/mining/task_types.py +180 -0
  42. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/writer.py +413 -18
  43. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/suite.py +1 -1
  44. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/task.py +3 -3
  45. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/probe/generator.py +19 -11
  46. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/probe/writer.py +3 -4
  47. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/ratings/collector.py +2 -2
  48. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/PKG-INFO +6 -3
  49. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/SOURCES.txt +20 -0
  50. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/requires.txt +3 -2
  51. codeprobe-0.5.2/tests/test_acceptance_compiler.py +566 -0
  52. codeprobe-0.5.2/tests/test_acceptance_compiler_integration.py +161 -0
  53. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_adapters.py +326 -169
  54. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_artifact_scorer.py +1 -1
  55. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_auth_cmd.py +0 -1
  56. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_changed_symbols.py +0 -1
  57. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_checkpoint.py +0 -4
  58. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_checkpoint_scoring.py +1 -1
  59. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_cli.py +1 -1
  60. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_contrib.py +9 -5
  61. codeprobe-0.5.2/tests/test_convergence.py +441 -0
  62. codeprobe-0.5.2/tests/test_criteria_loader.py +411 -0
  63. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_curator_backends.py +2 -2
  64. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_curator_integration.py +7 -6
  65. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_curator_tiers.py +0 -2
  66. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_doctor_cmd.py +0 -2
  67. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_adversarial_fixes.py +175 -3
  68. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_executor.py +53 -8
  69. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_executor_dual_isolation.py +2 -4
  70. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_executor_events.py +0 -2
  71. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_experiment_cmd.py +78 -0
  72. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_ground_truth_schema.py +1 -3
  73. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_init_wizard.py +10 -3
  74. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_json_display.py +0 -2
  75. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_listeners_dual.py +50 -0
  76. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_llm.py +1 -3
  77. codeprobe-0.5.2/tests/test_loader.py +128 -0
  78. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mcp_families_mining.py +3 -6
  79. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mcp_validate.py +0 -2
  80. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mine_cli.py +59 -3
  81. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mine_goals.py +3 -1
  82. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mine_presets.py +2 -3
  83. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mine_profiles.py +0 -1
  84. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mining.py +422 -17
  85. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_multi_repo_e2e.py +1 -2
  86. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_new_families.py +2 -3
  87. codeprobe-0.5.2/tests/test_oracle_diff.py +341 -0
  88. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_oracle_registry.py +1 -2
  89. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_org_scale.py +173 -11
  90. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_pipeline_integration.py +4 -8
  91. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_preamble.py +3 -3
  92. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_ratings.py +2 -2
  93. codeprobe-0.5.2/tests/test_regression_gate.py +719 -0
  94. codeprobe-0.5.2/tests/test_release_gate.py +477 -0
  95. codeprobe-0.5.2/tests/test_repo_hygiene.py +106 -0
  96. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_run_config_resolution.py +0 -2
  97. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scaffold.py +0 -3
  98. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scoring.py +1 -3
  99. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scoring_extended.py +1 -2
  100. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scoring_v2.py +1 -1
  101. codeprobe-0.5.2/tests/test_sdlc_ground_truth.py +519 -0
  102. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_secret_redaction.py +181 -2
  103. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_sg_auth.py +4 -2
  104. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_sg_ground_truth.py +0 -1
  105. codeprobe-0.5.2/tests/test_stats.py +261 -0
  106. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_suite_manifest.py +0 -2
  107. codeprobe-0.5.2/tests/test_task_model.py +186 -0
  108. codeprobe-0.5.2/tests/test_task_types.py +187 -0
  109. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_telemetry.py +45 -7
  110. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_validate_cmd.py +65 -1
  111. codeprobe-0.5.2/tests/test_verifier.py +629 -0
  112. codeprobe-0.5.2/tests/test_verify.py +172 -0
  113. codeprobe-0.5.2/tests/test_weighted_checklist.py +662 -0
  114. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_weighted_f1.py +154 -8
  115. codeprobe-0.4.1/src/codeprobe/adapters/claude.py +0 -136
  116. codeprobe-0.4.1/src/codeprobe/config/redact.py +0 -45
  117. {codeprobe-0.4.1 → codeprobe-0.5.2}/LICENSE +0 -0
  118. {codeprobe-0.4.1 → codeprobe-0.5.2}/setup.cfg +0 -0
  119. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/__init__.py +0 -0
  120. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/__main__.py +0 -0
  121. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/__init__.py +0 -0
  122. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/codex.py +0 -0
  123. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/copilot.py +0 -0
  124. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/openai_compat.py +0 -0
  125. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/adapters/session.py +0 -0
  126. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/analysis/__init__.py +0 -0
  127. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/analysis/dual.py +0 -0
  128. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/analysis/ranking.py +0 -0
  129. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/assess_cmd.py +0 -0
  130. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/auth_cmd.py +0 -0
  131. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/doctor_cmd.py +0 -0
  132. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/preamble_cmd.py +0 -0
  133. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/probe_cmd.py +2 -2
  134. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/ratings_cmd.py +0 -0
  135. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  136. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/wizard.py +0 -0
  137. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/cli/yaml_writer.py +0 -0
  138. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/config/__init__.py +0 -0
  139. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/__init__.py +0 -0
  140. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/_shared.py +0 -0
  141. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/adaptive.py +0 -0
  142. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/counterfactual.py +0 -0
  143. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/debate.py +0 -0
  144. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/decision_tree.py +0 -0
  145. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/elo.py +0 -0
  146. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/fingerprint.py +0 -0
  147. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/mutation.py +0 -0
  148. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/pareto.py +0 -0
  149. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/sprt.py +0 -0
  150. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/contrib/tournament.py +0 -0
  151. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/__init__.py +0 -0
  152. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/__main__.py +0 -0
  153. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/isolation.py +0 -0
  154. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/llm.py +0 -0
  155. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/mcp_discovery.py +0 -0
  156. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/preamble.py +0 -0
  157. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/registry.py +0 -0
  158. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/core/sandbox.py +0 -0
  159. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/loaders/__init__.py +0 -0
  160. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/loaders/suite.py +0 -0
  161. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/__init__.py +0 -0
  162. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/_graph.py +0 -0
  163. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/_lang.py +0 -0
  164. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/curator_tiers.py +0 -0
  165. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/multi_repo.py +0 -0
  166. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/org_scale_families.py +0 -0
  167. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/org_scale_validate.py +0 -0
  168. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  169. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/mining/sources.py +0 -0
  170. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/__init__.py +0 -0
  171. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/evalrc.py +0 -0
  172. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/experiment.py +0 -0
  173. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/models/preamble.py +0 -0
  174. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/preambles/__init__.py +0 -0
  175. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/preambles/github.md +0 -0
  176. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/preambles/sourcegraph.md +0 -0
  177. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/probe/__init__.py +0 -0
  178. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/probe/adapter.py +0 -0
  179. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/ratings/__init__.py +0 -0
  180. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/scaffold/__init__.py +0 -0
  181. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/scaffold/writer.py +0 -0
  182. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/templates/__init__.py +0 -0
  183. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  184. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  185. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  186. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  187. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/entry_points.txt +0 -0
  188. {codeprobe-0.4.1 → codeprobe-0.5.2}/src/codeprobe.egg-info/top_level.txt +0 -0
  189. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_adapter_contracts.py +0 -0
  190. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_analysis.py +0 -0
  191. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_api.py +0 -0
  192. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_assess.py +4 -4
  193. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_comprehension.py +0 -0
  194. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_config_loader.py +0 -0
  195. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_ctrlc_integration.py +1 -1
  196. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_curator_core.py +0 -0
  197. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_composite.py +0 -0
  198. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_e2e.py +0 -0
  199. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_matrix.py +0 -0
  200. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_scorer.py +0 -0
  201. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_dual_scoring_details.py +0 -0
  202. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_events.py +0 -0
  203. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_examples_dual.py +0 -0
  204. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_experiment_core.py +0 -0
  205. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_isolation.py +0 -0
  206. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_loaders.py +0 -0
  207. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_loaders_dual.py +0 -0
  208. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_mining_dual.py +0 -0
  209. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_models.py +0 -0
  210. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_multi_repo_mining.py +0 -0
  211. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_openai_compat.py +0 -0
  212. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_oracle_types.py +0 -0
  213. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_preamble_cmd.py +0 -0
  214. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_probe.py +0 -0
  215. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_probe_adapter.py +0 -0
  216. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_ratings_cmd.py +0 -0
  217. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_registry.py +0 -0
  218. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_report_dual.py +0 -0
  219. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_safe_leg_score.py +0 -0
  220. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scaffold_upgrade.py +0 -0
  221. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_scanner_refactor.py +0 -0
  222. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_score_result.py +0 -0
  223. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_session.py +0 -0
  224. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_shell_shim.py +0 -0
  225. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_show_prompt.py +0 -0
  226. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_suite.py +0 -0
  227. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_validate_dual.py +0 -0
  228. {codeprobe-0.4.1 → codeprobe-0.5.2}/tests/test_writer_dual.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.4.1
3
+ Version: 0.5.2
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -20,8 +20,8 @@ Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: click<9,>=8.0
22
22
  Requires-Dist: pyyaml<7,>=6.0
23
- Requires-Dist: anthropic>=0.39
24
- Requires-Dist: openai>=1.66
23
+ Requires-Dist: anthropic<1,>=0.39
24
+ Requires-Dist: openai<3,>=1.66
25
25
  Requires-Dist: tiktoken<1,>=0.7
26
26
  Requires-Dist: scipy<2,>=1.11
27
27
  Requires-Dist: rich<14,>=13.7
@@ -32,6 +32,7 @@ Requires-Dist: ruff<1,>=0.4; extra == "dev"
32
32
  Requires-Dist: mypy<2,>=1.10; extra == "dev"
33
33
  Requires-Dist: types-PyYAML<7,>=6.0; extra == "dev"
34
34
  Requires-Dist: scipy<2,>=1.11; extra == "dev"
35
+ Requires-Dist: build<2,>=1.0; extra == "dev"
35
36
  Dynamic: license-file
36
37
 
37
38
  # codeprobe
@@ -83,6 +84,8 @@ codeprobe run . # Run agents against tasks
83
84
  codeprobe interpret . # Get recommendations
84
85
  ```
85
86
 
87
+ Prefer driving codeprobe through a coding agent instead? See [docs/workflows/with-agents.md](docs/workflows/with-agents.md) for the skills-based workflow (`/experiment`, `/assess-codebase`, `/interpret`).
88
+
86
89
  ## Commands
87
90
 
88
91
  | Command | Purpose |
@@ -47,6 +47,8 @@ codeprobe run . # Run agents against tasks
47
47
  codeprobe interpret . # Get recommendations
48
48
  ```
49
49
 
50
+ Prefer driving codeprobe through a coding agent instead? See [docs/workflows/with-agents.md](docs/workflows/with-agents.md) for the skills-based workflow (`/experiment`, `/assess-codebase`, `/interpret`).
51
+
50
52
  ## Commands
51
53
 
52
54
  | Command | Purpose |
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.4.1"
3
+ version = "0.5.2"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -21,8 +21,8 @@ classifiers = [
21
21
  dependencies = [
22
22
  "click>=8.0,<9",
23
23
  "pyyaml>=6.0,<7",
24
- "anthropic>=0.39",
25
- "openai>=1.66",
24
+ "anthropic>=0.39,<1",
25
+ "openai>=1.66,<3",
26
26
  "tiktoken>=0.7,<1",
27
27
  "scipy>=1.11,<2",
28
28
  "rich>=13.7,<14",
@@ -41,6 +41,10 @@ dev = [
41
41
  "mypy>=1.10,<2",
42
42
  "types-PyYAML>=6.0,<7",
43
43
  "scipy>=1.11,<2",
44
+ # Needed by tests/test_release_gate.py::test_build_and_stage_real_wheel
45
+ # which shells out to ``python -m build --wheel`` to verify the wheel
46
+ # produced at release time is installable and version-consistent.
47
+ "build>=1.0,<2",
44
48
  ]
45
49
 
46
50
  [project.scripts]
@@ -0,0 +1,355 @@
1
+ """Criterion-driven Test Agent action compiler.
2
+
3
+ Reads acceptance criteria from ``acceptance/criteria.toml`` (via
4
+ :func:`acceptance.loader.load_criteria`) and compiles each criterion whose
5
+ ``check_type`` requires a workspace artifact into a :class:`TestAction` —
6
+ a frozen dataclass holding a bash snippet that the Test Agent executes to
7
+ populate the artifact(s) the Verifier reads.
8
+
9
+ Structural check types (``import_equals``, ``regex_present``, etc.) require
10
+ no workspace artifact and produce no action. Check types that have no
11
+ handler registered in ``acceptance.verify.Verifier._handlers()`` also
12
+ produce no action — emitting artifacts for them would be pure waste since
13
+ the Verifier skips them regardless.
14
+
15
+ This module is a **pure function** — no IO beyond what the caller passes in,
16
+ no subprocesses, no LLM calls. Token substitution uses ``.replace()``
17
+ chains (never ``.format()``) to avoid crashes on shell ``${VAR}`` braces.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import re
23
+ import textwrap
24
+ from collections.abc import Callable
25
+ from dataclasses import dataclass
26
+ from pathlib import Path
27
+ from typing import Any
28
+
29
+ from acceptance.loader import Criterion
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Public types
33
+ # ---------------------------------------------------------------------------
34
+
35
+
36
@dataclass(frozen=True)
class TestAction:
    """Immutable record of one compiled step for the Test Agent to run."""

    # Id of the criterion this action was compiled from.
    criterion_id: str
    # Short human-readable summary of the action.
    description: str
    # Bash snippet the Test Agent executes.
    shell_snippet: str
    # Artifacts associated with the action; the emitters in this module
    # record them as names relative to the workspace (e.g. "<id>.stdout").
    artifact_paths: tuple[str, ...]
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Check types that the Verifier handles AND that read workspace artifacts.
48
+ # Structural types are excluded (they introspect Python or source files).
49
+ # Handler-less types are excluded (no Verifier reader → artifacts are waste).
50
+ # ---------------------------------------------------------------------------
51
+
52
#: Check types handled by the Verifier that DO NOT need workspace artifacts.
_STRUCTURAL_TYPES: frozenset[str] = frozenset(
    {
        "import_equals",
        "dataclass_has_fields",
        "regex_present",
        "regex_absent",
        "pyproject_deps_bounded",
    }
)

#: Criterion IDs must match this pattern to be safe for shell embedding.
#: Prevents command injection via $() or backticks in double-quoted contexts.
_SAFE_ID_RE: re.Pattern[str] = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_\-]{0,63}$")

#: Shell environment variable names must match this pattern.
_SAFE_ENV_RE: re.Pattern[str] = re.compile(r"^[A-Z_][A-Z0-9_]{0,127}$")

#: Check types present in criteria.toml but absent from Verifier._handlers().
#: No action is compiled for them (see ``compile_actions``).
_HANDLERLESS_TYPES: frozenset[str] = frozenset(
    {
        "stream_separation",
        "log_level_matches",
        "json_lines_valid",
        "dataclass_roundtrip",
        "yaml_field_equal",
    }
)
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # Public API
83
+ # ---------------------------------------------------------------------------
84
+
85
+
86
def compile_actions(
    criteria: list[Criterion],
    *,
    target_repo: Path,
    workspace: Path,
    project_root: Path,
) -> list[TestAction]:
    """Compile *criteria* into the actions the Test Agent should execute.

    Every criterion id is validated against ``_SAFE_ID_RE`` first, including
    criteria that end up producing no action. Criteria whose ``check_type``
    is structural, handler-less, or has no registered emitter are skipped.
    For the rest, the emitter registered in ``_EMITTERS`` is called; an
    emitter may return a ``COMPILE_ERROR`` stub action or ``None``, and
    ``None`` results are dropped.

    Raises:
        ValueError: if a criterion id is not safe for shell embedding.
    """
    no_action_types = _STRUCTURAL_TYPES | _HANDLERLESS_TYPES
    compiled: list[TestAction] = []
    for crit in criteria:
        # Ids are interpolated into double-quoted shell paths, so reject
        # anything outside the safe alphabet before emitting a snippet.
        if _SAFE_ID_RE.fullmatch(crit.id) is None:
            raise ValueError(
                f"Criterion id {crit.id!r} contains characters unsafe "
                "for shell embedding; only [A-Za-z0-9_-] allowed."
            )
        check_type = crit.check_type
        if check_type in no_action_types:
            continue
        emit = _EMITTERS.get(check_type)
        if emit is None:
            continue
        produced = emit(crit, target_repo, workspace, project_root)
        if produced is not None:
            compiled.append(produced)
    return compiled
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Token substitution
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
def _substitute_command(
    raw: str,
    target_repo: Path,
    workspace: Path,
    project_root: Path,
    params: dict[str, Any],
) -> str:
    """Substitute ``{repo}``, ``{results}``, ``{tasks_dir}``, ``{experiment}``
    tokens inside a command string.

    Uses ``.replace()`` (not ``.format()``) so shell ``${VAR}`` braces are
    left intact.

    ``{tasks_dir}`` resolves to ``project_root / params["fixture"]`` when a
    non-empty string ``fixture`` param is present, otherwise to
    ``workspace / "tasks"``.

    Raises:
        ValueError: if the resolved fixture path lies outside *project_root*.
    """
    result = raw.replace("{repo}", str(target_repo))
    result = result.replace("{results}", str(workspace / "results"))
    result = result.replace(
        "{experiment}", str(workspace / ".codeprobe" / "experiment.json")
    )

    fixture = params.get("fixture")
    if fixture and isinstance(fixture, str):
        resolved = (project_root / fixture).resolve()
        root_resolved = project_root.resolve()
        # Compare path components, not string prefixes: a plain
        # ``startswith`` check would accept a sibling such as
        # ``/proj-evil`` as being inside ``/proj``.
        if not resolved.is_relative_to(root_resolved):
            raise ValueError(
                f"fixture param {fixture!r} escapes project_root — path traversal denied"
            )
        tasks_dir = str(resolved)
    else:
        tasks_dir = str(workspace / "tasks")

    return result.replace("{tasks_dir}", tasks_dir)
158
+
159
+
160
+ # ---------------------------------------------------------------------------
161
+ # Per-check-type emitters
162
+ # ---------------------------------------------------------------------------
163
+
164
+
165
def _emit_cli_help_contains(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Compile a multi-command help check into one capture action.

    Each string entry of ``params["commands"]`` is token-substituted and run
    in a subshell; all commands share ``<id>.stdout`` and ``<id>.stderr``.
    The first emitted command truncates both files and later ones append,
    so output from an earlier run cannot leak into the capture.

    Returns a ``COMPILE_ERROR`` stub when ``commands`` is missing, is not a
    non-empty list, or contains no string entries. Non-string entries are
    otherwise skipped.
    """
    commands = c.params.get("commands")
    if not isinstance(commands, list) or not commands:
        return _stub_compile_error(c, workspace)

    stdout_path = f"{workspace}/{c.id}.stdout"
    stderr_path = f"{workspace}/{c.id}.stderr"
    lines: list[str] = []
    for raw_cmd in commands:
        if not isinstance(raw_cmd, str):
            continue
        cmd = _substitute_command(
            raw_cmd, target_repo, workspace, project_root, c.params
        )
        # Key truncation on "first emitted line", not on the list index, so
        # a skipped non-string first entry does not disable truncation.
        if lines:
            out_op, err_op = ">>", "2>>"
        else:
            out_op, err_op = ">", "2>"
        lines.append(f'( {cmd} ) {out_op} "{stdout_path}" {err_op} "{stderr_path}"')

    if not lines:
        # Nothing runnable: surface an explicit failure rather than an
        # action that never creates the stdout artifact.
        return _stub_compile_error(c, workspace)

    # The exit artifact is a literal 0; the commands' own exit statuses are
    # not recorded by this snippet.
    lines.append(f'echo "0" > "{workspace}/{c.id}.exit"')
    snippet = "\n".join(lines)
    return TestAction(
        criterion_id=c.id,
        description=f"help-check: {len(commands)} commands",
        shell_snippet=snippet,
        artifact_paths=(f"{c.id}.stdout", f"{c.id}.stderr", f"{c.id}.exit"),
    )
190
+
191
+
192
def _emit_command_capture(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Emit an action that runs ``params["command"]`` and records its
    stdout, stderr and exit status as ``<id>.stdout``, ``<id>.stderr`` and
    ``<id>.exit`` in the workspace.

    Shared by every check type that only needs a single command's output.
    A missing, empty or non-string ``command`` yields a ``COMPILE_ERROR``
    stub instead.
    """
    template = c.params.get("command")
    if not (isinstance(template, str) and template):
        return _stub_compile_error(c, workspace)

    cmd = _substitute_command(template, target_repo, workspace, project_root, c.params)
    artifact_base = f"{workspace}/{c.id}"
    script = textwrap.dedent(f"""\
        ( {cmd} ) \\
        > "{artifact_base}.stdout" \\
        2> "{artifact_base}.stderr"
        echo "$?" > "{artifact_base}.exit"
    """)
    return TestAction(
        criterion_id=c.id,
        description=f"run: {cmd}",
        shell_snippet=script.strip(),
        artifact_paths=tuple(f"{c.id}.{ext}" for ext in ("stdout", "stderr", "exit")),
    )
212
+
213
+
214
def _emit_cli_writes_file(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Emit an action that runs ``params["command"]`` from inside the
    workspace and captures stdout, stderr and exit status.

    When ``params["expected_path"]`` is a non-empty string it is appended to
    the action's artifact paths as given.

    A missing, empty or non-string ``command`` yields a ``COMPILE_ERROR``
    stub, matching the other command-running emitters, so the failure is
    explicit rather than a silent skip.
    """
    raw_cmd = c.params.get("command")
    expected_path = c.params.get("expected_path")
    if not isinstance(raw_cmd, str) or not raw_cmd:
        return _stub_compile_error(c, workspace)
    cmd = _substitute_command(raw_cmd, target_repo, workspace, project_root, c.params)
    # Run with the workspace as cwd; the redirects apply to the whole subshell.
    snippet = textwrap.dedent(f"""\
        ( cd "{workspace}" && {cmd} ) \\
        > "{workspace}/{c.id}.stdout" \\
        2> "{workspace}/{c.id}.stderr"
        echo "$?" > "{workspace}/{c.id}.exit"
    """).strip()
    artifact_paths = [f"{c.id}.stdout", f"{c.id}.stderr", f"{c.id}.exit"]
    if isinstance(expected_path, str) and expected_path:
        artifact_paths.append(expected_path)
    return TestAction(
        criterion_id=c.id,
        description=f"writes-file: {expected_path or '?'}",
        shell_snippet=snippet,
        artifact_paths=tuple(artifact_paths),
    )
237
+
238
+
239
def _emit_file_exists(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Emit a documentation-only action for a passive ``file_exists`` check.

    The path comes from ``params["path"]``, falling back to
    ``params["expected_path"]``. The snippet is a single shell comment and
    runs no command. Returns ``None`` when no non-empty string path is
    configured.
    """
    params = c.params
    expected = params.get("path") or params.get("expected_path")
    if isinstance(expected, str) and expected:
        # Nothing to execute: the snippet only records which file is expected.
        note = f'# file_exists: verifier checks "{workspace}/{expected}" — no command needed'
        return TestAction(
            criterion_id=c.id,
            description=f"file-exists: {expected}",
            shell_snippet=note,
            artifact_paths=(expected,),
        )
    return None
254
+
255
+
256
def _emit_sync_action(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Emit a snippet that mirrors ``target_repo/.codeprobe/`` into the
    workspace and then touches ``<id>.synced`` as a completion marker.

    The criterion must carry a string ``source`` or ``search_in`` param,
    otherwise ``None`` is returned. The value is only type-checked here and
    is not interpolated into the snippet.
    """
    origin = c.params.get("source") or c.params.get("search_in")
    if not isinstance(origin, str):
        return None

    marker = f"{c.id}.synced"
    script = textwrap.dedent(f"""\
        # Sync target_repo output into workspace for {c.id}
        mkdir -p "{workspace}/.codeprobe"
        if [ -d "{target_repo}/.codeprobe" ]; then
        cp -r "{target_repo}/.codeprobe/." "{workspace}/.codeprobe/"
        fi
        touch "{workspace}/{marker}"
    """)
    return TestAction(
        criterion_id=c.id,
        description=f"sync .codeprobe for {c.check_type}",
        shell_snippet=script.strip(),
        artifact_paths=(marker,),
    )
280
+
281
+
282
def _emit_canary_detect(
    c: Criterion, target_repo: Path, workspace: Path, project_root: Path
) -> TestAction | None:
    """Emit a snippet that echoes the canary environment variable into
    ``canary.txt`` in the workspace and mirrors ``target_repo/.codeprobe/``
    into the workspace.

    The variable name comes from ``params["canary_env"]`` and defaults to
    ``CODEPROBE_CANARY_UUID``. It is spliced into the shell text as
    ``$NAME``, so names that are not strings or fail ``_SAFE_ENV_RE`` are
    rejected by returning ``None``.
    """
    env_name = c.params.get("canary_env", "CODEPROBE_CANARY_UUID")
    name_is_safe = isinstance(env_name, str) and bool(_SAFE_ENV_RE.fullmatch(env_name))
    if not name_is_safe:
        return None

    script = textwrap.dedent(f"""\
        # Canary detection for {c.id}
        echo "${env_name}" > "{workspace}/canary.txt"
        mkdir -p "{workspace}/.codeprobe"
        if [ -d "{target_repo}/.codeprobe" ]; then
        cp -r "{target_repo}/.codeprobe/." "{workspace}/.codeprobe/"
        fi
    """)
    return TestAction(
        criterion_id=c.id,
        description="canary: write UUID + sync workspace",
        shell_snippet=script.strip(),
        artifact_paths=("canary.txt",),
    )
306
+
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # Stub emitters (for missing/invalid params)
310
+ # ---------------------------------------------------------------------------
311
+
312
+
313
def _stub_compile_error(c: Criterion, workspace: Path) -> TestAction:
    """Build the fallback action used when a criterion cannot be compiled.

    The snippet writes the same ``COMPILE_ERROR`` line to ``<id>.stdout`` and
    ``<id>.stderr`` and records ``255`` in ``<id>.exit``, so the problem
    shows up as an explicit failure instead of a silent skip.
    """
    message = f"COMPILE_ERROR: missing or invalid params for {c.id}"
    artifact_base = f"{workspace}/{c.id}"
    script = textwrap.dedent(f"""\
        echo "{message}" \\
        > "{artifact_base}.stdout"
        echo "{message}" \\
        > "{artifact_base}.stderr"
        echo "255" > "{artifact_base}.exit"
    """)
    return TestAction(
        criterion_id=c.id,
        description=f"STUB: {c.id} (missing params)",
        shell_snippet=script.strip(),
        artifact_paths=tuple(f"{c.id}.{ext}" for ext in ("exit", "stdout", "stderr")),
    )
331
+
332
+
333
+ # ---------------------------------------------------------------------------
334
+ # Emitter dispatch table
335
+ # ---------------------------------------------------------------------------
336
+
337
# Signature shared by all emitters:
# (criterion, target_repo, workspace, project_root) -> action or None.
_Emitter = Callable[[Criterion, Path, Path, Path], TestAction | None]

# check_type -> emitter. Check types with no entry here produce no action.
_EMITTERS: dict[str, _Emitter] = {
    # Command runs that capture stdout / stderr / exit status.
    "cli_exit_code": _emit_command_capture,
    "cli_help_contains": _emit_cli_help_contains,
    "cli_stdout_contains": _emit_command_capture,
    "stdout_contains": _emit_command_capture,
    "stderr_contains": _emit_command_capture,
    "cli_writes_file": _emit_cli_writes_file,
    # Passive check: the snippet runs no command.
    "file_exists": _emit_file_exists,
    # Checks that need target_repo/.codeprobe mirrored into the workspace.
    "count_ge": _emit_sync_action,
    "json_count_ge": _emit_sync_action,
    "json_field_not_null": _emit_sync_action,
    "json_field_equals": _emit_sync_action,
    "json_field_type": _emit_sync_action,
    # Canary marker file plus the same .codeprobe mirror.
    "canary_detect": _emit_canary_detect,
}
354
+
355
+ __all__ = ["TestAction", "compile_actions"]
@@ -24,14 +24,24 @@ _ADAPTER_ENV_WHITELIST: frozenset[str] = frozenset(
24
24
  # System essentials
25
25
  "PATH",
26
26
  "HOME",
27
+ "USER",
28
+ "LOGNAME",
27
29
  "LANG",
28
30
  "TERM",
29
31
  "TMPDIR",
30
32
  "LC_ALL",
33
+ # XDG / desktop-session env — required for Linux keyring (libsecret)
34
+ # lookups so OAuth/keychain-auth agents can reach the session bus
35
+ # when CLAUDE_CONFIG_DIR is overridden for isolation.
36
+ "DBUS_SESSION_BUS_ADDRESS",
37
+ "XDG_RUNTIME_DIR",
38
+ "XDG_DATA_HOME",
39
+ "XDG_CONFIG_HOME",
31
40
  # Codeprobe sandbox signal (eval harness sets this)
32
41
  "CODEPROBE_SANDBOX",
33
42
  # Agent-specific API keys (required by the adapters)
34
43
  "ANTHROPIC_API_KEY",
44
+ "CLAUDE_CODE_OAUTH_TOKEN",
35
45
  "CLAUDE_CONFIG_DIR",
36
46
  "GITHUB_TOKEN",
37
47
  "OPENAI_API_KEY",
@@ -114,9 +124,7 @@ class BaseAdapter:
114
124
  @abstractmethod
115
125
  def build_command(self, prompt: str, config: AgentConfig) -> list[str]: ...
116
126
 
117
- def parse_output(
118
- self, result: subprocess.CompletedProcess[str], duration: float
119
- ) -> AgentOutput:
127
+ def parse_output(self, result: subprocess.CompletedProcess[str], duration: float) -> AgentOutput:
120
128
  """Convert subprocess result to AgentOutput.
121
129
 
122
130
  Subclasses override to extract tokens, cost, etc. from agent output.
@@ -137,9 +145,7 @@ class BaseAdapter:
137
145
  if not config.mcp_config:
138
146
  return None
139
147
  expanded = json.loads(os.path.expandvars(json.dumps(config.mcp_config)))
140
- tmp = tempfile.NamedTemporaryFile(
141
- mode="w", suffix=".json", prefix="codeprobe-mcp-", delete=False
142
- )
148
+ tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".json", prefix="codeprobe-mcp-", delete=False)
143
149
  json.dump(expanded, tmp)
144
150
  tmp.close()
145
151
  return tmp.name