codeprobe 0.5.2 (tar.gz) → 0.5.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. {codeprobe-0.5.2 → codeprobe-0.5.3}/PKG-INFO +1 -1
  2. {codeprobe-0.5.2 → codeprobe-0.5.3}/pyproject.toml +1 -1
  3. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/stats.py +25 -1
  4. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/PKG-INFO +1 -1
  5. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_stats.py +57 -0
  6. {codeprobe-0.5.2 → codeprobe-0.5.3}/LICENSE +0 -0
  7. {codeprobe-0.5.2 → codeprobe-0.5.3}/README.md +0 -0
  8. {codeprobe-0.5.2 → codeprobe-0.5.3}/setup.cfg +0 -0
  9. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/__init__.py +0 -0
  10. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/__main__.py +0 -0
  11. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/acceptance_compiler.py +0 -0
  12. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/__init__.py +0 -0
  13. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/_base.py +0 -0
  14. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/claude.py +0 -0
  15. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/codex.py +0 -0
  16. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/copilot.py +0 -0
  17. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/openai_compat.py +0 -0
  18. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/protocol.py +0 -0
  19. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/session.py +0 -0
  20. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/adapters/telemetry.py +0 -0
  21. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/__init__.py +0 -0
  22. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/dual.py +0 -0
  23. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/ranking.py +0 -0
  24. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/analysis/report.py +0 -0
  25. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/api.py +0 -0
  26. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/__init__.py +0 -0
  27. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/heuristics.py +0 -0
  28. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/assess/oracle_diff.py +0 -0
  29. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/__init__.py +0 -0
  30. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/assess_cmd.py +0 -0
  31. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/auth_cmd.py +0 -0
  32. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/doctor_cmd.py +0 -0
  33. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/experiment_cmd.py +0 -0
  34. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/init_cmd.py +0 -0
  35. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/interpret_cmd.py +0 -0
  36. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/json_display.py +0 -0
  37. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/mine_cmd.py +0 -0
  38. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/preamble_cmd.py +0 -0
  39. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/probe_cmd.py +0 -0
  40. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/ratings_cmd.py +0 -0
  41. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/rich_display.py +0 -0
  42. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/run_cmd.py +0 -0
  43. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  44. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/validate_cmd.py +0 -0
  45. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/wizard.py +0 -0
  46. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/cli/yaml_writer.py +0 -0
  47. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/__init__.py +0 -0
  48. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/loader.py +0 -0
  49. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/config/redact.py +0 -0
  50. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/__init__.py +0 -0
  51. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/_shared.py +0 -0
  52. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/adaptive.py +0 -0
  53. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/counterfactual.py +0 -0
  54. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/debate.py +0 -0
  55. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/decision_tree.py +0 -0
  56. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/elo.py +0 -0
  57. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/fingerprint.py +0 -0
  58. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/mutation.py +0 -0
  59. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/pareto.py +0 -0
  60. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/sprt.py +0 -0
  61. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/contrib/tournament.py +0 -0
  62. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/__init__.py +0 -0
  63. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/__main__.py +0 -0
  64. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/checkpoint.py +0 -0
  65. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/events.py +0 -0
  66. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/executor.py +0 -0
  67. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/experiment.py +0 -0
  68. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/isolation.py +0 -0
  69. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/llm.py +0 -0
  70. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/mcp_discovery.py +0 -0
  71. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/preamble.py +0 -0
  72. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/registry.py +0 -0
  73. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/repo_hygiene.py +0 -0
  74. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/sandbox.py +0 -0
  75. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/core/scoring.py +0 -0
  76. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/loaders/__init__.py +0 -0
  77. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/loaders/suite.py +0 -0
  78. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/__init__.py +0 -0
  79. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/_graph.py +0 -0
  80. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/_lang.py +0 -0
  81. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/comprehension.py +0 -0
  82. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/comprehension_writer.py +0 -0
  83. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator.py +0 -0
  84. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator_backends.py +0 -0
  85. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/curator_tiers.py +0 -0
  86. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/extractor.py +0 -0
  87. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/multi_repo.py +0 -0
  88. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale.py +0 -0
  89. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_families.py +0 -0
  90. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  91. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  92. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/org_scale_validate.py +0 -0
  93. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sg_auth.py +0 -0
  94. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  95. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/sources.py +0 -0
  96. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/task_types.py +0 -0
  97. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/mining/writer.py +0 -0
  98. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/__init__.py +0 -0
  99. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/evalrc.py +0 -0
  100. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/experiment.py +0 -0
  101. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/preamble.py +0 -0
  102. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/suite.py +0 -0
  103. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/models/task.py +0 -0
  104. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/__init__.py +0 -0
  105. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/github.md +0 -0
  106. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/preambles/sourcegraph.md +0 -0
  107. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/__init__.py +0 -0
  108. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/adapter.py +0 -0
  109. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/generator.py +0 -0
  110. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/probe/writer.py +0 -0
  111. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/ratings/__init__.py +0 -0
  112. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/ratings/collector.py +0 -0
  113. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/scaffold/__init__.py +0 -0
  114. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/scaffold/writer.py +0 -0
  115. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/__init__.py +0 -0
  116. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  117. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  118. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  119. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  120. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  121. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/entry_points.txt +0 -0
  122. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/requires.txt +0 -0
  123. {codeprobe-0.5.2 → codeprobe-0.5.3}/src/codeprobe.egg-info/top_level.txt +0 -0
  124. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_acceptance_compiler.py +0 -0
  125. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_acceptance_compiler_integration.py +0 -0
  126. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_adapter_contracts.py +0 -0
  127. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_adapters.py +0 -0
  128. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_analysis.py +0 -0
  129. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_api.py +0 -0
  130. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_artifact_scorer.py +0 -0
  131. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_assess.py +0 -0
  132. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_auth_cmd.py +0 -0
  133. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_changed_symbols.py +0 -0
  134. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_checkpoint.py +0 -0
  135. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_checkpoint_scoring.py +0 -0
  136. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_cli.py +0 -0
  137. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_comprehension.py +0 -0
  138. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_config_loader.py +0 -0
  139. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_contrib.py +0 -0
  140. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_convergence.py +0 -0
  141. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_criteria_loader.py +0 -0
  142. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ctrlc_integration.py +0 -0
  143. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_backends.py +0 -0
  144. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_core.py +0 -0
  145. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_integration.py +0 -0
  146. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_curator_tiers.py +0 -0
  147. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_doctor_cmd.py +0 -0
  148. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_adversarial_fixes.py +0 -0
  149. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_composite.py +0 -0
  150. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_e2e.py +0 -0
  151. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_matrix.py +0 -0
  152. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_scorer.py +0 -0
  153. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_dual_scoring_details.py +0 -0
  154. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_events.py +0 -0
  155. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_examples_dual.py +0 -0
  156. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor.py +0 -0
  157. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor_dual_isolation.py +0 -0
  158. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_executor_events.py +0 -0
  159. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_experiment_cmd.py +0 -0
  160. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_experiment_core.py +0 -0
  161. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ground_truth_schema.py +0 -0
  162. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_init_wizard.py +0 -0
  163. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_isolation.py +0 -0
  164. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_json_display.py +0 -0
  165. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_listeners_dual.py +0 -0
  166. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_llm.py +0 -0
  167. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loader.py +0 -0
  168. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loaders.py +0 -0
  169. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_loaders_dual.py +0 -0
  170. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mcp_families_mining.py +0 -0
  171. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mcp_validate.py +0 -0
  172. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_cli.py +0 -0
  173. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_goals.py +0 -0
  174. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_presets.py +0 -0
  175. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mine_profiles.py +0 -0
  176. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mining.py +0 -0
  177. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_mining_dual.py +0 -0
  178. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_models.py +0 -0
  179. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_multi_repo_e2e.py +0 -0
  180. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_multi_repo_mining.py +0 -0
  181. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_new_families.py +0 -0
  182. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_openai_compat.py +0 -0
  183. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_diff.py +0 -0
  184. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_registry.py +0 -0
  185. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_oracle_types.py +0 -0
  186. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_org_scale.py +0 -0
  187. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_pipeline_integration.py +0 -0
  188. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_preamble.py +0 -0
  189. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_preamble_cmd.py +0 -0
  190. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_probe.py +0 -0
  191. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_probe_adapter.py +0 -0
  192. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ratings.py +0 -0
  193. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_ratings_cmd.py +0 -0
  194. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_registry.py +0 -0
  195. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_regression_gate.py +0 -0
  196. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_release_gate.py +0 -0
  197. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_repo_hygiene.py +0 -0
  198. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_report_dual.py +0 -0
  199. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_run_config_resolution.py +0 -0
  200. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_safe_leg_score.py +0 -0
  201. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scaffold.py +0 -0
  202. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scaffold_upgrade.py +0 -0
  203. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scanner_refactor.py +0 -0
  204. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_score_result.py +0 -0
  205. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring.py +0 -0
  206. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring_extended.py +0 -0
  207. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_scoring_v2.py +0 -0
  208. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sdlc_ground_truth.py +0 -0
  209. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_secret_redaction.py +0 -0
  210. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_session.py +0 -0
  211. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sg_auth.py +0 -0
  212. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_sg_ground_truth.py +0 -0
  213. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_shell_shim.py +0 -0
  214. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_show_prompt.py +0 -0
  215. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_suite.py +0 -0
  216. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_suite_manifest.py +0 -0
  217. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_task_model.py +0 -0
  218. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_task_types.py +0 -0
  219. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_telemetry.py +0 -0
  220. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_validate_cmd.py +0 -0
  221. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_validate_dual.py +0 -0
  222. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_verifier.py +0 -0
  223. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_verify.py +0 -0
  224. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_weighted_checklist.py +0 -0
  225. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_weighted_f1.py +0 -0
  226. {codeprobe-0.5.2 → codeprobe-0.5.3}/tests/test_writer_dual.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.5.2"
3
+ version = "0.5.3"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -583,7 +583,31 @@ def compare_configs(
583
583
  elif speed_diff > 0:
584
584
  parts.append(f"{speed_diff:.1f}s slower")
585
585
 
586
- summary = f"{a.label} vs {b.label}: {', '.join(parts)} " f"\u2192 {winner} wins"
586
+ # Soften the verdict when the effect is negligible or the test is
587
+ # underpowered, so we don't confidently declare a "winner" on what may
588
+ # be noise. Thresholds:
589
+ # Cohen's d: |d| < 0.2 is "negligible" (Cohen 1988).
590
+ # Cliff's delta: |delta| < 0.147 is "negligible" (Romano et al. 2006).
591
+ # p-value > 0.05: not significant at the conventional threshold.
592
+ scores_tied = abs(score_diff) < 0.01
593
+ negligible_threshold = 0.2 if eff_method == "cohens_d" else 0.147
594
+ small_effect = (
595
+ eff_size is not None and abs(eff_size) < negligible_threshold
596
+ )
597
+ not_significant = p_val is not None and p_val > 0.05
598
+
599
+ if scores_tied:
600
+ verdict = "effectively tied"
601
+ elif small_effect and not_significant:
602
+ verdict = f"{winner} nominally ahead (not significant; small effect)"
603
+ elif small_effect:
604
+ verdict = f"{winner} nominally ahead (small effect size)"
605
+ elif not_significant:
606
+ verdict = f"{winner} nominally ahead (not significant at p=0.05)"
607
+ else:
608
+ verdict = f"{winner} wins"
609
+
610
+ summary = f"{a.label} vs {b.label}: {', '.join(parts)} \u2192 {verdict}"
587
611
 
588
612
  return PairwiseComparison(
589
613
  config_a=a.label,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -259,3 +259,60 @@ class TestComparePairwiseContinuousRouting:
259
259
  b_scores=[0.0, 1.0, 0.0],
260
260
  )
261
261
  assert cmp.effect_size_method == "cliffs_delta"
262
+
263
+
264
+ class TestVerdictSoftening:
265
+ """Summary text softens the verdict when the effect is small or p is high."""
266
+
267
+ def _run_compare(self, a_scores, b_scores):
268
+ from codeprobe.analysis.stats import compare_configs
269
+ from codeprobe.models.experiment import ConfigResults
270
+
271
+ a_cr = ConfigResults(
272
+ config="a",
273
+ completed=[_task(f"t{i}", s) for i, s in enumerate(a_scores)],
274
+ )
275
+ b_cr = ConfigResults(
276
+ config="b",
277
+ completed=[_task(f"t{i}", s) for i, s in enumerate(b_scores)],
278
+ )
279
+ return compare_configs(
280
+ summarize_config(a_cr), summarize_config(b_cr),
281
+ a_scores=list(a_scores), b_scores=list(b_scores),
282
+ )
283
+
284
+ def test_large_effect_with_power_says_wins(self) -> None:
285
+ """Consistent large gap across enough samples → unqualified winner."""
286
+ # N=8, unambiguous separation in every paired sample
287
+ a = [0.90, 0.88, 0.92, 0.85, 0.87, 0.93, 0.89, 0.91]
288
+ b = [0.10, 0.12, 0.15, 0.08, 0.18, 0.11, 0.14, 0.09]
289
+ cmp = self._run_compare(a, b)
290
+ assert "a wins" in cmp.summary
291
+ assert "nominally" not in cmp.summary
292
+
293
+ def test_small_effect_softens_verdict(self) -> None:
294
+ """Noisy data with a tiny gap → softened verdict.
295
+
296
+ The gap (~0.02) clears the 0.01 tied threshold, but high within-
297
+ config variance keeps Cohen's d < 0.2, which should trigger the
298
+ "nominally ahead (small effect)" wording.
299
+ """
300
+ a = [0.95, 0.10, 0.85, 0.20, 0.75, 0.30]
301
+ b = [0.93, 0.08, 0.83, 0.18, 0.72, 0.28]
302
+ cmp = self._run_compare(a, b)
303
+ assert "nominally ahead" in cmp.summary
304
+ # Should NOT say "wins" unqualified
305
+ assert " a wins" not in cmp.summary
306
+ assert " b wins" not in cmp.summary
307
+
308
+ def test_tied_scores_report_tied(self) -> None:
309
+ cmp = self._run_compare([0.5, 0.5], [0.5, 0.5])
310
+ assert "effectively tied" in cmp.summary
311
+
312
+ def test_real_experiment_numbers_produce_softened_verdict(self) -> None:
313
+ """Regression: the kubernetes-mcp-comparison scenario (N=5, d=0.076)."""
314
+ baseline = [0.75, 0.40, 0.11, 0.71, 0.14]
315
+ with_mcp = [0.71, 0.36, 0.08, 0.71, 0.14]
316
+ cmp = self._run_compare(baseline, with_mcp)
317
+ # score_diff ~0.02, small cohen's d, high p → softened verdict
318
+ assert "nominally ahead" in cmp.summary
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes