codeprobe 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. {codeprobe-0.3.6 → codeprobe-0.3.8}/PKG-INFO +1 -1
  2. {codeprobe-0.3.6 → codeprobe-0.3.8}/pyproject.toml +1 -1
  3. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/__init__.py +1 -1
  4. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/init_cmd.py +69 -17
  5. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/mine_cmd.py +29 -9
  6. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/wizard.py +16 -7
  7. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/extractor.py +75 -5
  8. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/PKG-INFO +1 -1
  9. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_init_wizard.py +4 -4
  10. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mining.py +109 -0
  11. {codeprobe-0.3.6 → codeprobe-0.3.8}/LICENSE +0 -0
  12. {codeprobe-0.3.6 → codeprobe-0.3.8}/README.md +0 -0
  13. {codeprobe-0.3.6 → codeprobe-0.3.8}/setup.cfg +0 -0
  14. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/__main__.py +0 -0
  15. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/__init__.py +0 -0
  16. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/_base.py +0 -0
  17. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/claude.py +0 -0
  18. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/codex.py +0 -0
  19. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/copilot.py +0 -0
  20. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/openai_compat.py +0 -0
  21. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/protocol.py +0 -0
  22. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/session.py +0 -0
  23. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/adapters/telemetry.py +0 -0
  24. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/analysis/__init__.py +0 -0
  25. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/analysis/ranking.py +0 -0
  26. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/analysis/report.py +0 -0
  27. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/analysis/stats.py +0 -0
  28. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/api.py +0 -0
  29. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/assess/__init__.py +0 -0
  30. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/assess/heuristics.py +0 -0
  31. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/__init__.py +0 -0
  32. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/assess_cmd.py +0 -0
  33. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/auth_cmd.py +0 -0
  34. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/doctor_cmd.py +0 -0
  35. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/experiment_cmd.py +0 -0
  36. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/interpret_cmd.py +0 -0
  37. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/json_display.py +0 -0
  38. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/preamble_cmd.py +0 -0
  39. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/probe_cmd.py +0 -0
  40. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/ratings_cmd.py +0 -0
  41. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/rich_display.py +0 -0
  42. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/run_cmd.py +0 -0
  43. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/scaffold_cmd.py +0 -0
  44. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/validate_cmd.py +0 -0
  45. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/cli/yaml_writer.py +0 -0
  46. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/config/__init__.py +0 -0
  47. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/config/loader.py +0 -0
  48. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/config/redact.py +0 -0
  49. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/__init__.py +0 -0
  50. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/_shared.py +0 -0
  51. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/adaptive.py +0 -0
  52. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/counterfactual.py +0 -0
  53. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/debate.py +0 -0
  54. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/decision_tree.py +0 -0
  55. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/elo.py +0 -0
  56. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/fingerprint.py +0 -0
  57. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/mutation.py +0 -0
  58. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/pareto.py +0 -0
  59. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/sprt.py +0 -0
  60. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/contrib/tournament.py +0 -0
  61. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/__init__.py +0 -0
  62. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/__main__.py +0 -0
  63. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/checkpoint.py +0 -0
  64. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/events.py +0 -0
  65. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/executor.py +0 -0
  66. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/experiment.py +0 -0
  67. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/isolation.py +0 -0
  68. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/llm.py +0 -0
  69. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/mcp_discovery.py +0 -0
  70. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/preamble.py +0 -0
  71. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/registry.py +0 -0
  72. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/sandbox.py +0 -0
  73. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/core/scoring.py +0 -0
  74. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/loaders/__init__.py +0 -0
  75. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/loaders/suite.py +0 -0
  76. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/__init__.py +0 -0
  77. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/_graph.py +0 -0
  78. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/_lang.py +0 -0
  79. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/comprehension.py +0 -0
  80. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/comprehension_writer.py +0 -0
  81. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/curator.py +0 -0
  82. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/curator_backends.py +0 -0
  83. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/curator_tiers.py +0 -0
  84. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/multi_repo.py +0 -0
  85. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/org_scale.py +0 -0
  86. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/org_scale_families.py +0 -0
  87. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/org_scale_oracle.py +0 -0
  88. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/org_scale_scanner.py +0 -0
  89. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/org_scale_validate.py +0 -0
  90. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/sg_auth.py +0 -0
  91. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/sg_ground_truth.py +0 -0
  92. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/sources.py +0 -0
  93. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/mining/writer.py +0 -0
  94. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/__init__.py +0 -0
  95. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/evalrc.py +0 -0
  96. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/experiment.py +0 -0
  97. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/preamble.py +0 -0
  98. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/suite.py +0 -0
  99. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/models/task.py +0 -0
  100. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/preambles/__init__.py +0 -0
  101. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/preambles/github.md +0 -0
  102. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/preambles/sourcegraph.md +0 -0
  103. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/probe/__init__.py +0 -0
  104. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/probe/adapter.py +0 -0
  105. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/probe/generator.py +0 -0
  106. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/probe/writer.py +0 -0
  107. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/ratings/__init__.py +0 -0
  108. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/ratings/collector.py +0 -0
  109. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/scaffold/__init__.py +0 -0
  110. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/scaffold/writer.py +0 -0
  111. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/templates/__init__.py +0 -0
  112. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/templates/evalrc-mcp-comparison.yaml +0 -0
  113. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/templates/evalrc-model-comparison.yaml +0 -0
  114. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe/templates/evalrc-prompt-comparison.yaml +0 -0
  115. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/SOURCES.txt +0 -0
  116. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/dependency_links.txt +0 -0
  117. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/entry_points.txt +0 -0
  118. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/requires.txt +0 -0
  119. {codeprobe-0.3.6 → codeprobe-0.3.8}/src/codeprobe.egg-info/top_level.txt +0 -0
  120. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_adapter_contracts.py +0 -0
  121. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_adapters.py +0 -0
  122. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_analysis.py +0 -0
  123. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_api.py +0 -0
  124. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_artifact_scorer.py +0 -0
  125. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_assess.py +0 -0
  126. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_auth_cmd.py +0 -0
  127. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_changed_symbols.py +0 -0
  128. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_checkpoint.py +0 -0
  129. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_checkpoint_scoring.py +0 -0
  130. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_cli.py +0 -0
  131. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_comprehension.py +0 -0
  132. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_config_loader.py +0 -0
  133. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_contrib.py +0 -0
  134. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_ctrlc_integration.py +0 -0
  135. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_curator_backends.py +0 -0
  136. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_curator_core.py +0 -0
  137. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_curator_integration.py +0 -0
  138. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_curator_tiers.py +0 -0
  139. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_doctor_cmd.py +0 -0
  140. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_events.py +0 -0
  141. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_executor.py +0 -0
  142. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_executor_events.py +0 -0
  143. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_experiment_cmd.py +0 -0
  144. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_experiment_core.py +0 -0
  145. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_isolation.py +0 -0
  146. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_json_display.py +0 -0
  147. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_llm.py +0 -0
  148. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_loaders.py +0 -0
  149. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mcp_families_mining.py +0 -0
  150. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mcp_validate.py +0 -0
  151. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mine_cli.py +0 -0
  152. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mine_goals.py +0 -0
  153. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mine_presets.py +0 -0
  154. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_mine_profiles.py +0 -0
  155. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_models.py +0 -0
  156. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_multi_repo_e2e.py +0 -0
  157. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_multi_repo_mining.py +0 -0
  158. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_new_families.py +0 -0
  159. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_openai_compat.py +0 -0
  160. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_oracle_types.py +0 -0
  161. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_org_scale.py +0 -0
  162. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_pipeline_integration.py +0 -0
  163. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_preamble.py +0 -0
  164. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_preamble_cmd.py +0 -0
  165. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_probe.py +0 -0
  166. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_probe_adapter.py +0 -0
  167. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_ratings.py +0 -0
  168. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_ratings_cmd.py +0 -0
  169. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_registry.py +0 -0
  170. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_run_config_resolution.py +0 -0
  171. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_scaffold.py +0 -0
  172. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_scanner_refactor.py +0 -0
  173. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_scoring.py +0 -0
  174. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_secret_redaction.py +0 -0
  175. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_session.py +0 -0
  176. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_sg_auth.py +0 -0
  177. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_sg_ground_truth.py +0 -0
  178. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_shell_shim.py +0 -0
  179. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_show_prompt.py +0 -0
  180. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_suite.py +0 -0
  181. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_suite_manifest.py +0 -0
  182. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_telemetry.py +0 -0
  183. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_validate_cmd.py +0 -0
  184. {codeprobe-0.3.6 → codeprobe-0.3.8}/tests/test_weighted_f1.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codeprobe"
3
- version = "0.3.6"
3
+ version = "0.3.8"
4
4
  description = "Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results."
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -1,3 +1,3 @@
1
1
  """codeprobe — Benchmark AI coding agents against your own codebase."""
2
2
 
3
- __version__ = "0.3.6"
3
+ __version__ = "0.3.8"
@@ -153,17 +153,35 @@ def _detect_sourcegraph_in_mcp(
153
153
 
154
154
 
155
155
  def _prompt_sourcegraph_token() -> str:
156
- """Prompt for Sourcegraph access token, checking env var first."""
157
- import os
156
+ """Resolve Sourcegraph token: cached auth > env var > interactive prompt."""
157
+ from codeprobe.mining.sg_auth import AuthError, get_valid_token
158
+
159
+ # 1. Check cached auth and env var via the standard auth resolver
160
+ try:
161
+ cached = get_valid_token()
162
+ masked = cached.access_token[:4] + "..." + cached.access_token[-4:]
163
+ source = "cached" if not cached.refresh_token else "cached (refreshable)"
164
+ click.echo(f" Found Sourcegraph credentials ({masked}, {source})")
165
+ if click.confirm(" Use these credentials?", default=True):
166
+ return cached.access_token
167
+ except AuthError:
168
+ pass
169
+
170
+ # 2. Offer OAuth-style flow via `codeprobe auth sourcegraph`
171
+ click.echo()
172
+ click.echo(" No cached Sourcegraph credentials found.")
173
+ click.echo(" Options:")
174
+ click.echo(" 1. Paste a Personal Access Token now")
175
+ click.echo(" 2. Run `codeprobe auth sourcegraph` first (recommended)")
176
+ choice = click.prompt(" Choose", type=click.IntRange(1, 2), default=1)
158
177
 
159
- env_token = os.environ.get("SOURCEGRAPH_TOKEN", "")
160
- if env_token:
161
- masked = env_token[:4] + "..." + env_token[-4:] if len(env_token) > 8 else "***"
162
- click.echo(f" Found SOURCEGRAPH_TOKEN in environment ({masked})")
163
- if click.confirm(" Use this token?", default=True):
164
- return env_token
178
+ if choice == 2:
179
+ click.echo()
180
+ click.echo(" Run this command, then re-run `codeprobe init`:")
181
+ click.echo(" codeprobe auth sourcegraph")
182
+ raise SystemExit(0)
165
183
 
166
- return click.prompt("Sourcegraph access token")
184
+ return click.prompt(" Sourcegraph access token", hide_input=True)
167
185
 
168
186
 
169
187
  def _prompt_sourcegraph_url() -> str | None:
@@ -176,6 +194,32 @@ def _prompt_sourcegraph_url() -> str | None:
176
194
  return url if url else None
177
195
 
178
196
 
197
+ def _extract_sourcegraph_mcp(
198
+ discovered: list[tuple[Path, list[str]]],
199
+ ) -> dict | None:
200
+ """Load the Sourcegraph MCP config from a discovered config file.
201
+
202
+ Returns an MCP config dict containing only the Sourcegraph server entry,
203
+ or None if no Sourcegraph server is found.
204
+ """
205
+ import json
206
+
207
+ sg_names = {"sourcegraph", "sg", "sourcegraph-mcp"}
208
+ for path, server_names in discovered:
209
+ matching = [n for n in server_names if n.lower() in sg_names]
210
+ if not matching:
211
+ continue
212
+ try:
213
+ data = json.loads(path.read_text(encoding="utf-8"))
214
+ except (json.JSONDecodeError, OSError):
215
+ continue
216
+ servers = data.get("mcpServers", {})
217
+ for name in matching:
218
+ if name in servers:
219
+ return {"mcpServers": {name: servers[name]}}
220
+ return None
221
+
222
+
179
223
  def _goal_mcp(agents: list[str], name: str) -> _Result:
180
224
  """Goal 1: MCP comparison prompts."""
181
225
  agent = _prompt_agent(agents)
@@ -183,19 +227,27 @@ def _goal_mcp(agents: list[str], name: str) -> _Result:
183
227
 
184
228
  # Check if Sourcegraph is available in discovered MCP configs
185
229
  discovered = discover_mcp_configs()
186
- use_sourcegraph = False
187
230
 
188
231
  if _detect_sourcegraph_in_mcp(discovered):
189
232
  click.echo()
190
233
  click.echo("Detected Sourcegraph MCP server in your configuration.")
191
- click.echo("codeprobe can use the HTTP endpoint for better performance.")
192
- use_sourcegraph = click.confirm("Use Sourcegraph HTTP MCP?", default=True)
193
- else:
194
- click.echo()
195
- click.echo("Would you like to use Sourcegraph as the MCP server?")
196
- use_sourcegraph = click.confirm("Use Sourcegraph?", default=False)
234
+ if click.confirm("Use this Sourcegraph config?", default=True):
235
+ sg_config = _extract_sourcegraph_mcp(discovered)
236
+ if sg_config:
237
+ return ask_mcp_comparison(
238
+ experiment_name=name,
239
+ agent=agent,
240
+ model=model,
241
+ mcp_config=sg_config,
242
+ )
243
+
244
+ click.echo()
245
+ click.echo("MCP server options:")
246
+ click.echo(" 1. Use a Sourcegraph access token (PAT)")
247
+ click.echo(" 2. Use an existing MCP config file")
248
+ choice = click.prompt("Choose", type=click.IntRange(1, 2), default=1)
197
249
 
198
- if use_sourcegraph:
250
+ if choice == 1:
199
251
  token = _prompt_sourcegraph_token()
200
252
  sg_url = _prompt_sourcegraph_url()
201
253
  return ask_mcp_comparison(
@@ -352,24 +352,31 @@ def _show_results_table(tasks: list["Task"]) -> None:
352
352
  click.echo()
353
353
 
354
354
 
355
- def _show_next_steps(repo_path: Path, min_files: int) -> None:
355
+ def _show_next_steps(
356
+ repo_path: Path, min_files: int, *, llm_enriched: bool = False
357
+ ) -> None:
356
358
  """Phase 6: Show next steps."""
357
359
  click.echo("Next steps:")
358
360
  click.echo()
359
- click.echo(" 1. Review and enrich task instructions (recommended):")
360
- click.echo(" codeprobe mine {path} --enrich".format(path=repo_path))
361
- click.echo()
362
- click.echo(" 2. Run the eval:")
361
+ step = 1
362
+ if not llm_enriched:
363
+ click.echo(f" {step}. Review and enrich task instructions (recommended):")
364
+ click.echo(" codeprobe mine {path} --enrich".format(path=repo_path))
365
+ click.echo()
366
+ step += 1
367
+ click.echo(f" {step}. Run the eval:")
363
368
  click.echo(" codeprobe run {path} --agent claude".format(path=repo_path))
364
369
  click.echo()
365
- click.echo(" 3. Try a different model:")
370
+ step += 1
371
+ click.echo(f" {step}. Try a different model:")
366
372
  click.echo(
367
373
  " codeprobe run {path} --agent claude --model claude-sonnet-4-6".format(
368
374
  path=repo_path,
369
375
  )
370
376
  )
371
377
  click.echo()
372
- click.echo(" 4. Set a cost budget:")
378
+ step += 1
379
+ click.echo(f" {step}. Set a cost budget:")
373
380
  click.echo(
374
381
  " codeprobe run {path} --agent claude --max-cost-usd 5.00".format(
375
382
  path=repo_path,
@@ -558,6 +565,15 @@ def _interactive_config(
558
565
  )
559
566
 
560
567
 
568
+ def _was_llm_used(no_llm: bool) -> bool:
569
+ """Report whether LLM instruction generation was enabled (not *no_llm*) and an LLM is available."""
570
+ if no_llm:
571
+ return False
572
+ from codeprobe.core.llm import llm_available
573
+
574
+ return llm_available()
575
+
576
+
561
577
  def _enrich_sdlc_tasks(
562
578
  tasks: list["Task"],
563
579
  mine_result: "MineResult",
@@ -1063,6 +1079,7 @@ def _dispatch_sdlc(
1063
1079
  return
1064
1080
 
1065
1081
  tasks = _enrich_sdlc_tasks(tasks, mine_result, no_llm, enrich)
1082
+ llm_used = _was_llm_used(no_llm)
1066
1083
 
1067
1084
  tasks_dir = _clear_tasks_dir(repo_path)
1068
1085
  for task in tasks:
@@ -1078,6 +1095,7 @@ def _dispatch_sdlc(
1078
1095
  subsystems,
1079
1096
  repo_path,
1080
1097
  task_types=("sdlc_code_change",),
1098
+ llm_enriched=llm_used,
1081
1099
  )
1082
1100
 
1083
1101
 
@@ -1255,7 +1273,7 @@ def _dispatch_mixed(
1255
1273
  if subsystems:
1256
1274
  click.echo(f"Subsystems: {', '.join(subsystems)}")
1257
1275
  click.echo()
1258
- _show_next_steps(repo_path, min_files)
1276
+ _show_next_steps(repo_path, min_files, llm_enriched=_was_llm_used(no_llm))
1259
1277
 
1260
1278
 
1261
1279
  def _finish_mine_output(
@@ -1266,6 +1284,8 @@ def _finish_mine_output(
1266
1284
  subsystems: tuple[str, ...],
1267
1285
  repo_path: Path,
1268
1286
  task_types: tuple[str, ...] = (),
1287
+ *,
1288
+ llm_enriched: bool = False,
1269
1289
  ) -> None:
1270
1290
  """Shared output: quality warnings, path, subsystems, next steps."""
1271
1291
  from codeprobe.mining.writer import write_suite_manifest
@@ -1291,7 +1311,7 @@ def _finish_mine_output(
1291
1311
  if subsystems:
1292
1312
  click.echo(f"Subsystems: {', '.join(subsystems)}")
1293
1313
  click.echo()
1294
- _show_next_steps(repo_path, 0)
1314
+ _show_next_steps(repo_path, 0, llm_enriched=llm_enriched)
1295
1315
 
1296
1316
 
1297
1317
  def run_mine(
@@ -45,26 +45,35 @@ def ask_mcp_comparison(
45
45
  agent: str,
46
46
  model: str | None,
47
47
  mcp_config_path: str | None = None,
48
+ mcp_config: dict | None = None,
48
49
  sourcegraph_token: str | None = None,
49
50
  sourcegraph_url: str | None = None,
50
51
  ) -> tuple[EvalrcConfig, list[ExperimentConfig]]:
51
52
  """Goal 1: Compare baseline agent vs MCP-augmented agent.
52
53
 
53
- When *sourcegraph_token* is provided, generates an HTTP-based Sourcegraph
54
- MCP config with an ``Authorization`` header and adds the ``sourcegraph``
55
- preamble. Otherwise falls back to loading the MCP config from
56
- *mcp_config_path*.
54
+ Resolution order for MCP config:
55
+ 1. *mcp_config* — pre-built dict (e.g. from discovered Claude Code config)
56
+ 2. *sourcegraph_token* — build HTTP config with Authorization header
57
+ 3. *mcp_config_path* — load from a JSON file on disk
57
58
  """
58
- if sourcegraph_token is not None:
59
+ if mcp_config is not None:
60
+ mcp_data = mcp_config
61
+ # Detect if this is a Sourcegraph config for preamble
62
+ servers = mcp_data.get("mcpServers", {})
63
+ sg_names = {"sourcegraph", "sg", "sourcegraph-mcp"}
64
+ preambles: tuple[str, ...] = (
65
+ ("sourcegraph",) if any(k.lower() in sg_names for k in servers) else ()
66
+ )
67
+ elif sourcegraph_token is not None:
59
68
  mcp_data = build_sourcegraph_mcp_config(
60
69
  token=sourcegraph_token,
61
70
  url=sourcegraph_url or _DEFAULT_SOURCEGRAPH_URL,
62
71
  )
63
- preambles: tuple[str, ...] = ("sourcegraph",)
72
+ preambles = ("sourcegraph",)
64
73
  else:
65
74
  if mcp_config_path is None:
66
75
  raise click.BadParameter(
67
- "Either sourcegraph_token or mcp_config_path must be provided."
76
+ "Provide mcp_config, sourcegraph_token, or mcp_config_path."
68
77
  )
69
78
  mcp_data = _load_json(mcp_config_path)
70
79
  preambles = ()
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import json as _json
6
6
  import logging
7
7
  import re
8
+ import shlex
8
9
  import subprocess
9
10
  from dataclasses import dataclass, replace
10
11
  from pathlib import Path
@@ -104,6 +105,39 @@ def _get_changed_files(merge_sha: str, repo_path: Path) -> list[str]:
104
105
  return [f for f in result.stdout.strip().splitlines() if f.strip()]
105
106
 
106
107
 
108
+ def _get_deleted_dirs(merge_sha: str, repo_path: Path) -> set[str]:
109
+ """Return the parent directories of files deleted in a merge commit.
110
+
111
+ Uses ``git diff --diff-filter=D --name-only`` to find deleted files,
112
+ then returns the set of their parent directories (not verified to be fully removed).
113
+ """
114
+ try:
115
+ result = subprocess.run(
116
+ [
117
+ "git",
118
+ "diff",
119
+ f"{merge_sha}^..{merge_sha}",
120
+ "--diff-filter=D",
121
+ "--name-only",
122
+ ],
123
+ cwd=str(repo_path),
124
+ capture_output=True,
125
+ text=True,
126
+ timeout=_DIFF_STAT_TIMEOUT,
127
+ )
128
+ if result.returncode != 0:
129
+ return set()
130
+ except (subprocess.TimeoutExpired, OSError):
131
+ return set()
132
+
133
+ deleted_files = [f for f in result.stdout.strip().splitlines() if f.strip()]
134
+ if not deleted_files:
135
+ return set()
136
+
137
+ # Collect parent directories of deleted files
138
+ return {str(Path(f).parent) for f in deleted_files}
139
+
140
+
107
141
  def extract_subsystems(
108
142
  prs: list[MergedPR],
109
143
  repo_path: Path,
@@ -156,26 +190,57 @@ _ISSUE_REF_PATTERN = re.compile(
156
190
  )
157
191
 
158
192
 
159
- def _build_test_command(language: str, test_files: list[str]) -> str:
193
+ def _build_test_command(
194
+ language: str,
195
+ test_files: list[str],
196
+ repo_path: Path | None = None,
197
+ deleted_dirs: set[str] | None = None,
198
+ ) -> str:
160
199
  """Build a targeted test command from language and test file paths.
161
200
 
162
201
  Supports Python (pytest), Go (go test), and JS/TS (npm test with jest pattern).
163
202
  Falls back to the generic test.sh for unsupported languages or empty file lists.
203
+
204
+ When *repo_path* is provided and exists on disk, validates that referenced
205
+ paths exist in the target repo and drops any that don't.
206
+
207
+ When *deleted_dirs* is provided and ALL test packages fall within deleted
208
+ directories, generates a removal-verification command (checks dirs no longer
209
+ exist) instead of a test command.
164
210
  """
211
+ validate = repo_path is not None and repo_path.is_dir()
212
+
165
213
  if not test_files:
166
214
  return _DEFAULT_TEST_COMMAND
167
215
 
168
216
  if language == "python":
217
+ if validate:
218
+ test_files = [f for f in test_files if (repo_path / f).exists()]
219
+ if not test_files:
220
+ return _DEFAULT_TEST_COMMAND
169
221
  return f"pytest {' '.join(test_files)}"
170
222
 
171
223
  if language == "go":
172
- # Extract unique package directories from test file paths
173
224
  packages = sorted({str(Path(f).parent) for f in test_files})
225
+
226
+ # Removal task: all test packages were deleted in this merge
227
+ if deleted_dirs and all(
228
+ any(pkg == d or pkg.startswith(d + "/") for d in deleted_dirs)
229
+ for pkg in packages
230
+ ):
231
+ checks = " && ".join(
232
+ f"test ! -d {shlex.quote('./' + pkg)}" for pkg in packages
233
+ )
234
+ return f"bash -c {shlex.quote(checks)}"
235
+
236
+ if validate:
237
+ packages = [p for p in packages if (repo_path / p).is_dir()]
238
+ if not packages:
239
+ return _DEFAULT_TEST_COMMAND
174
240
  go_paths = " ".join(f"./{pkg}/..." for pkg in packages)
175
241
  return f"go test {go_paths}"
176
242
 
177
243
  if language in ("javascript", "typescript"):
178
- # Use the first test file's basename as the pattern
179
244
  pattern = Path(test_files[0]).name
180
245
  return f"npm test -- --testPathPattern={pattern}"
181
246
 
@@ -471,8 +536,13 @@ def extract_task_from_merge(
471
536
  )
472
537
  return None
473
538
 
474
- # Build a verification command targeted to the detected test files and language
475
- test_command = _build_test_command(language, test_files)
539
+ # Detect deleted directories for removal-task verification
540
+ deleted_dirs = _get_deleted_dirs(merge_sha, repo_path)
541
+
542
+ # Build a verification command targeted to the detected test files and language.
543
+ # Pass repo_path so missing packages (e.g. stripped vendor dirs) are filtered out.
544
+ # Pass deleted_dirs so removal tasks verify non-existence instead of compilation.
545
+ test_command = _build_test_command(language, test_files, repo_path, deleted_dirs)
476
546
 
477
547
  # Hard gate: stub test command means verification is meaningless — skip
478
548
  if test_command == _DEFAULT_TEST_COMMAND:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeprobe
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: Benchmark AI coding agents against your own codebase. Mine real tasks from repo history, run agents, interpret results.
5
5
  Author: codeprobe contributors
6
6
  License-Expression: Apache-2.0
@@ -473,8 +473,8 @@ class TestInitCliIntegration:
473
473
 
474
474
  runner = CliRunner()
475
475
  # Inputs: goal=1, experiment name (enter=default), agent (enter=default),
476
- # model (enter=skip), decline Sourcegraph, mcp config path
477
- input_text = f"1\n\nclaude\n\nN\n{mcp_file}\n"
476
+ # model (enter=skip), choose=2 (MCP config file), mcp config path
477
+ input_text = f"1\n\nclaude\n\n2\n{mcp_file}\n"
478
478
  result = runner.invoke(main, ["init", str(tmp_path)], input=input_text)
479
479
  assert result.exit_code == 0, result.output
480
480
  assert not (tmp_path / ".evalrc.yaml").exists()
@@ -489,8 +489,8 @@ class TestInitCliIntegration:
489
489
 
490
490
  runner = CliRunner()
491
491
  # Inputs: goal=1, name=default, agent=claude, model=skip,
492
- # use Sourcegraph=Y, token, url=default (enter)
493
- input_text = "1\n\nclaude\n\nY\ntok_test123\n\n"
492
+ # choose=1 (PAT), choose=1 (paste now), token, url=default (enter)
493
+ input_text = "1\n\nclaude\n\n1\n1\ntok_test123\n\n"
494
494
  result = runner.invoke(main, ["init", str(tmp_path)], input=input_text)
495
495
  assert result.exit_code == 0, result.output
496
496
  assert not (tmp_path / ".evalrc.yaml").exists()
@@ -617,6 +617,111 @@ class TestBuildTestCommand:
617
617
  result = _build_test_command("python", [])
618
618
  assert result == "bash tests/test.sh"
619
619
 
620
+ def test_go_filters_missing_packages(self, tmp_path: Path) -> None:
621
+ """Go packages that don't exist in repo_path are dropped."""
622
+ (tmp_path / "pkg" / "real").mkdir(parents=True)
623
+ result = _build_test_command(
624
+ "go",
625
+ ["pkg/real/foo_test.go", "pkg/missing/bar_test.go"],
626
+ repo_path=tmp_path,
627
+ )
628
+ assert "./pkg/real/..." in result
629
+ assert "missing" not in result
630
+
631
+ def test_go_all_missing_falls_back(self, tmp_path: Path) -> None:
632
+ """When all Go packages are missing, falls back to default."""
633
+ result = _build_test_command(
634
+ "go",
635
+ ["vendor/gone/x_test.go"],
636
+ repo_path=tmp_path,
637
+ )
638
+ assert result == "bash tests/test.sh"
639
+
640
+ def test_python_filters_missing_files(self, tmp_path: Path) -> None:
641
+ """Python test files that don't exist in repo_path are dropped."""
642
+ (tmp_path / "tests").mkdir()
643
+ (tmp_path / "tests" / "test_real.py").touch()
644
+ result = _build_test_command(
645
+ "python",
646
+ ["tests/test_real.py", "tests/test_gone.py"],
647
+ repo_path=tmp_path,
648
+ )
649
+ assert "test_real.py" in result
650
+ assert "test_gone.py" not in result
651
+
652
+ def test_no_repo_path_skips_validation(self) -> None:
653
+ """Without repo_path, all paths are kept (backward compat)."""
654
+ result = _build_test_command(
655
+ "go",
656
+ ["pkg/might/not/exist/x_test.go"],
657
+ )
658
+ assert "./pkg/might/not/exist/..." in result
659
+
660
+
661
+ # ---------------------------------------------------------------------------
662
+ # _build_test_command removal verification tests
663
+ # ---------------------------------------------------------------------------
664
+
665
+
666
+ class TestBuildTestCommandRemoval:
667
+ def test_go_removal_when_all_packages_deleted(self) -> None:
668
+ """When all test packages are in deleted dirs, generate removal check."""
669
+ result = _build_test_command(
670
+ "go",
671
+ ["cluster/images/etcd/migrate/migrate_test.go"],
672
+ deleted_dirs={"cluster/images/etcd/migrate"},
673
+ )
674
+ assert "test ! -d" in result
675
+ assert "go test" not in result
676
+ assert "cluster/images/etcd/migrate" in result
677
+
678
+ def test_go_removal_nested_deleted_dir(self) -> None:
679
+ """Packages under a deleted parent are detected as removal."""
680
+ result = _build_test_command(
681
+ "go",
682
+ [
683
+ "legacy/pkg/a/a_test.go",
684
+ "legacy/pkg/b/b_test.go",
685
+ ],
686
+ deleted_dirs={"legacy"},
687
+ )
688
+ assert "test ! -d" in result
689
+ assert "legacy/pkg/a" in result
690
+ assert "legacy/pkg/b" in result
691
+
692
+ def test_go_mixed_deleted_and_existing(self, tmp_path: Path) -> None:
693
+ """When some packages are deleted but others exist, use go test for survivors."""
694
+ (tmp_path / "pkg" / "alive").mkdir(parents=True)
695
+ result = _build_test_command(
696
+ "go",
697
+ ["pkg/alive/x_test.go", "pkg/dead/y_test.go"],
698
+ repo_path=tmp_path,
699
+ deleted_dirs={"pkg/dead"},
700
+ )
701
+ # Not all packages are deleted, so no removal check
702
+ assert "go test" in result
703
+ assert "pkg/alive" in result
704
+ # Dead package filtered by path validation
705
+ assert "pkg/dead" not in result
706
+
707
+ def test_go_no_deleted_dirs_normal_flow(self) -> None:
708
+ """Without deleted_dirs, normal go test command."""
709
+ result = _build_test_command(
710
+ "go",
711
+ ["pkg/auth/auth_test.go"],
712
+ deleted_dirs=None,
713
+ )
714
+ assert result == "go test ./pkg/auth/..."
715
+
716
+ def test_go_empty_deleted_dirs_normal_flow(self) -> None:
717
+ """Empty deleted_dirs set treated same as None."""
718
+ result = _build_test_command(
719
+ "go",
720
+ ["pkg/auth/auth_test.go"],
721
+ deleted_dirs=set(),
722
+ )
723
+ assert result == "go test ./pkg/auth/..."
724
+
620
725
 
621
726
  # ---------------------------------------------------------------------------
622
727
  # score_pr_quality tests
@@ -1113,6 +1218,10 @@ class TestRunMineClearsStale:
1113
1218
  stale_dir.mkdir(parents=True)
1114
1219
  (stale_dir / "instruction.md").write_text("stale")
1115
1220
 
1221
+ # Create test file paths so path validation passes
1222
+ (tmp_path / "tests").mkdir(exist_ok=True)
1223
+ (tmp_path / "tests" / "test_auth.py").touch()
1224
+
1116
1225
  merge_log = "aaaa1111bbbb2222 Merge pull request #1 from fix/auth\n"
1117
1226
  diff_files = "src/auth.py\ntests/test_auth.py\n"
1118
1227
 
File without changes
File without changes
File without changes