open-atp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. open_atp-0.1.0/.env.example +26 -0
  2. open_atp-0.1.0/.github/workflows/ci-python.yml +115 -0
  3. open_atp-0.1.0/.github/workflows/release.yml +69 -0
  4. open_atp-0.1.0/.gitignore +32 -0
  5. open_atp-0.1.0/.readthedocs.yaml +20 -0
  6. open_atp-0.1.0/AGENTS.md +260 -0
  7. open_atp-0.1.0/CLAUDE.md +1 -0
  8. open_atp-0.1.0/LICENSE +21 -0
  9. open_atp-0.1.0/Makefile +87 -0
  10. open_atp-0.1.0/PKG-INFO +28 -0
  11. open_atp-0.1.0/README.md +88 -0
  12. open_atp-0.1.0/THIRD-PARTY-LICENSES.txt +79 -0
  13. open_atp-0.1.0/codecov.yml +24 -0
  14. open_atp-0.1.0/docs/_static/custom.css +200 -0
  15. open_atp-0.1.0/docs/_static/logo_dark.svg +1 -0
  16. open_atp-0.1.0/docs/_static/logo_light.svg +1 -0
  17. open_atp-0.1.0/docs/api/backends.md +47 -0
  18. open_atp-0.1.0/docs/api/examples.md +16 -0
  19. open_atp-0.1.0/docs/api/harness.md +61 -0
  20. open_atp-0.1.0/docs/api/index.md +21 -0
  21. open_atp-0.1.0/docs/api/lean.md +46 -0
  22. open_atp-0.1.0/docs/api/provers.md +78 -0
  23. open_atp-0.1.0/docs/api/verify.md +36 -0
  24. open_atp-0.1.0/docs/brand/logo.ai +2195 -3
  25. open_atp-0.1.0/docs/brand/logo_dark.svg +1 -0
  26. open_atp-0.1.0/docs/brand/logo_light.svg +1 -0
  27. open_atp-0.1.0/docs/compute_backend/docker.md +74 -0
  28. open_atp-0.1.0/docs/compute_backend/index.md +12 -0
  29. open_atp-0.1.0/docs/compute_backend/modal.md +70 -0
  30. open_atp-0.1.0/docs/conf.py +202 -0
  31. open_atp-0.1.0/docs/examples.md +64 -0
  32. open_atp-0.1.0/docs/index.md +43 -0
  33. open_atp-0.1.0/docs/installation.md +48 -0
  34. open_atp-0.1.0/docs/provers/aristotle.md +57 -0
  35. open_atp-0.1.0/docs/provers/axprover.md +92 -0
  36. open_atp-0.1.0/docs/provers/claude_code.md +110 -0
  37. open_atp-0.1.0/docs/provers/codex.md +97 -0
  38. open_atp-0.1.0/docs/provers/index.md +52 -0
  39. open_atp-0.1.0/docs/provers/numina.md +55 -0
  40. open_atp-0.1.0/docs/provers/opencode.md +101 -0
  41. open_atp-0.1.0/docs/provers/vibe.md +115 -0
  42. open_atp-0.1.0/docs/user_guide/index.md +14 -0
  43. open_atp-0.1.0/docs/user_guide/run_provers.md +113 -0
  44. open_atp-0.1.0/images/Dockerfile +103 -0
  45. open_atp-0.1.0/images/lean/lakefile.toml +7 -0
  46. open_atp-0.1.0/images/lean/lean-toolchain +1 -0
  47. open_atp-0.1.0/lefthook.yml +14 -0
  48. open_atp-0.1.0/pyproject.toml +86 -0
  49. open_atp-0.1.0/src/open_atp/__init__.py +45 -0
  50. open_atp-0.1.0/src/open_atp/__main__.py +241 -0
  51. open_atp-0.1.0/src/open_atp/backends/__init__.py +25 -0
  52. open_atp-0.1.0/src/open_atp/backends/base.py +355 -0
  53. open_atp-0.1.0/src/open_atp/backends/docker.py +365 -0
  54. open_atp-0.1.0/src/open_atp/backends/modal.py +540 -0
  55. open_atp-0.1.0/src/open_atp/config.py +163 -0
  56. open_atp-0.1.0/src/open_atp/examples/__init__.py +82 -0
  57. open_atp-0.1.0/src/open_atp/examples/assets/AbsMulLt.lean +7 -0
  58. open_atp-0.1.0/src/open_atp/examples/assets/InterSubset.lean +9 -0
  59. open_atp-0.1.0/src/open_atp/examples/assets/InterUnionDistrib.lean +9 -0
  60. open_atp-0.1.0/src/open_atp/examples/assets/MulReorder.lean +7 -0
  61. open_atp-0.1.0/src/open_atp/examples/assets/SmulAdd.lean +9 -0
  62. open_atp-0.1.0/src/open_atp/harness/__init__.py +49 -0
  63. open_atp-0.1.0/src/open_atp/harness/_catalog.py +64 -0
  64. open_atp-0.1.0/src/open_atp/harness/_numina.py +78 -0
  65. open_atp-0.1.0/src/open_atp/harness/_paths.py +53 -0
  66. open_atp-0.1.0/src/open_atp/harness/assets/configs/mcp.json +10 -0
  67. open_atp-0.1.0/src/open_atp/harness/assets/scripts/axprover_agent.sh +51 -0
  68. open_atp-0.1.0/src/open_atp/harness/assets/scripts/claude_code_agent.sh +24 -0
  69. open_atp-0.1.0/src/open_atp/harness/assets/scripts/codex_agent.sh +20 -0
  70. open_atp-0.1.0/src/open_atp/harness/assets/scripts/opencode_agent.sh +15 -0
  71. open_atp-0.1.0/src/open_atp/harness/assets/scripts/vibe_agent.sh +23 -0
  72. open_atp-0.1.0/src/open_atp/harness/assets/vibe/lean-standin.toml +34 -0
  73. open_atp-0.1.0/src/open_atp/harness/axprover.py +242 -0
  74. open_atp-0.1.0/src/open_atp/harness/base.py +397 -0
  75. open_atp-0.1.0/src/open_atp/harness/claude_code.py +153 -0
  76. open_atp-0.1.0/src/open_atp/harness/codex.py +114 -0
  77. open_atp-0.1.0/src/open_atp/harness/cost.py +46 -0
  78. open_atp-0.1.0/src/open_atp/harness/opencode.py +149 -0
  79. open_atp-0.1.0/src/open_atp/harness/vibe.py +226 -0
  80. open_atp-0.1.0/src/open_atp/images/__init__.py +55 -0
  81. open_atp-0.1.0/src/open_atp/lean.py +233 -0
  82. open_atp-0.1.0/src/open_atp/provers/__init__.py +24 -0
  83. open_atp-0.1.0/src/open_atp/provers/agent_prover.py +348 -0
  84. open_atp-0.1.0/src/open_atp/provers/aristotle.py +408 -0
  85. open_atp-0.1.0/src/open_atp/provers/base.py +227 -0
  86. open_atp-0.1.0/src/open_atp/provers/numina.py +535 -0
  87. open_atp-0.1.0/src/open_atp/provers/numina_tracker.py +436 -0
  88. open_atp-0.1.0/src/open_atp/verify.py +344 -0
  89. open_atp-0.1.0/tests/backends/test_modal.py +168 -0
  90. open_atp-0.1.0/tests/conftest.py +99 -0
  91. open_atp-0.1.0/tests/fixtures/agent_streams/claude_code.jsonl +6 -0
  92. open_atp-0.1.0/tests/fixtures/mil_trivial/MILExample.lean +7 -0
  93. open_atp-0.1.0/tests/fixtures/mil_trivial/lake-manifest.json +95 -0
  94. open_atp-0.1.0/tests/fixtures/mil_trivial/lakefile.toml +7 -0
  95. open_atp-0.1.0/tests/fixtures/mil_trivial/lean-toolchain +1 -0
  96. open_atp-0.1.0/tests/fixtures/skills/probe-skill/SKILL.md +12 -0
  97. open_atp-0.1.0/tests/harness/test_axprover.py +308 -0
  98. open_atp-0.1.0/tests/harness/test_capabilities.py +656 -0
  99. open_atp-0.1.0/tests/harness/test_vibe.py +250 -0
  100. open_atp-0.1.0/tests/provers/test_agent_prover.py +334 -0
  101. open_atp-0.1.0/tests/provers/test_aristotle.py +224 -0
  102. open_atp-0.1.0/tests/provers/test_numina.py +414 -0
  103. open_atp-0.1.0/tests/test_api.py +307 -0
  104. open_atp-0.1.0/tests/test_config.py +132 -0
  105. open_atp-0.1.0/tests/test_e2e_provers.py +165 -0
  106. open_atp-0.1.0/tests/verify/test_verify.py +96 -0
  107. open_atp-0.1.0/uv.lock +3022 -0
  108. open_atp-0.1.0/vendor/lean4-skills/.claude-plugin/marketplace.json +18 -0
  109. open_atp-0.1.0/vendor/lean4-skills/LICENSE +21 -0
  110. open_atp-0.1.0/vendor/lean4-skills/UPSTREAM_README.md +163 -0
  111. open_atp-0.1.0/vendor/lean4-skills/VENDOR.md +40 -0
  112. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/.claude-plugin/plugin.json +6 -0
  113. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/MIGRATION.md +237 -0
  114. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/README.md +307 -0
  115. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/axiom-eliminator.md +123 -0
  116. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/proof-golfer.md +157 -0
  117. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/proof-repair.md +117 -0
  118. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/sorry-filler-deep.md +126 -0
  119. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/autoformalize.md +156 -0
  120. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/autoprove.md +282 -0
  121. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/checkpoint.md +88 -0
  122. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/doctor.md +222 -0
  123. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/draft.md +145 -0
  124. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/formalize.md +187 -0
  125. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/golf.md +167 -0
  126. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/learn.md +205 -0
  127. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/prove.md +227 -0
  128. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/refactor.md +96 -0
  129. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/review.md +322 -0
  130. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/bootstrap.sh +29 -0
  131. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/guardrails.sh +801 -0
  132. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/hooks.json +16 -0
  133. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/validate_user_prompt.py +146 -0
  134. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/__init__.py +49 -0
  135. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/coercions.py +710 -0
  136. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/formatter.py +69 -0
  137. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/parser.py +264 -0
  138. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/__init__.py +22 -0
  139. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/_common.py +357 -0
  140. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/autoformalize.py +383 -0
  141. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/autoprove.py +607 -0
  142. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/draft.py +101 -0
  143. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/formalize.py +297 -0
  144. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/learn.py +256 -0
  145. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/prove.py +197 -0
  146. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/tokenizer.py +33 -0
  147. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/types.py +146 -0
  148. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/README.md +215 -0
  149. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/TESTING.md +63 -0
  150. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/analyze_let_usage.py +415 -0
  151. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/check_axioms_inline.sh +423 -0
  152. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/cycle_tracker.sh +902 -0
  153. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_exact_candidates.py +337 -0
  154. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_golfable.py +788 -0
  155. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_instances.sh +119 -0
  156. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_usages.sh +233 -0
  157. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/minimize_imports.py +275 -0
  158. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/parse_command_args.py +110 -0
  159. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/parse_lean_errors.py +233 -0
  160. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/search_mathlib.sh +153 -0
  161. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/smart_search.sh +229 -0
  162. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/solver_cascade.py +155 -0
  163. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/sorry_analyzer.py +540 -0
  164. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/test_apply_exact_chains.py +125 -0
  165. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/tests/test_ordering.py +178 -0
  166. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/try_exact_at_step.py +419 -0
  167. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/unused_declarations.sh +240 -0
  168. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/SKILL.md +318 -0
  169. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/agent-workflows.md +355 -0
  170. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/axiom-elimination.md +289 -0
  171. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/calc-patterns.md +246 -0
  172. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/command-examples.md +1295 -0
  173. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/command-invocation.md +113 -0
  174. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compilation-errors.md +747 -0
  175. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compiler-guided-repair.md +720 -0
  176. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compiler-internals.md +182 -0
  177. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/cycle-engine.md +528 -0
  178. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/domain-patterns.md +751 -0
  179. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/ffi-interop.md +206 -0
  180. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/grind-tactic.md +384 -0
  181. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/instance-pollution.md +435 -0
  182. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/json-patterns.md +145 -0
  183. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-lsp-server.md +335 -0
  184. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-lsp-tools-api.md +988 -0
  185. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-phrasebook.md +847 -0
  186. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean4-custom-syntax.md +452 -0
  187. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/learn-pathways.md +329 -0
  188. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/linter-authoring.md +138 -0
  189. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/mathlib-guide.md +496 -0
  190. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/mathlib-style.md +378 -0
  191. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/measure-theory.md +811 -0
  192. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/metaprogramming-patterns.md +158 -0
  193. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/performance-optimization.md +549 -0
  194. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/profiling-workflows.md +56 -0
  195. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-golfing-patterns.md +527 -0
  196. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-golfing.md +348 -0
  197. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-refactoring.md +867 -0
  198. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-simplification.md +236 -0
  199. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-templates.md +169 -0
  200. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/review-hook-schema.md +252 -0
  201. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/scaffold-dsl.md +60 -0
  202. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/simp-reference.md +233 -0
  203. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/sorry-filling.md +244 -0
  204. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/subagent-workflows.md +633 -0
  205. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/tactic-patterns.md +126 -0
  206. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/tactics-reference.md +687 -0
  207. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/verso-docs.md +68 -0
  208. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/__init__.py +1 -0
  209. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/_doc_sync_allowlist.py +20 -0
  210. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/_doc_sync_forward_exclusions.py +34 -0
  211. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_formatter.py +203 -0
  212. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_hook_block_roundtrip.py +466 -0
  213. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parse_command_args_cli.py +137 -0
  214. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_autoformalize.py +142 -0
  215. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_autoprove.py +178 -0
  216. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_common.py +135 -0
  217. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_draft.py +146 -0
  218. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_formalize.py +117 -0
  219. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_learn.py +121 -0
  220. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_prove.py +133 -0
  221. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_bash3_smoke.sh +194 -0
  222. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_cycle_tracker.sh +1373 -0
  223. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_guardrails.sh +469 -0
  224. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_lint_runtime_portability.sh +352 -0
  225. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_validate_user_prompt.sh +375 -0
  226. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/lint_docs.sh +1683 -0
  227. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/lint_runtime_portability.sh +287 -0
  228. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/smoke_snippets.sh +80 -0
  229. open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/test_contracts.sh +497 -0
  230. open_atp-0.1.0/vendor/leanprover-skills/LICENSE +201 -0
  231. open_atp-0.1.0/vendor/leanprover-skills/UPSTREAM_README.md +116 -0
  232. open_atp-0.1.0/vendor/leanprover-skills/VENDOR.md +38 -0
  233. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-bisect/SKILL.md +77 -0
  234. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-mwe/SKILL.md +107 -0
  235. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-pr/SKILL.md +86 -0
  236. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-proof/SKILL.md +93 -0
  237. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-proof/tests/example.yaml +7 -0
  238. open_atp-0.1.0/vendor/leanprover-skills/skills/lean-setup/SKILL.md +66 -0
  239. open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-build/SKILL.md +24 -0
  240. open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-pr/SKILL.md +60 -0
  241. open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-review/SKILL.md +28 -0
  242. open_atp-0.1.0/vendor/leanprover-skills/skills/nightly-testing/SKILL.md +28 -0
  243. open_atp-0.1.0/vendor/numina/VENDOR.md +91 -0
  244. open_atp-0.1.0/vendor/numina/prompts/main_entry.md +28 -0
  245. open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/common.md +595 -0
  246. open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/coordinator.md +485 -0
  247. open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/golfer.md +393 -0
  248. open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/informal_agent.md +296 -0
  249. open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/proof_agent.md +381 -0
  250. open_atp-0.1.0/vendor/numina/skills/.claude-plugin/plugin.json +5 -0
  251. open_atp-0.1.0/vendor/numina/skills/SKILL.md +21 -0
  252. open_atp-0.1.0/vendor/numina/skills/cli/axle.py +76 -0
  253. open_atp-0.1.0/vendor/numina/skills/cli/code_golf.py +76 -0
  254. open_atp-0.1.0/vendor/numina/skills/cli/discussion_partner.py +141 -0
  255. open_atp-0.1.0/vendor/numina/skills/cli/hammer_premise.py +56 -0
  256. open_atp-0.1.0/vendor/numina/skills/cli/informal_prover.py +429 -0
  257. open_atp-0.1.0/vendor/numina/skills/cli/lean_check.py +137 -0
  258. open_atp-0.1.0/vendor/numina/skills/cli/leanexplore.py +97 -0
  259. open_atp-0.1.0/vendor/numina/skills/cli/leanfinder.py +64 -0
  260. open_atp-0.1.0/vendor/numina/skills/cli/leansearch.py +73 -0
  261. open_atp-0.1.0/vendor/numina/skills/cli/loogle.py +58 -0
  262. open_atp-0.1.0/vendor/numina/skills/cli/requirements.txt +4 -0
  263. open_atp-0.1.0/vendor/numina/skills/cli/state_search.py +50 -0
  264. open_atp-0.1.0/vendor/numina/skills/code-transform/SKILL.md +19 -0
  265. open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-extract-theorems.md +41 -0
  266. open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-repair-proofs.md +42 -0
  267. open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-simplify-theorems.md +40 -0
  268. open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-sorry2lemma.md +37 -0
  269. open_atp-0.1.0/vendor/numina/skills/llm/SKILL.md +18 -0
  270. open_atp-0.1.0/vendor/numina/skills/llm/reference-code-golf.md +29 -0
  271. open_atp-0.1.0/vendor/numina/skills/llm/reference-discussion-partner.md +43 -0
  272. open_atp-0.1.0/vendor/numina/skills/llm/reference-informal-prover.md +56 -0
  273. open_atp-0.1.0/vendor/numina/skills/search/SKILL.md +21 -0
  274. open_atp-0.1.0/vendor/numina/skills/search/reference-hammer-premise.md +27 -0
  275. open_atp-0.1.0/vendor/numina/skills/search/reference-leanexplore.md +34 -0
  276. open_atp-0.1.0/vendor/numina/skills/search/reference-leanfinder.md +28 -0
  277. open_atp-0.1.0/vendor/numina/skills/search/reference-leansearch.md +27 -0
  278. open_atp-0.1.0/vendor/numina/skills/search/reference-loogle.md +40 -0
  279. open_atp-0.1.0/vendor/numina/skills/search/reference-state-search.md +26 -0
  280. open_atp-0.1.0/vendor/numina/skills/sorrifier/SKILL.md +82 -0
  281. open_atp-0.1.0/vendor/numina/skills/verification/SKILL.md +18 -0
  282. open_atp-0.1.0/vendor/numina/skills/verification/reference-axle-disprove.md +38 -0
  283. open_atp-0.1.0/vendor/numina/skills/verification/reference-axle-verify-proof.md +40 -0
  284. open_atp-0.1.0/vendor/numina/skills/verification/reference-lean-check.md +35 -0
@@ -0,0 +1,26 @@
1
+ # Copy to `.env` and fill in. `.env` is gitignored and never committed.
2
+
3
+ # Required only for the live Aristotle end-to-end test (`pytest -m aristotle_api`).
4
+ ARISTOTLE_API_KEY=arstl_your_key_here
5
+
6
+ # Required only for the live AgentProver test (`pytest -m agent_api`) with the
7
+ # default claude_code harness. Generate with `claude setup-token`.
8
+ CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat-your_token_here
9
+
10
+ # Required only for the live NuminaProver test (`pytest -m numina_api`): the OAuth
11
+ # token above plus at least GEMINI_API_KEY. Numina's helper skills call out to
12
+ # Gemini / OpenAI / Leandex from inside the sandbox; apart from GEMINI_API_KEY (needed by the test), each key is optional and the
13
+ # corresponding skill simply degrades or skips when its key is absent.
14
+ GEMINI_API_KEY=your_gemini_key_here
15
+ OPENAI_API_KEY=sk-your_openai_key_here
16
+ LEAN_LEANDEX_API_KEY=your_leandex_key_here
17
+
18
+ # Required for the `agent:axprover` harness (ax-prover-base). It uses a raw provider
19
+ # key matching the configured `model`: the default model is claude-opus-4-8, so
20
+ # ANTHROPIC_API_KEY is needed unless you override `model` to a gpt-*/gemini-* id (then
21
+ # OPENAI_API_KEY / GOOGLE_API_KEY instead). The harness forwards whichever is present.
22
+ ANTHROPIC_API_KEY=sk-ant-your_key_here
23
+ GOOGLE_API_KEY=your_google_genai_key_here
24
+ # Optional: ax-prover's web-search tool degrades to a no-op without this, and the
25
+ # harness does not forward it by default (see AxProverHarness.auth_spec).
26
+ # TAVILY_API_KEY=tvly-your_key_here
@@ -0,0 +1,115 @@
1
+ name: CI - Python
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - "src/**"
8
+ - "tests/**"
9
+ - "pyproject.toml"
10
+ - "uv.lock"
11
+ - ".github/workflows/ci-python.yml"
12
+ - "codecov.yml"
13
+ pull_request:
14
+ paths:
15
+ - "src/**"
16
+ - "tests/**"
17
+ - "pyproject.toml"
18
+ - "uv.lock"
19
+ - ".github/workflows/ci-python.yml"
20
+ - "codecov.yml"
21
+
22
+ jobs:
23
+ lint:
24
+ name: Lint
25
+ runs-on: ubuntu-latest
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+
29
+ - uses: astral-sh/setup-uv@v5
30
+ with:
31
+ enable-cache: true
32
+ cache-dependency-glob: uv.lock
33
+
34
+ - name: Install dependencies
35
+ run: uv sync --all-extras
36
+
37
+ - name: Lint (ruff check)
38
+ run: uv run ruff check src tests
39
+
40
+ format:
41
+ name: Format
42
+ runs-on: ubuntu-latest
43
+ steps:
44
+ - uses: actions/checkout@v4
45
+
46
+ - uses: astral-sh/setup-uv@v5
47
+ with:
48
+ enable-cache: true
49
+ cache-dependency-glob: uv.lock
50
+
51
+ - name: Install dependencies
52
+ run: uv sync --all-extras
53
+
54
+ - name: Format check (ruff format)
55
+ run: uv run ruff format --check src tests
56
+
57
+ typecheck:
58
+ name: Type Check
59
+ runs-on: ubuntu-latest
60
+ steps:
61
+ - uses: actions/checkout@v4
62
+
63
+ - uses: astral-sh/setup-uv@v5
64
+ with:
65
+ enable-cache: true
66
+ cache-dependency-glob: uv.lock
67
+
68
+ - name: Install dependencies
69
+ run: uv sync --all-extras
70
+
71
+ - name: Type check (mypy)
72
+ run: uv run mypy
73
+
74
+ test:
75
+ name: Test
76
+ runs-on: ubuntu-latest
77
+ steps:
78
+ - uses: actions/checkout@v4
79
+
80
+ - uses: astral-sh/setup-uv@v5
81
+ with:
82
+ enable-cache: true
83
+ cache-dependency-glob: uv.lock
84
+
85
+ - name: Install dependencies
86
+ run: uv sync --all-extras
87
+
88
+ - name: Run unit tests (no docker, no modal)
89
+ run: uv run pytest -m 'not docker and not modal'
90
+
91
+ coverage:
92
+ name: Coverage
93
+ runs-on: ubuntu-latest
94
+ steps:
95
+ - uses: actions/checkout@v4
96
+
97
+ - uses: astral-sh/setup-uv@v5
98
+ with:
99
+ enable-cache: true
100
+ cache-dependency-glob: uv.lock
101
+
102
+ - name: Install dependencies
103
+ run: uv sync --all-extras
104
+
105
+ - name: Coverage — src
106
+ run: uv run pytest -m 'not docker and not modal' --cov=open_atp --cov-report=term-missing --cov-report=xml
107
+
108
+ - name: Upload coverage to Codecov
109
+ uses: codecov/codecov-action@v4
110
+ with:
111
+ token: ${{ secrets.CODECOV_TOKEN }}
112
+ files: coverage.xml
113
+ flags: src
114
+ name: src
115
+ fail_ci_if_error: false
@@ -0,0 +1,69 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+ inputs:
9
+ tag:
10
+ description: "Release tag (e.g. v0.1.0)"
11
+ required: true
12
+
13
+ permissions:
14
+ contents: write
15
+ id-token: write # OIDC for PyPI Trusted Publishing
16
+
17
+ jobs:
18
+ release:
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+ with:
23
+ fetch-depth: 0
24
+ fetch-tags: true
25
+
26
+ - name: Resolve tag
27
+ id: tag
28
+ run: |
29
+ if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
30
+ echo "tag=${{ inputs.tag }}" >> "$GITHUB_OUTPUT"
31
+ else
32
+ echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
33
+ fi
34
+
35
+ - name: Refetch annotated tag and checkout
36
+ run: |
37
+ git fetch --force --tags origin "refs/tags/${{ steps.tag.outputs.tag }}:refs/tags/${{ steps.tag.outputs.tag }}"
38
+ git checkout "${{ steps.tag.outputs.tag }}"
39
+
40
+ - uses: astral-sh/setup-uv@v5
41
+
42
+ - name: Build distributions
43
+ run: uv build
44
+
45
+ - name: Publish to PyPI
46
+ run: uv publish dist/open_atp-* # Trusted Publishing via OIDC, no token needed
47
+
48
+ - name: Create release
49
+ env:
50
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
51
+ run: |
52
+ TAG="${{ steps.tag.outputs.tag }}"
53
+ SHORT=$(git rev-parse --short "$TAG^{commit}")
54
+ MSG=$(git tag -l --format='%(contents:body)' "$TAG")
55
+ {
56
+ echo "$MSG"
57
+ echo
58
+ echo "open-atp release at $SHORT."
59
+ } > dist-notes.md
60
+ VERSION="${TAG#v}"
61
+ PRERELEASE=()
62
+ if [[ "${VERSION%%.*}" == "0" ]]; then
63
+ PRERELEASE=(--prerelease)
64
+ fi
65
+ gh release create "$TAG" \
66
+ dist/open_atp-* \
67
+ --title "$TAG" \
68
+ --notes-file dist-notes.md \
69
+ "${PRERELEASE[@]}"
@@ -0,0 +1,32 @@
1
+ # Reference symlinks — for development context only, never committed.
2
+ refs/
3
+
4
+ # Integration test artifacts (agent logs, workdirs from test_agent_capabilities)
5
+ tests/.runs/
6
+
7
+ # Sphinx docs build
8
+ docs/_build/
9
+
10
+ # Python
11
+ __pycache__/
12
+ *.py[cod]
13
+ .venv/
14
+ venv/
15
+ *.egg-info/
16
+ build/
17
+ dist/
18
+ .mypy_cache/
19
+ .pytest_cache/
20
+ .ruff_cache/
21
+
22
+ # Lean build artifacts (e.g. created by an editor opening a fixture project)
23
+ .lake/
24
+
25
+ # Env / secrets
26
+ .env
27
+
28
+ # OS
29
+ .DS_Store
30
+
31
+ # Claude Code transient state
32
+ .claude/scheduled_tasks.lock
@@ -0,0 +1,20 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-24.04
5
+ tools:
6
+ python: "3.12"
7
+
8
+ sphinx:
9
+ configuration: docs/conf.py
10
+ fail_on_warning: true
11
+
12
+ formats:
13
+ - htmlzip
14
+
15
+ python:
16
+ install:
17
+ - method: pip
18
+ path: .
19
+ extra_requirements:
20
+ - docs
@@ -0,0 +1,260 @@
1
+ # AGENTS.md
2
+
3
+ Developer guide for **open-atp** (Open Automated Formal Proof Synthesis). Read this
4
+ before making changes. The user-facing overview lives in [README.md](README.md);
5
+ this file is the engineering reference.
6
+
7
+ ## What this project does
8
+
9
+ Upload one or more Lean files containing `sorry`, run them through proof-synthesis
10
+ backends, and get back **verified** completed proofs with metadata (verification
11
+ status, cost, duration). Every prover — including the hosted Aristotle — funnels its
12
+ output through one **shared verifier** that compiles the candidate in a Lean+Mathlib
13
+ sandbox and checks it compiles, is sorry-free, and is axiom-clean.
14
+
15
+ ### Two primitives + thin generators
16
+
17
+ 1. **`ComputeBackend`** (`backends/`) — run a command over a working directory inside a
18
+ Lean+Mathlib sandbox. Two impls: `DockerBackend`, `ModalBackend`.
19
+ 2. **`Verifier`** (`verify.py`) — compile a candidate project in a backend and
20
+ report `verified` / `sorry_free` / `axioms`.
21
+
22
+ ```
23
+ ComputeBackend (docker | modal) ← the sandbox primitive
24
+
25
+ ├── Verifier ──────────────────← shared final check (ALL provers)
26
+
27
+ AutomatedProver (provers/base.py, base)
28
+ ├── AgentProver coding-agent harness (claude/codex/opencode/axprover/vibe) + lean-lsp-mcp
29
+ ├── NuminaProver configured AgentProver: claude + vendored Numina assets + round loop
30
+ └── AristotleProver remote `aristotle submit --project-dir --wait` (no local generation sandbox)
31
+ ```
32
+
33
+ ### Input contract
34
+
35
+ Submit a **full lake project** (carries `lean-toolchain` + `lake-manifest.json`). The
36
+ verifier **rejects** projects whose toolchain doesn't match the sandbox image's pin
37
+ (`ToolchainMismatch`) instead of failing deep in a build. The CLI can also take bare
38
+ `.lean` files and stage them into the pinned skeleton. One Mathlib image to start
39
+ (pinned Lean/Mathlib **v4.28.0**); `image` is a config field so more can be added.
40
+
41
+ ## Project structure (high-level)
42
+
43
+ ```
44
+ src/open_atp/
45
+ api.py Platform + prover registry — the dispatch/orchestration layer
46
+ __main__.py `open-atp solve | build-image | build-modal-image` CLI
47
+ images/ image name + toolchain pins (DEFAULT_IMAGE, DEFAULT_TOOLCHAIN)
48
+ lean.py LeanProject, ProofTask, create_project (the Lean input contract)
49
+ verify.py VerificationReport, Verifier (the shared final check)
50
+ backends/ base.py docker.py modal.py (ComputeBackend impls)
51
+ provers/ agent_prover.py numina.py numina_tracker.py aristotle.py
52
+ harness/ coding-agent CLIs staged into the sandbox:
53
+ base.py claude_code.py codex.py opencode.py
54
+ axprover.py vibe.py bundles.py cost.py _paths.py
55
+ assets/ scripts/*.sh configs/mcp.json vibe/lean-standin.toml
56
+
57
+ images/ Dockerfile (Mathlib base image) + lean/ skeleton (toolchain, lakefile)
58
+ vendor/ vendored third-party assets, tracked to upstream SHAs (see VENDOR.md in each)
59
+ numina/ Numina skills + prompts (round-loop prover)
60
+ leanprover-skills/ host-agnostic Lean skills
61
+ lean4-skills/ Claude `lean4` plugin
62
+ tests/ pytest suite (+ tests/.runs/ integration artifacts, gitignored)
63
+ docs/ Sphinx docs (user_guide/, provers/, agent_harness/, api/)
64
+ refs/ read-only symlinks to reference projects (NEVER modify or commit)
65
+ ```
66
+
67
+ The README's `Layout` section predates the `harness/` split — trust the tree above.
68
+
69
+ ### Vendored code
70
+
71
+ `vendor/*` is upstream third-party code pinned to a SHA (each has a `VENDOR.md`).
72
+ Ruff is configured with `extend-exclude = ["vendor"]` — **do not reformat or lint
73
+ vendored code**, and keep its upstream style. It ships in the wheel via
74
+ `force-include` and is resolved at runtime by `harness/_paths.py` (wheel:
75
+ `open_atp/vendor/<name>`; checkout: repo-root `vendor/<name>`).
76
+
77
+ ## Provers
78
+
79
+ Names accepted by `--provers` and the `Platform` registry (`api.py`):
80
+
81
+ | Name | Backing tool | Notes |
82
+ | --- | --- | --- |
83
+ | `aristotle` | Harmonic Aristotle (hosted) | remote API via `aristotlelib`, no local gen sandbox |
84
+ | `agent` | Claude Code (`claude_code` harness) | default; coding agent + lean-lsp-mcp |
85
+ | `codex` | OpenAI Codex CLI | model `gpt-5.5` |
86
+ | `opencode` | opencode | |
87
+ | `axprover` | ax-prover (LangGraph) | proposer→builder→reviewer loop; default model `claude-opus-4-8`, effort `high` |
88
+ | `numina` | Numina skills/prompts on Claude Code | round-continuation loop |
89
+ | `vibe` | Mistral Vibe `lean` scaffold | hosted model (default `magistral-medium-latest`), no GPU; `--model` configurable |
90
+
91
+ Agentic harnesses share **lean-lsp-mcp** as their LSP server. The shared `Verifier`
92
+ does the final compile/sorry/axiom check regardless of which tool generated the proof.
93
+
94
+ ## Tooling
95
+
96
+ - **Python ≥ 3.12**, packaged with **hatchling**, deps managed by **uv** (`uv.lock`).
97
+ - **ruff** — lint (`E,F,I,UP`) + format, line length 88, excludes `vendor`.
98
+ - **mypy** — `strict`, `files = ["src/open_atp"]`.
99
+ - **pytest** — `pytest-cov`, `pytest-xdist` (default `-n 5`).
100
+ - **lefthook** — pre-commit runs ruff check, ruff format --check, and mypy on staged
101
+ `*.py` (with `--force-exclude` so vendored code is skipped). Install with
102
+ `uv run lefthook install`.
103
+ - **Sphinx** (furo + myst) for docs; Read the Docs config in `.readthedocs.yaml`.
104
+ - CLI entry point: `open-atp` → `open_atp.__main__:main`.
105
+
106
+ ## Makefile commands
107
+
108
+ ```
109
+ make install uv sync
110
+ make test pytest, skipping docker/modal/live-API tests (default markers)
111
+ make test-docker -m docker (requires the built image)
112
+ make test-modal -m modal (requires a Modal token)
113
+ make test-aristotle -m aristotle_api (live, needs ARISTOTLE_API_KEY)
114
+ make test-agent -m agent_api (live + billable, needs creds)
115
+ make cov pytest with coverage → htmlcov/, coverage.xml
116
+ make cov-open build + open the HTML coverage report
117
+ make cov-clean remove coverage artifacts
118
+ make lint ruff check src tests
119
+ make format ruff format + ruff check --fix on src tests
120
+ make typecheck mypy
121
+ make check lint + typecheck + test
122
+ make build-image docker build -t open-atp:latest images/
123
+ make docs sphinx-build -W -b html docs docs/_build/html
124
+ make docs-serve live-reload docs
125
+ make docs-clean remove built docs
126
+ make clean remove build + cache artifacts
127
+ ```
128
+
129
+ Run `make check` before pushing.
130
+
131
+ ## Testing
132
+
133
+ Default `addopts`: `-m 'not aristotle_api and not agent_api and not numina_api' -n 5`.
134
+ The live/billable credentialed suites are **opt-in** (excluded by default) and run only when you
135
+ select their marker. Markers (`pyproject.toml`):
136
+
137
+ - `docker` — needs the `open-atp` Docker image (opt-out: `-m 'not docker'`)
138
+ - `modal` — launches a Modal sandbox (opt-out: `-m 'not modal'`)
139
+ - `aristotle_api` — live Aristotle API (opt-in: `-m aristotle_api`)
140
+ - `agent_api` — live agent CLI, billable + creds (opt-in: `-m agent_api`)
141
+ - `numina_api` — live NuminaProver, billable + creds (opt-in: `-m numina_api`)
142
+
143
+ > Project convention: when running tests (even by explicit path), exclude the
144
+ > docker / modal / `*_api` markers by default — they are slow, billable, or need
145
+ > external compute. `make test` already does this. Use `-n 0` to run serially when
146
+ > debugging.
147
+
148
+ Integration artifacts (agent logs, workdirs) land in `tests/.runs/` (gitignored).
149
+
150
+ ## Compute setup: Docker vs. Modal
151
+
152
+ Both backends run the shared `Verifier` **and** the agentic provers end-to-end against
153
+ the Mathlib image. Pick a backend with `--backend`, or split generation from the cheap
154
+ verify with `--agent-backend`.
155
+
156
+ - **Docker** (`DockerBackend`) — bind-mounts the workdir; uses `images/Dockerfile`,
157
+ runs as the `agent` user. Local; build the image first:
158
+ ```bash
159
+ docker build -t open-atp:latest images/ # or: make build-image / open-atp build-image
160
+ uv run pytest -m docker
161
+ ```
162
+ - **Modal** (`ModalBackend`) — pushes/pulls the workdir around an isolated Sandbox
163
+ filesystem; runs as **root**, so its image is built programmatically with the same
164
+ toolchain installed globally. Publish the image, then run the parity suite:
165
+ ```bash
166
+ uv run open-atp build-modal-image --name open-atp --app open-atp
167
+ uv run pytest -m modal # needs MODAL_TOKEN_ID / MODAL_TOKEN_SECRET
168
+ ```
169
+ `ModalBackend`'s `image` (sans `:tag`) must match the `--name` you publish under.
170
+
171
+ Examples (single backend, then a generation/verify split):
172
+ ```bash
173
+ uv run open-atp solve path/to/project --provers agent --backend modal
174
+ uv run open-atp solve path/to/project --provers agent \
175
+ --agent-backend modal --backend docker # Modal generates, Docker does cheap verify
176
+ ```
177
+
178
+ ## CLI quick reference
179
+
180
+ ```
181
+ open-atp solve <inputs...> [options] # lake project dir, or bare .lean files
182
+ --provers comma-separated names (default: agent)
183
+ --instructions TEXT # guidance forwarded to provers
184
+ --targets comma-separated files relative to project
185
+ --image / --toolchain # default to images/ pins
186
+ --backend {docker,modal} default docker
187
+ --agent-backend {docker,modal} separate generation backend (defaults to --backend)
188
+ --runs-dir DIR default runs/
189
+ --max-workers N
190
+ --json emit SolveResult as JSON
191
+
192
+ open-atp build-image [--tag TAG] [--no-cache]
193
+ open-atp build-modal-image [--name N] [--app A] [--force]
194
+ ```
195
+
196
+ Programmatic verify:
197
+ ```python
198
+ from open_atp.lean import LeanProject
199
+ from open_atp.verify import docker_verifier
200
+ report = docker_verifier().verify(LeanProject("path/to/lake/project"))
201
+ print(report.verified, report.sorry_free, report.axioms)
202
+ ```
203
+
204
+ ## Environment / secrets
205
+
206
+ Copy `.env.example` → `.env` (gitignored, never committed). All keys are needed only
207
+ for the corresponding **live** test or harness; absent keys make the dependent
208
+ skill/test degrade or skip:
209
+
210
+ - `ARISTOTLE_API_KEY` — `pytest -m aristotle_api`
211
+ - `CLAUDE_CODE_OAUTH_TOKEN` — `agent_api` test with default claude_code harness
212
+ (`claude setup-token`)
213
+ - `GEMINI_API_KEY` / `OPENAI_API_KEY` / `LEAN_LEANDEX_API_KEY` — Numina helper skills
214
+ - `ANTHROPIC_API_KEY` / `GOOGLE_API_KEY` — `axprover` (raw provider key matching
215
+ the configured `model`); `TAVILY_API_KEY` optional (ax-prover web search)
216
+ - `MODAL_TOKEN_ID` / `MODAL_TOKEN_SECRET` — Modal backend
217
+
218
+ ## Docs: API reference convention
219
+
220
+ The API pages (`docs/api/*.md`) are Sphinx `autoclass` directives; **numpydoc** renders
221
+ the class docstring's `Parameters`/`Attributes` sections, and a single
222
+ `autodoc-skip-member` hook in `docs/conf.py` (`_skip_non_methods`) drops every class
223
+ member that isn't a method. So the split is:
224
+
225
+ - **Constructor params and attributes** (instance state + `@property`) live **only in
226
+ the docstring**, in `Parameters`/`Attributes` sections. The hook hides them as
227
+ members, so they render once, from the prose. Never re-list them with `:members:`.
228
+ - **List each name once — `Parameters` *or* `Attributes`, never both** (the
229
+ numpy/scipy/sklearn convention). A constructor arg stored verbatim as an attribute is
230
+ documented only under `Parameters`; readers know `self.<arg>` exists without it being
231
+ repeated. `Attributes` is reserved for state **not** in the signature: `@property`
232
+ (e.g. `Harness.command`, `Verifier.image`) and derived/computed fields. If a
233
+ `@property` shares a name with a param (e.g. `OpenCodeHarness.provider`), document the
234
+ resolution in the param and leave it out of `Attributes`.
235
+ - **Methods** are the only members `autoclass` enumerates. Document each method **once,
236
+ on the class that defines it.**
237
+ - **Inheritance**: numpydoc does *not* walk the MRO, so each leaf class must
238
+ **re-document every constructor param it accepts, including inherited ones** (e.g.
239
+ `backend`/`timeout_s` from `AutomatedProver`) — otherwise they don't render. Pages do
240
+ **not** use `:inherited-members:`: an inherited method (e.g. `prove`) appears only on
241
+ its base class, not on each child.
242
+
243
+ Practical rules:
244
+
245
+ - **Do not** add `:exclude-members:` for attributes, params, or `name` — the hook
246
+ already handles them. The only legitimate `:exclude-members:` is to hide an
247
+ **overridden method** from a child page so it stays documented on the base only
248
+ (current uses: `start` on the backend impls, `stage` on the harness impls).
249
+ - A new attribute/`@property` only shows up if you add it to the docstring `Attributes`
250
+ section.
251
+ - `make docs` builds with `-W` (warnings are errors) — a broken xref or duplicate
252
+ object description fails the build.
253
+
254
+ ## Conventions
255
+
256
+ - Commit directly to `main` unless told otherwise; warn before committing work that
257
+ clearly belongs on another branch.
258
+ - Never modify or commit anything under `refs/` (read-only reference symlinks) or
259
+ reformat anything under `vendor/` (upstream-tracked).
260
+ - Keep `mypy --strict` and ruff clean; run `make check` before pushing.
@@ -0,0 +1 @@
1
+ @AGENTS.md
open_atp-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Henry Robbins
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,87 @@
1
+ # Common dev commands for the open-atp package.
2
+
3
+ .PHONY: help install test test-docker test-modal test-aristotle test-agent cov cov-open cov-clean lint format typecheck check build build-image docs docs-serve docs-clean clean
4
+
5
+ help:
6
+ @echo "Targets:"
7
+ @echo " install Sync deps with uv"
8
+ @echo " test Run pytest, skipping docker/modal/live-API tests"
9
+ @echo " test-docker Run docker-marked tests (requires the built image)"
10
+ @echo " test-modal Run modal-marked tests (requires a Modal token)"
11
+ @echo " test-aristotle Run the live Aristotle API test (needs ARISTOTLE_API_KEY)"
12
+ @echo " test-agent Run the live agent CLI test (billable + needs creds)"
13
+ @echo " cov Run pytest with coverage; HTML to htmlcov/, XML to coverage.xml"
14
+ @echo " cov-open Open the HTML coverage report"
15
+ @echo " cov-clean Remove coverage artifacts"
16
+ @echo " lint Run ruff check"
17
+ @echo " format Run ruff format + ruff check --fix"
18
+ @echo " typecheck Run mypy"
19
+ @echo " check Run lint + typecheck + test"
20
+ @echo " build Build the sdist + wheel into dist/"
21
+ @echo " build-image Build the open-atp:latest Docker image"
22
+ @echo " docs Build the Sphinx docs once"
23
+ @echo " docs-serve Live-reload docs in browser"
24
+ @echo " docs-clean Remove built docs"
25
+ @echo " clean Remove build + cache artifacts"
26
+
27
+ install:
28
+ uv sync
29
+
30
+ test:
31
+ uv run pytest
32
+
33
+ test-docker:
34
+ uv run pytest -m 'docker'
35
+
36
+ test-modal:
37
+ uv run pytest -m 'modal'
38
+
39
+ test-aristotle:
40
+ uv run pytest -m 'aristotle_api'
41
+
42
+ test-agent:
43
+ uv run pytest -m 'agent_api'
44
+
45
+ cov:
46
+ uv run pytest \
47
+ --cov=open_atp \
48
+ --cov-report=term-missing \
49
+ --cov-report=html \
50
+ --cov-report=xml
51
+
52
+ cov-open: cov
53
+ @python -c "import os, webbrowser; webbrowser.open('file://' + os.path.abspath('htmlcov/index.html'))"
54
+
55
+ cov-clean:
56
+ rm -rf htmlcov coverage.xml .coverage
57
+
58
+ lint:
59
+ uv run ruff check src tests
60
+
61
+ format:
62
+ uv run ruff format src tests
63
+ uv run ruff check --fix src tests
64
+
65
+ typecheck:
66
+ uv run mypy
67
+
68
+ check: lint typecheck test
69
+
70
+ build:
71
+ uv build
72
+
73
+ build-image:
74
+ docker build -t open-atp:latest images/
75
+
76
+ docs:
77
+ uv run --extra docs sphinx-build -W -b html docs docs/_build/html
78
+
79
+ docs-serve:
80
+ uv run --extra docs sphinx-autobuild --watch src docs docs/_build/html
81
+
82
+ docs-clean:
83
+ rm -rf docs/_build
84
+
85
+ clean: docs-clean cov-clean
86
+ rm -rf build dist .pytest_cache .mypy_cache .ruff_cache
87
+ find . -type d -name __pycache__ -exec rm -rf {} +
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: open-atp
3
+ Version: 0.1.0
4
+ Summary: Open Automated Formal Proof Synthesis: upload Lean files with sorrys, get verified completed proofs across multiple proving backends.
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: anthropic>=0.75.0
8
+ Requires-Dist: aristotlelib
9
+ Requires-Dist: google-genai>=1.44.0
10
+ Requires-Dist: jinja2>=3.1
11
+ Requires-Dist: lean-explore>=1.2.1
12
+ Requires-Dist: modal>=1.5
13
+ Requires-Dist: openai>=2.7.1
14
+ Provides-Extra: dev
15
+ Requires-Dist: mypy; extra == 'dev'
16
+ Requires-Dist: pytest; extra == 'dev'
17
+ Requires-Dist: pytest-cov; extra == 'dev'
18
+ Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
19
+ Requires-Dist: python-dotenv; extra == 'dev'
20
+ Requires-Dist: ruff; extra == 'dev'
21
+ Provides-Extra: docs
22
+ Requires-Dist: furo; extra == 'docs'
23
+ Requires-Dist: myst-parser>=3; extra == 'docs'
24
+ Requires-Dist: numpydoc; extra == 'docs'
25
+ Requires-Dist: sphinx-autobuild; extra == 'docs'
26
+ Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
27
+ Requires-Dist: sphinx-design; extra == 'docs'
28
+ Requires-Dist: sphinx>=7; extra == 'docs'