open-atp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_atp-0.1.0/.env.example +26 -0
- open_atp-0.1.0/.github/workflows/ci-python.yml +115 -0
- open_atp-0.1.0/.github/workflows/release.yml +69 -0
- open_atp-0.1.0/.gitignore +32 -0
- open_atp-0.1.0/.readthedocs.yaml +20 -0
- open_atp-0.1.0/AGENTS.md +260 -0
- open_atp-0.1.0/CLAUDE.md +1 -0
- open_atp-0.1.0/LICENSE +21 -0
- open_atp-0.1.0/Makefile +87 -0
- open_atp-0.1.0/PKG-INFO +28 -0
- open_atp-0.1.0/README.md +88 -0
- open_atp-0.1.0/THIRD-PARTY-LICENSES.txt +79 -0
- open_atp-0.1.0/codecov.yml +24 -0
- open_atp-0.1.0/docs/_static/custom.css +200 -0
- open_atp-0.1.0/docs/_static/logo_dark.svg +1 -0
- open_atp-0.1.0/docs/_static/logo_light.svg +1 -0
- open_atp-0.1.0/docs/api/backends.md +47 -0
- open_atp-0.1.0/docs/api/examples.md +16 -0
- open_atp-0.1.0/docs/api/harness.md +61 -0
- open_atp-0.1.0/docs/api/index.md +21 -0
- open_atp-0.1.0/docs/api/lean.md +46 -0
- open_atp-0.1.0/docs/api/provers.md +78 -0
- open_atp-0.1.0/docs/api/verify.md +36 -0
- open_atp-0.1.0/docs/brand/logo.ai +2195 -3
- open_atp-0.1.0/docs/brand/logo_dark.svg +1 -0
- open_atp-0.1.0/docs/brand/logo_light.svg +1 -0
- open_atp-0.1.0/docs/compute_backend/docker.md +74 -0
- open_atp-0.1.0/docs/compute_backend/index.md +12 -0
- open_atp-0.1.0/docs/compute_backend/modal.md +70 -0
- open_atp-0.1.0/docs/conf.py +202 -0
- open_atp-0.1.0/docs/examples.md +64 -0
- open_atp-0.1.0/docs/index.md +43 -0
- open_atp-0.1.0/docs/installation.md +48 -0
- open_atp-0.1.0/docs/provers/aristotle.md +57 -0
- open_atp-0.1.0/docs/provers/axprover.md +92 -0
- open_atp-0.1.0/docs/provers/claude_code.md +110 -0
- open_atp-0.1.0/docs/provers/codex.md +97 -0
- open_atp-0.1.0/docs/provers/index.md +52 -0
- open_atp-0.1.0/docs/provers/numina.md +55 -0
- open_atp-0.1.0/docs/provers/opencode.md +101 -0
- open_atp-0.1.0/docs/provers/vibe.md +115 -0
- open_atp-0.1.0/docs/user_guide/index.md +14 -0
- open_atp-0.1.0/docs/user_guide/run_provers.md +113 -0
- open_atp-0.1.0/images/Dockerfile +103 -0
- open_atp-0.1.0/images/lean/lakefile.toml +7 -0
- open_atp-0.1.0/images/lean/lean-toolchain +1 -0
- open_atp-0.1.0/lefthook.yml +14 -0
- open_atp-0.1.0/pyproject.toml +86 -0
- open_atp-0.1.0/src/open_atp/__init__.py +45 -0
- open_atp-0.1.0/src/open_atp/__main__.py +241 -0
- open_atp-0.1.0/src/open_atp/backends/__init__.py +25 -0
- open_atp-0.1.0/src/open_atp/backends/base.py +355 -0
- open_atp-0.1.0/src/open_atp/backends/docker.py +365 -0
- open_atp-0.1.0/src/open_atp/backends/modal.py +540 -0
- open_atp-0.1.0/src/open_atp/config.py +163 -0
- open_atp-0.1.0/src/open_atp/examples/__init__.py +82 -0
- open_atp-0.1.0/src/open_atp/examples/assets/AbsMulLt.lean +7 -0
- open_atp-0.1.0/src/open_atp/examples/assets/InterSubset.lean +9 -0
- open_atp-0.1.0/src/open_atp/examples/assets/InterUnionDistrib.lean +9 -0
- open_atp-0.1.0/src/open_atp/examples/assets/MulReorder.lean +7 -0
- open_atp-0.1.0/src/open_atp/examples/assets/SmulAdd.lean +9 -0
- open_atp-0.1.0/src/open_atp/harness/__init__.py +49 -0
- open_atp-0.1.0/src/open_atp/harness/_catalog.py +64 -0
- open_atp-0.1.0/src/open_atp/harness/_numina.py +78 -0
- open_atp-0.1.0/src/open_atp/harness/_paths.py +53 -0
- open_atp-0.1.0/src/open_atp/harness/assets/configs/mcp.json +10 -0
- open_atp-0.1.0/src/open_atp/harness/assets/scripts/axprover_agent.sh +51 -0
- open_atp-0.1.0/src/open_atp/harness/assets/scripts/claude_code_agent.sh +24 -0
- open_atp-0.1.0/src/open_atp/harness/assets/scripts/codex_agent.sh +20 -0
- open_atp-0.1.0/src/open_atp/harness/assets/scripts/opencode_agent.sh +15 -0
- open_atp-0.1.0/src/open_atp/harness/assets/scripts/vibe_agent.sh +23 -0
- open_atp-0.1.0/src/open_atp/harness/assets/vibe/lean-standin.toml +34 -0
- open_atp-0.1.0/src/open_atp/harness/axprover.py +242 -0
- open_atp-0.1.0/src/open_atp/harness/base.py +397 -0
- open_atp-0.1.0/src/open_atp/harness/claude_code.py +153 -0
- open_atp-0.1.0/src/open_atp/harness/codex.py +114 -0
- open_atp-0.1.0/src/open_atp/harness/cost.py +46 -0
- open_atp-0.1.0/src/open_atp/harness/opencode.py +149 -0
- open_atp-0.1.0/src/open_atp/harness/vibe.py +226 -0
- open_atp-0.1.0/src/open_atp/images/__init__.py +55 -0
- open_atp-0.1.0/src/open_atp/lean.py +233 -0
- open_atp-0.1.0/src/open_atp/provers/__init__.py +24 -0
- open_atp-0.1.0/src/open_atp/provers/agent_prover.py +348 -0
- open_atp-0.1.0/src/open_atp/provers/aristotle.py +408 -0
- open_atp-0.1.0/src/open_atp/provers/base.py +227 -0
- open_atp-0.1.0/src/open_atp/provers/numina.py +535 -0
- open_atp-0.1.0/src/open_atp/provers/numina_tracker.py +436 -0
- open_atp-0.1.0/src/open_atp/verify.py +344 -0
- open_atp-0.1.0/tests/backends/test_modal.py +168 -0
- open_atp-0.1.0/tests/conftest.py +99 -0
- open_atp-0.1.0/tests/fixtures/agent_streams/claude_code.jsonl +6 -0
- open_atp-0.1.0/tests/fixtures/mil_trivial/MILExample.lean +7 -0
- open_atp-0.1.0/tests/fixtures/mil_trivial/lake-manifest.json +95 -0
- open_atp-0.1.0/tests/fixtures/mil_trivial/lakefile.toml +7 -0
- open_atp-0.1.0/tests/fixtures/mil_trivial/lean-toolchain +1 -0
- open_atp-0.1.0/tests/fixtures/skills/probe-skill/SKILL.md +12 -0
- open_atp-0.1.0/tests/harness/test_axprover.py +308 -0
- open_atp-0.1.0/tests/harness/test_capabilities.py +656 -0
- open_atp-0.1.0/tests/harness/test_vibe.py +250 -0
- open_atp-0.1.0/tests/provers/test_agent_prover.py +334 -0
- open_atp-0.1.0/tests/provers/test_aristotle.py +224 -0
- open_atp-0.1.0/tests/provers/test_numina.py +414 -0
- open_atp-0.1.0/tests/test_api.py +307 -0
- open_atp-0.1.0/tests/test_config.py +132 -0
- open_atp-0.1.0/tests/test_e2e_provers.py +165 -0
- open_atp-0.1.0/tests/verify/test_verify.py +96 -0
- open_atp-0.1.0/uv.lock +3022 -0
- open_atp-0.1.0/vendor/lean4-skills/.claude-plugin/marketplace.json +18 -0
- open_atp-0.1.0/vendor/lean4-skills/LICENSE +21 -0
- open_atp-0.1.0/vendor/lean4-skills/UPSTREAM_README.md +163 -0
- open_atp-0.1.0/vendor/lean4-skills/VENDOR.md +40 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/.claude-plugin/plugin.json +6 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/MIGRATION.md +237 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/README.md +307 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/axiom-eliminator.md +123 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/proof-golfer.md +157 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/proof-repair.md +117 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/agents/sorry-filler-deep.md +126 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/autoformalize.md +156 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/autoprove.md +282 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/checkpoint.md +88 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/doctor.md +222 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/draft.md +145 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/formalize.md +187 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/golf.md +167 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/learn.md +205 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/prove.md +227 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/refactor.md +96 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/commands/review.md +322 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/bootstrap.sh +29 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/guardrails.sh +801 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/hooks.json +16 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/hooks/validate_user_prompt.py +146 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/__init__.py +49 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/coercions.py +710 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/formatter.py +69 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/parser.py +264 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/__init__.py +22 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/_common.py +357 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/autoformalize.py +383 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/autoprove.py +607 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/draft.py +101 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/formalize.py +297 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/learn.py +256 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/specs/prove.py +197 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/tokenizer.py +33 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/command_args/types.py +146 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/README.md +215 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/TESTING.md +63 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/analyze_let_usage.py +415 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/check_axioms_inline.sh +423 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/cycle_tracker.sh +902 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_exact_candidates.py +337 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_golfable.py +788 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_instances.sh +119 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/find_usages.sh +233 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/minimize_imports.py +275 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/parse_command_args.py +110 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/parse_lean_errors.py +233 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/search_mathlib.sh +153 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/smart_search.sh +229 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/solver_cascade.py +155 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/sorry_analyzer.py +540 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/test_apply_exact_chains.py +125 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/tests/test_ordering.py +178 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/try_exact_at_step.py +419 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/lib/scripts/unused_declarations.sh +240 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/SKILL.md +318 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/agent-workflows.md +355 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/axiom-elimination.md +289 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/calc-patterns.md +246 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/command-examples.md +1295 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/command-invocation.md +113 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compilation-errors.md +747 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compiler-guided-repair.md +720 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/compiler-internals.md +182 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/cycle-engine.md +528 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/domain-patterns.md +751 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/ffi-interop.md +206 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/grind-tactic.md +384 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/instance-pollution.md +435 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/json-patterns.md +145 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-lsp-server.md +335 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-lsp-tools-api.md +988 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean-phrasebook.md +847 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/lean4-custom-syntax.md +452 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/learn-pathways.md +329 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/linter-authoring.md +138 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/mathlib-guide.md +496 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/mathlib-style.md +378 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/measure-theory.md +811 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/metaprogramming-patterns.md +158 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/performance-optimization.md +549 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/profiling-workflows.md +56 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-golfing-patterns.md +527 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-golfing.md +348 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-refactoring.md +867 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-simplification.md +236 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/proof-templates.md +169 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/review-hook-schema.md +252 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/scaffold-dsl.md +60 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/simp-reference.md +233 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/sorry-filling.md +244 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/subagent-workflows.md +633 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/tactic-patterns.md +126 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/tactics-reference.md +687 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/skills/lean4/references/verso-docs.md +68 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/__init__.py +1 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/_doc_sync_allowlist.py +20 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/_doc_sync_forward_exclusions.py +34 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_formatter.py +203 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_hook_block_roundtrip.py +466 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parse_command_args_cli.py +137 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_autoformalize.py +142 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_autoprove.py +178 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_common.py +135 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_draft.py +146 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_formalize.py +117 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_learn.py +121 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/command_args/test_parser_prove.py +133 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_bash3_smoke.sh +194 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_cycle_tracker.sh +1373 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_guardrails.sh +469 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_lint_runtime_portability.sh +352 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tests/test_validate_user_prompt.sh +375 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/lint_docs.sh +1683 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/lint_runtime_portability.sh +287 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/smoke_snippets.sh +80 -0
- open_atp-0.1.0/vendor/lean4-skills/plugins/lean4/tools/test_contracts.sh +497 -0
- open_atp-0.1.0/vendor/leanprover-skills/LICENSE +201 -0
- open_atp-0.1.0/vendor/leanprover-skills/UPSTREAM_README.md +116 -0
- open_atp-0.1.0/vendor/leanprover-skills/VENDOR.md +38 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-bisect/SKILL.md +77 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-mwe/SKILL.md +107 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-pr/SKILL.md +86 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-proof/SKILL.md +93 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-proof/tests/example.yaml +7 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/lean-setup/SKILL.md +66 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-build/SKILL.md +24 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-pr/SKILL.md +60 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/mathlib-review/SKILL.md +28 -0
- open_atp-0.1.0/vendor/leanprover-skills/skills/nightly-testing/SKILL.md +28 -0
- open_atp-0.1.0/vendor/numina/VENDOR.md +91 -0
- open_atp-0.1.0/vendor/numina/prompts/main_entry.md +28 -0
- open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/common.md +595 -0
- open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/coordinator.md +485 -0
- open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/golfer.md +393 -0
- open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/informal_agent.md +296 -0
- open_atp-0.1.0/vendor/numina/prompts/subagent_prompts/proof_agent.md +381 -0
- open_atp-0.1.0/vendor/numina/skills/.claude-plugin/plugin.json +5 -0
- open_atp-0.1.0/vendor/numina/skills/SKILL.md +21 -0
- open_atp-0.1.0/vendor/numina/skills/cli/axle.py +76 -0
- open_atp-0.1.0/vendor/numina/skills/cli/code_golf.py +76 -0
- open_atp-0.1.0/vendor/numina/skills/cli/discussion_partner.py +141 -0
- open_atp-0.1.0/vendor/numina/skills/cli/hammer_premise.py +56 -0
- open_atp-0.1.0/vendor/numina/skills/cli/informal_prover.py +429 -0
- open_atp-0.1.0/vendor/numina/skills/cli/lean_check.py +137 -0
- open_atp-0.1.0/vendor/numina/skills/cli/leanexplore.py +97 -0
- open_atp-0.1.0/vendor/numina/skills/cli/leanfinder.py +64 -0
- open_atp-0.1.0/vendor/numina/skills/cli/leansearch.py +73 -0
- open_atp-0.1.0/vendor/numina/skills/cli/loogle.py +58 -0
- open_atp-0.1.0/vendor/numina/skills/cli/requirements.txt +4 -0
- open_atp-0.1.0/vendor/numina/skills/cli/state_search.py +50 -0
- open_atp-0.1.0/vendor/numina/skills/code-transform/SKILL.md +19 -0
- open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-extract-theorems.md +41 -0
- open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-repair-proofs.md +42 -0
- open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-simplify-theorems.md +40 -0
- open_atp-0.1.0/vendor/numina/skills/code-transform/reference-axle-sorry2lemma.md +37 -0
- open_atp-0.1.0/vendor/numina/skills/llm/SKILL.md +18 -0
- open_atp-0.1.0/vendor/numina/skills/llm/reference-code-golf.md +29 -0
- open_atp-0.1.0/vendor/numina/skills/llm/reference-discussion-partner.md +43 -0
- open_atp-0.1.0/vendor/numina/skills/llm/reference-informal-prover.md +56 -0
- open_atp-0.1.0/vendor/numina/skills/search/SKILL.md +21 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-hammer-premise.md +27 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-leanexplore.md +34 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-leanfinder.md +28 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-leansearch.md +27 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-loogle.md +40 -0
- open_atp-0.1.0/vendor/numina/skills/search/reference-state-search.md +26 -0
- open_atp-0.1.0/vendor/numina/skills/sorrifier/SKILL.md +82 -0
- open_atp-0.1.0/vendor/numina/skills/verification/SKILL.md +18 -0
- open_atp-0.1.0/vendor/numina/skills/verification/reference-axle-disprove.md +38 -0
- open_atp-0.1.0/vendor/numina/skills/verification/reference-axle-verify-proof.md +40 -0
- open_atp-0.1.0/vendor/numina/skills/verification/reference-lean-check.md +35 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copy to `.env` and fill in. `.env` is gitignored and never committed.
|
|
2
|
+
|
|
3
|
+
# Required only for the live Aristotle end-to-end test (`pytest -m aristotle_api`).
|
|
4
|
+
ARISTOTLE_API_KEY=arstl_your_key_here
|
|
5
|
+
|
|
6
|
+
# Required only for the live AgentProver test (`pytest -m agent_api`) with the
|
|
7
|
+
# default claude_code harness. Generate with `claude setup-token`.
|
|
8
|
+
CLAUDE_CODE_OAUTH_TOKEN=sk-ant-oat-your_token_here
|
|
9
|
+
|
|
10
|
+
# Required only for the live NuminaProver test (`pytest -m numina_api`): the OAuth
|
|
11
|
+
# token above plus at least GEMINI_API_KEY. Numina's helper skills call out to
|
|
12
|
+
# Gemini / OpenAI / Leandex from inside the sandbox; each key is optional and the
|
|
13
|
+
# corresponding skill simply degrades or skips when its key is absent.
|
|
14
|
+
GEMINI_API_KEY=your_gemini_key_here
|
|
15
|
+
OPENAI_API_KEY=sk-your_openai_key_here
|
|
16
|
+
LEAN_LEANDEX_API_KEY=your_leandex_key_here
|
|
17
|
+
|
|
18
|
+
# Required for the `agent:axprover` harness (ax-prover-base). It uses a raw provider
|
|
19
|
+
# key matching the configured `model`: the default model is claude-opus-4-8, so
|
|
20
|
+
# ANTHROPIC_API_KEY is needed unless you override `model` to a gpt-*/gemini-* id (then
|
|
21
|
+
# OPENAI_API_KEY / GOOGLE_API_KEY instead). The harness forwards whichever is present.
|
|
22
|
+
ANTHROPIC_API_KEY=sk-ant-your_key_here
|
|
23
|
+
GOOGLE_API_KEY=your_google_genai_key_here
|
|
24
|
+
# Optional: ax-prover's web-search tool degrades to a no-op without this, and the
|
|
25
|
+
# harness does not forward it by default (see AxProverHarness.auth_spec).
|
|
26
|
+
# TAVILY_API_KEY=tvly-your_key_here
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
name: CI - Python
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths:
|
|
7
|
+
- "src/**"
|
|
8
|
+
- "tests/**"
|
|
9
|
+
- "pyproject.toml"
|
|
10
|
+
- "uv.lock"
|
|
11
|
+
- ".github/workflows/ci-python.yml"
|
|
12
|
+
- "codecov.yml"
|
|
13
|
+
pull_request:
|
|
14
|
+
paths:
|
|
15
|
+
- "src/**"
|
|
16
|
+
- "tests/**"
|
|
17
|
+
- "pyproject.toml"
|
|
18
|
+
- "uv.lock"
|
|
19
|
+
- ".github/workflows/ci-python.yml"
|
|
20
|
+
- "codecov.yml"
|
|
21
|
+
|
|
22
|
+
jobs:
|
|
23
|
+
lint:
|
|
24
|
+
name: Lint
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
|
|
29
|
+
- uses: astral-sh/setup-uv@v5
|
|
30
|
+
with:
|
|
31
|
+
enable-cache: true
|
|
32
|
+
cache-dependency-glob: uv.lock
|
|
33
|
+
|
|
34
|
+
- name: Install dependencies
|
|
35
|
+
run: uv sync --all-extras
|
|
36
|
+
|
|
37
|
+
- name: Lint (ruff check)
|
|
38
|
+
run: uv run ruff check src tests
|
|
39
|
+
|
|
40
|
+
format:
|
|
41
|
+
name: Format
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/checkout@v4
|
|
45
|
+
|
|
46
|
+
- uses: astral-sh/setup-uv@v5
|
|
47
|
+
with:
|
|
48
|
+
enable-cache: true
|
|
49
|
+
cache-dependency-glob: uv.lock
|
|
50
|
+
|
|
51
|
+
- name: Install dependencies
|
|
52
|
+
run: uv sync --all-extras
|
|
53
|
+
|
|
54
|
+
- name: Format check (ruff format)
|
|
55
|
+
run: uv run ruff format --check src tests
|
|
56
|
+
|
|
57
|
+
typecheck:
|
|
58
|
+
name: Type Check
|
|
59
|
+
runs-on: ubuntu-latest
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
|
|
63
|
+
- uses: astral-sh/setup-uv@v5
|
|
64
|
+
with:
|
|
65
|
+
enable-cache: true
|
|
66
|
+
cache-dependency-glob: uv.lock
|
|
67
|
+
|
|
68
|
+
- name: Install dependencies
|
|
69
|
+
run: uv sync --all-extras
|
|
70
|
+
|
|
71
|
+
- name: Type check (mypy)
|
|
72
|
+
run: uv run mypy
|
|
73
|
+
|
|
74
|
+
test:
|
|
75
|
+
name: Test
|
|
76
|
+
runs-on: ubuntu-latest
|
|
77
|
+
steps:
|
|
78
|
+
- uses: actions/checkout@v4
|
|
79
|
+
|
|
80
|
+
- uses: astral-sh/setup-uv@v5
|
|
81
|
+
with:
|
|
82
|
+
enable-cache: true
|
|
83
|
+
cache-dependency-glob: uv.lock
|
|
84
|
+
|
|
85
|
+
- name: Install dependencies
|
|
86
|
+
run: uv sync --all-extras
|
|
87
|
+
|
|
88
|
+
- name: Run unit tests (no docker, no modal)
|
|
89
|
+
run: uv run pytest -m 'not docker and not modal'
|
|
90
|
+
|
|
91
|
+
coverage:
|
|
92
|
+
name: Coverage
|
|
93
|
+
runs-on: ubuntu-latest
|
|
94
|
+
steps:
|
|
95
|
+
- uses: actions/checkout@v4
|
|
96
|
+
|
|
97
|
+
- uses: astral-sh/setup-uv@v5
|
|
98
|
+
with:
|
|
99
|
+
enable-cache: true
|
|
100
|
+
cache-dependency-glob: uv.lock
|
|
101
|
+
|
|
102
|
+
- name: Install dependencies
|
|
103
|
+
run: uv sync --all-extras
|
|
104
|
+
|
|
105
|
+
- name: Coverage — src
|
|
106
|
+
run: uv run pytest -m 'not docker and not modal' --cov=open_atp --cov-report=term-missing --cov-report=xml
|
|
107
|
+
|
|
108
|
+
- name: Upload coverage to Codecov
|
|
109
|
+
uses: codecov/codecov-action@v4
|
|
110
|
+
with:
|
|
111
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
112
|
+
files: coverage.xml
|
|
113
|
+
flags: src
|
|
114
|
+
name: src
|
|
115
|
+
fail_ci_if_error: false
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag:
|
|
10
|
+
description: "Release tag (e.g. v0.1.0)"
|
|
11
|
+
required: true
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: write
|
|
15
|
+
id-token: write # OIDC for PyPI Trusted Publishing
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
release:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
with:
|
|
23
|
+
fetch-depth: 0
|
|
24
|
+
fetch-tags: true
|
|
25
|
+
|
|
26
|
+
- name: Resolve tag
|
|
27
|
+
id: tag
|
|
28
|
+
run: |
|
|
29
|
+
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
|
30
|
+
echo "tag=${{ inputs.tag }}" >> "$GITHUB_OUTPUT"
|
|
31
|
+
else
|
|
32
|
+
echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
|
33
|
+
fi
|
|
34
|
+
|
|
35
|
+
- name: Refetch annotated tag and checkout
|
|
36
|
+
run: |
|
|
37
|
+
git fetch --force --tags origin "refs/tags/${{ steps.tag.outputs.tag }}:refs/tags/${{ steps.tag.outputs.tag }}"
|
|
38
|
+
git checkout "${{ steps.tag.outputs.tag }}"
|
|
39
|
+
|
|
40
|
+
- uses: astral-sh/setup-uv@v5
|
|
41
|
+
|
|
42
|
+
- name: Build distributions
|
|
43
|
+
run: uv build
|
|
44
|
+
|
|
45
|
+
- name: Publish to PyPI
|
|
46
|
+
run: uv publish dist/open_atp-* # Trusted Publishing via OIDC, no token needed
|
|
47
|
+
|
|
48
|
+
- name: Create release
|
|
49
|
+
env:
|
|
50
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
51
|
+
run: |
|
|
52
|
+
TAG="${{ steps.tag.outputs.tag }}"
|
|
53
|
+
SHORT=$(git rev-parse --short "$TAG^{commit}")
|
|
54
|
+
MSG=$(git tag -l --format='%(contents:body)' "$TAG")
|
|
55
|
+
{
|
|
56
|
+
echo "$MSG"
|
|
57
|
+
echo
|
|
58
|
+
echo "open-atp release at $SHORT."
|
|
59
|
+
} > dist-notes.md
|
|
60
|
+
VERSION="${TAG#v}"
|
|
61
|
+
PRERELEASE=()
|
|
62
|
+
if [[ "${VERSION%%.*}" == "0" ]]; then
|
|
63
|
+
PRERELEASE=(--prerelease)
|
|
64
|
+
fi
|
|
65
|
+
gh release create "$TAG" \
|
|
66
|
+
dist/open_atp-* \
|
|
67
|
+
--title "$TAG" \
|
|
68
|
+
--notes-file dist-notes.md \
|
|
69
|
+
"${PRERELEASE[@]}"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Reference symlinks — for development context only, never committed.
|
|
2
|
+
refs/
|
|
3
|
+
|
|
4
|
+
# Integration test artifacts (agent logs, workdirs from test_agent_capabilities)
|
|
5
|
+
tests/.runs/
|
|
6
|
+
|
|
7
|
+
# Sphinx docs build
|
|
8
|
+
docs/_build/
|
|
9
|
+
|
|
10
|
+
# Python
|
|
11
|
+
__pycache__/
|
|
12
|
+
*.py[cod]
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
*.egg-info/
|
|
16
|
+
build/
|
|
17
|
+
dist/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.ruff_cache/
|
|
21
|
+
|
|
22
|
+
# Lean build artifacts (e.g. created by an editor opening a fixture project)
|
|
23
|
+
.lake/
|
|
24
|
+
|
|
25
|
+
# Env / secrets
|
|
26
|
+
.env
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
|
|
31
|
+
# Claude Code transient state
|
|
32
|
+
.claude/scheduled_tasks.lock
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
|
|
3
|
+
build:
|
|
4
|
+
os: ubuntu-24.04
|
|
5
|
+
tools:
|
|
6
|
+
python: "3.12"
|
|
7
|
+
|
|
8
|
+
sphinx:
|
|
9
|
+
configuration: docs/conf.py
|
|
10
|
+
fail_on_warning: true
|
|
11
|
+
|
|
12
|
+
formats:
|
|
13
|
+
- htmlzip
|
|
14
|
+
|
|
15
|
+
python:
|
|
16
|
+
install:
|
|
17
|
+
- method: pip
|
|
18
|
+
path: .
|
|
19
|
+
extra_requirements:
|
|
20
|
+
- docs
|
open_atp-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Developer guide for **open-atp** (Open Automated Formal Proof Synthesis). Read this
|
|
4
|
+
before making changes. The user-facing overview lives in [README.md](README.md);
|
|
5
|
+
this file is the engineering reference.
|
|
6
|
+
|
|
7
|
+
## What this project does
|
|
8
|
+
|
|
9
|
+
Upload one or more Lean files containing `sorry`, run them through proof-synthesis
|
|
10
|
+
backends, and get back **verified** completed proofs with metadata (verification
|
|
11
|
+
status, cost, duration). Every prover — including the hosted Aristotle — funnels its
|
|
12
|
+
output through one **shared verifier** that compiles the candidate in a Lean+Mathlib
|
|
13
|
+
sandbox and checks it compiles, is sorry-free, and is axiom-clean.
|
|
14
|
+
|
|
15
|
+
### Two primitives + thin generators
|
|
16
|
+
|
|
17
|
+
1. **`ComputeBackend`** (`backends/`) — run a command over a working directory inside a
|
|
18
|
+
Lean+Mathlib sandbox. Two impls: `DockerBackend`, `ModalBackend`.
|
|
19
|
+
2. **`Verifier`** (`verify.py`) — compile a candidate project in a backend and
|
|
20
|
+
report `verified` / `sorry_free` / `axioms`.
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
ComputeBackend (docker | modal) ← the sandbox primitive
|
|
24
|
+
│
|
|
25
|
+
├── Verifier ──────────────────← shared final check (ALL provers)
|
|
26
|
+
│
|
|
27
|
+
AutomatedProver (provers/base.py, base)
|
|
28
|
+
├── AgentProver coding-agent harness (claude/codex/opencode/axprover/vibe) + lean-lsp-mcp
|
|
29
|
+
├── NuminaProver configured AgentProver: claude + vendored Numina assets + round loop
|
|
30
|
+
└── AristotleProver remote `aristotle submit --project-dir --wait` (no local generation sandbox)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Input contract
|
|
34
|
+
|
|
35
|
+
Submit a **full lake project** (carries `lean-toolchain` + `lake-manifest.json`). The
|
|
36
|
+
verifier **rejects** projects whose toolchain doesn't match the sandbox image's pin
|
|
37
|
+
(`ToolchainMismatch`) instead of failing deep in a build. The CLI can also take bare
|
|
38
|
+
`.lean` files and stage them into the pinned skeleton. One Mathlib image to start
|
|
39
|
+
(pinned Lean/Mathlib **v4.28.0**); `image` is a config field so more can be added.
|
|
40
|
+
|
|
41
|
+
## Project structure (high-level)
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
src/open_atp/
|
|
45
|
+
api.py Platform + prover registry — the dispatch/orchestration layer
|
|
46
|
+
__main__.py `open-atp solve | build-image | build-modal-image` CLI
|
|
47
|
+
images/ image name + toolchain pins (DEFAULT_IMAGE, DEFAULT_TOOLCHAIN)
|
|
48
|
+
lean.py LeanProject, ProofTask, create_project (the Lean input contract)
|
|
49
|
+
verify.py VerificationReport, Verifier (the shared final check)
|
|
50
|
+
backends/ base.py docker.py modal.py (ComputeBackend impls)
|
|
51
|
+
provers/ agent_prover.py numina.py numina_tracker.py aristotle.py
|
|
52
|
+
harness/ coding-agent CLIs staged into the sandbox:
|
|
53
|
+
base.py claude_code.py codex.py opencode.py
|
|
54
|
+
axprover.py vibe.py bundles.py cost.py _paths.py
|
|
55
|
+
assets/ scripts/*.sh configs/mcp.json vibe/lean-standin.toml
|
|
56
|
+
|
|
57
|
+
images/ Dockerfile (Mathlib base image) + lean/ skeleton (toolchain, lakefile)
|
|
58
|
+
vendor/ vendored third-party assets, tracked to upstream SHAs (see VENDOR.md in each)
|
|
59
|
+
numina/ Numina skills + prompts (round-loop prover)
|
|
60
|
+
leanprover-skills/ host-agnostic Lean skills
|
|
61
|
+
lean4-skills/ Claude `lean4` plugin
|
|
62
|
+
tests/ pytest suite (+ tests/.runs/ integration artifacts, gitignored)
|
|
63
|
+
docs/ Sphinx docs (user_guide/, provers/, agent_harness/, api/)
|
|
64
|
+
refs/ read-only symlinks to reference projects (NEVER modify or commit)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The README's `Layout` section predates the `harness/` split — trust the tree above.
|
|
68
|
+
|
|
69
|
+
### Vendored code
|
|
70
|
+
|
|
71
|
+
`vendor/*` is upstream third-party code pinned to a SHA (each has a `VENDOR.md`).
|
|
72
|
+
Ruff is configured with `extend-exclude = ["vendor"]` — **do not reformat or lint
|
|
73
|
+
vendored code**, and keep its upstream style. It ships in the wheel via
|
|
74
|
+
`force-include` and is resolved at runtime by `harness/_paths.py` (wheel:
|
|
75
|
+
`open_atp/vendor/<name>`; checkout: repo-root `vendor/<name>`).
|
|
76
|
+
|
|
77
|
+
## Provers
|
|
78
|
+
|
|
79
|
+
Names accepted by `--provers` and the `Platform` registry (`api.py`):
|
|
80
|
+
|
|
81
|
+
| Name | Backing tool | Notes |
|
|
82
|
+
| --- | --- | --- |
|
|
83
|
+
| `aristotle` | Harmonic Aristotle (hosted) | remote API via `aristotlelib`, no local gen sandbox |
|
|
84
|
+
| `agent` | Claude Code (`claude_code` harness) | default; coding agent + lean-lsp-mcp |
|
|
85
|
+
| `codex` | OpenAI Codex CLI | model `gpt-5.5` |
|
|
86
|
+
| `opencode` | opencode | |
|
|
87
|
+
| `axprover` | ax-prover (LangGraph) | proposer→builder→reviewer loop; default model `claude-opus-4-8`, effort `high` |
|
|
88
|
+
| `numina` | Numina skills/prompts on Claude Code | round-continuation loop |
|
|
89
|
+
| `vibe` | Mistral Vibe `lean` scaffold | hosted model (default `magistral-medium-latest`), no GPU; `--model` configurable |
|
|
90
|
+
|
|
91
|
+
Agentic harnesses share **lean-lsp-mcp** as their LSP server. The shared `Verifier`
|
|
92
|
+
does the final compile/sorry/axiom check regardless of which tool generated the proof.
|
|
93
|
+
|
|
94
|
+
## Tooling
|
|
95
|
+
|
|
96
|
+
- **Python ≥ 3.12**, packaged with **hatchling**, deps managed by **uv** (`uv.lock`).
|
|
97
|
+
- **ruff** — lint (`E,F,I,UP`) + format, line length 88, excludes `vendor`.
|
|
98
|
+
- **mypy** — `strict`, `files = ["src/open_atp"]`.
|
|
99
|
+
- **pytest** — `pytest-cov`, `pytest-xdist` (default `-n 5`).
|
|
100
|
+
- **lefthook** — pre-commit runs ruff check, ruff format --check, and mypy on staged
|
|
101
|
+
`*.py` (with `--force-exclude` so vendored code is skipped). Install with
|
|
102
|
+
`uv run lefthook install`.
|
|
103
|
+
- **Sphinx** (furo + myst) for docs; Read the Docs config in `.readthedocs.yaml`.
|
|
104
|
+
- CLI entry point: `open-atp` → `open_atp.__main__:main`.
|
|
105
|
+
|
|
106
|
+
## Makefile commands
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
make install uv sync
|
|
110
|
+
make test pytest, skipping docker/modal/live-API tests (default markers)
|
|
111
|
+
make test-docker -m docker (requires the built image)
|
|
112
|
+
make test-modal -m modal (requires a Modal token)
|
|
113
|
+
make test-aristotle -m aristotle_api (live, needs ARISTOTLE_API_KEY)
|
|
114
|
+
make test-agent -m agent_api (live + billable, needs creds)
|
|
115
|
+
make cov pytest with coverage → htmlcov/, coverage.xml
|
|
116
|
+
make cov-open build + open the HTML coverage report
|
|
117
|
+
make cov-clean remove coverage artifacts
|
|
118
|
+
make lint ruff check src tests
|
|
119
|
+
make format ruff format + ruff check --fix on src tests
|
|
120
|
+
make typecheck mypy
|
|
121
|
+
make check lint + typecheck + test
|
|
122
|
+
make build-image docker build -t open-atp:latest images/
|
|
123
|
+
make docs sphinx-build -W -b html docs docs/_build/html
|
|
124
|
+
make docs-serve live-reload docs
|
|
125
|
+
make docs-clean remove built docs
|
|
126
|
+
make clean remove build + cache artifacts
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Run `make check` before pushing.
|
|
130
|
+
|
|
131
|
+
## Testing
|
|
132
|
+
|
|
133
|
+
Default `addopts`: `-m 'not aristotle_api and not agent_api and not numina_api' -n 5`.
|
|
134
|
+
The live/billable credentialed suites are **excluded by default** (opt-in) and run only when you
|
|
135
|
+
select their marker. Markers (`pyproject.toml`):
|
|
136
|
+
|
|
137
|
+
- `docker` — needs the `open-atp` Docker image (opt-out: `-m 'not docker'`)
|
|
138
|
+
- `modal` — launches a Modal sandbox (opt-out: `-m 'not modal'`)
|
|
139
|
+
- `aristotle_api` — live Aristotle API (opt-in: `-m aristotle_api`)
|
|
140
|
+
- `agent_api` — live agent CLI, billable + creds (opt-in: `-m agent_api`)
|
|
141
|
+
- `numina_api` — live NuminaProver, billable + creds (opt-in: `-m numina_api`)
|
|
142
|
+
|
|
143
|
+
> Project convention: when running tests (even by explicit path), exclude the
|
|
144
|
+
> docker / modal / `*_api` markers by default — they are slow, billable, or need
|
|
145
|
+
> external compute. `make test` already does this. Use `-n 0` to run serially when
|
|
146
|
+
> debugging.
|
|
147
|
+
|
|
148
|
+
Integration artifacts (agent logs, workdirs) land in `tests/.runs/` (gitignored).
|
|
149
|
+
|
|
150
|
+
## Compute setup: Docker vs. Modal
|
|
151
|
+
|
|
152
|
+
Both backends run the shared `Verifier` **and** the agentic provers end-to-end against
|
|
153
|
+
the Mathlib image. Pick a backend with `--backend`, or split generation from the cheap
|
|
154
|
+
verify with `--agent-backend`.
|
|
155
|
+
|
|
156
|
+
- **Docker** (`DockerBackend`) — bind-mounts the workdir; uses `images/Dockerfile`,
|
|
157
|
+
runs as the `agent` user. Local; build the image first:
|
|
158
|
+
```bash
|
|
159
|
+
docker build -t open-atp:latest images/ # or: make build-image / open-atp build-image
|
|
160
|
+
uv run pytest -m docker
|
|
161
|
+
```
|
|
162
|
+
- **Modal** (`ModalBackend`) — pushes/pulls the workdir around an isolated Sandbox
|
|
163
|
+
filesystem; runs as **root**, so its image is built programmatically with the same
|
|
164
|
+
toolchain installed globally. Publish the image, then run the parity suite:
|
|
165
|
+
```bash
|
|
166
|
+
uv run open-atp build-modal-image --name open-atp --app open-atp
|
|
167
|
+
uv run pytest -m modal # needs MODAL_TOKEN_ID / MODAL_TOKEN_SECRET
|
|
168
|
+
```
|
|
169
|
+
`ModalBackend`'s `image` (sans `:tag`) must match the `--name` you publish under.
|
|
170
|
+
|
|
171
|
+
Examples (a single backend, then a generation/verify split):
|
|
172
|
+
```bash
|
|
173
|
+
uv run open-atp solve path/to/project --provers agent --backend modal
|
|
174
|
+
uv run open-atp solve path/to/project --provers agent \
|
|
175
|
+
--agent-backend modal --backend docker # Modal generates, Docker does cheap verify
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## CLI quick reference
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
open-atp solve <inputs...> [options] # lake project dir, or bare .lean files
|
|
182
|
+
--provers comma-separated names (default: agent)
|
|
183
|
+
--instructions TEXT # guidance forwarded to provers
|
|
184
|
+
--targets comma-separated files relative to project
|
|
185
|
+
--image / --toolchain # default to images/ pins
|
|
186
|
+
--backend {docker,modal} default docker
|
|
187
|
+
--agent-backend {docker,modal} separate generation backend (defaults to --backend)
|
|
188
|
+
--runs-dir DIR default runs/
|
|
189
|
+
--max-workers N
|
|
190
|
+
--json emit SolveResult as JSON
|
|
191
|
+
|
|
192
|
+
open-atp build-image [--tag TAG] [--no-cache]
|
|
193
|
+
open-atp build-modal-image [--name N] [--app A] [--force]
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Programmatic verify:
|
|
197
|
+
```python
|
|
198
|
+
from open_atp.lean import LeanProject
|
|
199
|
+
from open_atp.verify import docker_verifier
|
|
200
|
+
report = docker_verifier().verify(LeanProject("path/to/lake/project"))
|
|
201
|
+
print(report.verified, report.sorry_free, report.axioms)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Environment / secrets
|
|
205
|
+
|
|
206
|
+
Copy `.env.example` → `.env` (gitignored, never committed). Each key is needed only
|
|
207
|
+
for the corresponding **live** test or harness; absent keys make the dependent
|
|
208
|
+
skill/test degrade or skip:
|
|
209
|
+
|
|
210
|
+
- `ARISTOTLE_API_KEY` — `pytest -m aristotle_api`
|
|
211
|
+
- `CLAUDE_CODE_OAUTH_TOKEN` — `agent_api` test with default claude_code harness
|
|
212
|
+
(`claude setup-token`)
|
|
213
|
+
- `GEMINI_API_KEY` / `OPENAI_API_KEY` / `LEAN_LEANDEX_API_KEY` — Numina helper skills
|
|
214
|
+
- `ANTHROPIC_API_KEY` / `GOOGLE_API_KEY` — `axprover` (raw provider key matching
|
|
215
|
+
the configured `model`); `TAVILY_API_KEY` optional (ax-prover web search)
|
|
216
|
+
- `MODAL_TOKEN_ID` / `MODAL_TOKEN_SECRET` — Modal backend
|
|
217
|
+
|
|
218
|
+
## Docs: API reference convention
|
|
219
|
+
|
|
220
|
+
The API pages (`docs/api/*.md`) are Sphinx `autoclass` directives; **numpydoc** renders
|
|
221
|
+
the class docstring's `Parameters`/`Attributes` sections, and a single
|
|
222
|
+
`autodoc-skip-member` hook in `docs/conf.py` (`_skip_non_methods`) drops every class
|
|
223
|
+
member that isn't a method. So the split is:
|
|
224
|
+
|
|
225
|
+
- **Constructor params and attributes** (instance state + `@property`) live **only in
|
|
226
|
+
the docstring**, in `Parameters`/`Attributes` sections. The hook hides them as
|
|
227
|
+
members, so they render once, from the prose. Never re-list them with `:members:`.
|
|
228
|
+
- **List each name once — `Parameters` *or* `Attributes`, never both** (the
|
|
229
|
+
numpy/scipy/sklearn convention). A constructor arg stored verbatim as an attribute is
|
|
230
|
+
documented only under `Parameters`; readers know `self.<arg>` exists without it being
|
|
231
|
+
repeated. `Attributes` is reserved for state **not** in the signature: `@property`
|
|
232
|
+
(e.g. `Harness.command`, `Verifier.image`) and derived/computed fields. If a
|
|
233
|
+
`@property` shares a name with a param (e.g. `OpenCodeHarness.provider`), document the
|
|
234
|
+
resolution in the param and leave it out of `Attributes`.
|
|
235
|
+
- **Methods** are the only members `autoclass` enumerates. Document each method **once,
|
|
236
|
+
on the class that defines it.**
|
|
237
|
+
- **Inheritance**: numpydoc does *not* walk the MRO, so each leaf class must
|
|
238
|
+
**re-document every constructor param it accepts, including inherited ones** (e.g.
|
|
239
|
+
`backend`/`timeout_s` from `AutomatedProver`) — otherwise they don't render. Pages do
|
|
240
|
+
**not** use `:inherited-members:`: an inherited method (e.g. `prove`) appears only on
|
|
241
|
+
its base class, not on each child.
|
|
242
|
+
|
|
243
|
+
Practical rules:
|
|
244
|
+
|
|
245
|
+
- **Do not** add `:exclude-members:` for attributes, params, or `name` — the hook
|
|
246
|
+
already handles them. The only legitimate `:exclude-members:` is to hide an
|
|
247
|
+
**overridden method** from a child page so it stays documented on the base only
|
|
248
|
+
(current uses: `start` on the backend impls, `stage` on the harness impls).
|
|
249
|
+
- A new attribute/`@property` only shows up if you add it to the docstring `Attributes`
|
|
250
|
+
section.
|
|
251
|
+
- `make docs` builds with `-W` (warnings are errors) — a broken xref or duplicate object description
|
|
252
|
+
fails the build.
|
|
253
|
+
|
|
254
|
+
## Conventions
|
|
255
|
+
|
|
256
|
+
- Commit directly to `main` unless told otherwise; warn before committing work that
|
|
257
|
+
clearly belongs on another branch.
|
|
258
|
+
- Never modify or commit anything under `refs/` (read-only reference symlinks) or
|
|
259
|
+
reformat anything under `vendor/` (upstream-tracked).
|
|
260
|
+
- Keep `mypy --strict` and ruff clean; run `make check` before pushing.
|
open_atp-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
@AGENTS.md
|
open_atp-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Henry Robbins
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
open_atp-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Common dev commands for the open-atp package.
|
|
2
|
+
|
|
3
|
+
.PHONY: help install test test-docker test-modal test-aristotle test-agent cov cov-open cov-clean lint format typecheck check build build-image docs docs-serve docs-clean clean
|
|
4
|
+
|
|
5
|
+
help:
|
|
6
|
+
@echo "Targets:"
|
|
7
|
+
@echo " install Sync deps with uv"
|
|
8
|
+
@echo " test Run pytest, skipping docker/modal/live-API tests"
|
|
9
|
+
@echo " test-docker Run docker-marked tests (requires the built image)"
|
|
10
|
+
@echo " test-modal Run modal-marked tests (requires a Modal token)"
|
|
11
|
+
@echo " test-aristotle Run the live Aristotle API test (needs ARISTOTLE_API_KEY)"
|
|
12
|
+
@echo " test-agent Run the live agent CLI test (billable + needs creds)"
|
|
13
|
+
@echo " cov Run pytest with coverage; HTML to htmlcov/, XML to coverage.xml"
|
|
14
|
+
@echo " cov-open Open the HTML coverage report"
|
|
15
|
+
@echo " cov-clean Remove coverage artifacts"
|
|
16
|
+
@echo " lint Run ruff check"
|
|
17
|
+
@echo " format Run ruff format + ruff check --fix"
|
|
18
|
+
@echo " typecheck Run mypy"
|
|
19
|
+
@echo " check Run lint + typecheck + test"
|
|
20
|
+
@echo " build Build the sdist + wheel into dist/"
|
|
21
|
+
@echo " build-image Build the open-atp:latest Docker image"
|
|
22
|
+
@echo " docs Build the Sphinx docs once"
|
|
23
|
+
@echo " docs-serve Live-reload docs in browser"
|
|
24
|
+
@echo " docs-clean Remove built docs"
|
|
25
|
+
@echo " clean Remove build + cache artifacts"
|
|
26
|
+
|
|
27
|
+
install:
|
|
28
|
+
uv sync
|
|
29
|
+
|
|
30
|
+
test:
|
|
31
|
+
uv run pytest
|
|
32
|
+
|
|
33
|
+
test-docker:
|
|
34
|
+
uv run pytest -m 'docker'
|
|
35
|
+
|
|
36
|
+
test-modal:
|
|
37
|
+
uv run pytest -m 'modal'
|
|
38
|
+
|
|
39
|
+
test-aristotle:
|
|
40
|
+
uv run pytest -m 'aristotle_api'
|
|
41
|
+
|
|
42
|
+
test-agent:
|
|
43
|
+
uv run pytest -m 'agent_api'
|
|
44
|
+
|
|
45
|
+
cov:
|
|
46
|
+
uv run pytest \
|
|
47
|
+
--cov=open_atp \
|
|
48
|
+
--cov-report=term-missing \
|
|
49
|
+
--cov-report=html \
|
|
50
|
+
--cov-report=xml
|
|
51
|
+
|
|
52
|
+
cov-open: cov
|
|
53
|
+
@python -c "import os, webbrowser; webbrowser.open('file://' + os.path.abspath('htmlcov/index.html'))"
|
|
54
|
+
|
|
55
|
+
cov-clean:
|
|
56
|
+
rm -rf htmlcov coverage.xml .coverage
|
|
57
|
+
|
|
58
|
+
lint:
|
|
59
|
+
uv run ruff check src tests
|
|
60
|
+
|
|
61
|
+
format:
|
|
62
|
+
uv run ruff format src tests
|
|
63
|
+
uv run ruff check --fix src tests
|
|
64
|
+
|
|
65
|
+
typecheck:
|
|
66
|
+
uv run mypy
|
|
67
|
+
|
|
68
|
+
check: lint typecheck test
|
|
69
|
+
|
|
70
|
+
build:
|
|
71
|
+
uv build
|
|
72
|
+
|
|
73
|
+
build-image:
|
|
74
|
+
docker build -t open-atp:latest images/
|
|
75
|
+
|
|
76
|
+
docs:
|
|
77
|
+
uv run --extra docs sphinx-build -W -b html docs docs/_build/html
|
|
78
|
+
|
|
79
|
+
docs-serve:
|
|
80
|
+
uv run --extra docs sphinx-autobuild --watch src docs docs/_build/html
|
|
81
|
+
|
|
82
|
+
docs-clean:
|
|
83
|
+
rm -rf docs/_build
|
|
84
|
+
|
|
85
|
+
clean: docs-clean cov-clean
|
|
86
|
+
rm -rf build dist .pytest_cache .mypy_cache .ruff_cache
|
|
87
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
open_atp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: open-atp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Open Automated Formal Proof Synthesis: upload Lean files with sorrys, get verified completed proofs across multiple proving backends.
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Requires-Dist: anthropic>=0.75.0
|
|
8
|
+
Requires-Dist: aristotlelib
|
|
9
|
+
Requires-Dist: google-genai>=1.44.0
|
|
10
|
+
Requires-Dist: jinja2>=3.1
|
|
11
|
+
Requires-Dist: lean-explore>=1.2.1
|
|
12
|
+
Requires-Dist: modal>=1.5
|
|
13
|
+
Requires-Dist: openai>=2.7.1
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
|
|
19
|
+
Requires-Dist: python-dotenv; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
21
|
+
Provides-Extra: docs
|
|
22
|
+
Requires-Dist: furo; extra == 'docs'
|
|
23
|
+
Requires-Dist: myst-parser>=3; extra == 'docs'
|
|
24
|
+
Requires-Dist: numpydoc; extra == 'docs'
|
|
25
|
+
Requires-Dist: sphinx-autobuild; extra == 'docs'
|
|
26
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'docs'
|
|
27
|
+
Requires-Dist: sphinx-design; extra == 'docs'
|
|
28
|
+
Requires-Dist: sphinx>=7; extra == 'docs'
|