neurocore-skill-math 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurocore_skill_math-0.1.0/.gitignore +152 -0
- neurocore_skill_math-0.1.0/PKG-INFO +120 -0
- neurocore_skill_math-0.1.0/README.md +87 -0
- neurocore_skill_math-0.1.0/blueprints/lean-first-math-worker.flow.yaml +90 -0
- neurocore_skill_math-0.1.0/blueprints/math-proof-validation-worker.flow.yaml +116 -0
- neurocore_skill_math-0.1.0/pyproject.toml +98 -0
- neurocore_skill_math-0.1.0/scripts/install_math_tools.sh +130 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/__init__.py +72 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/_availability.py +51 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/_base.py +112 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/_formats.py +54 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/_llm.py +54 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/_run.py +81 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/atp.py +188 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/cas.py +144 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/check.py +26 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/formal.py +238 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/numeric.py +84 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/planning.py +84 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/prep.py +120 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/report.py +91 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/smt.py +145 -0
- neurocore_skill_math-0.1.0/src/neurocore_skill_math/symbolic.py +161 -0
- neurocore_skill_math-0.1.0/tests/test_availability.py +26 -0
- neurocore_skill_math-0.1.0/tests/test_blueprints.py +27 -0
- neurocore_skill_math-0.1.0/tests/test_external.py +167 -0
- neurocore_skill_math-0.1.0/tests/test_llm.py +112 -0
- neurocore_skill_math-0.1.0/tests/test_pure.py +129 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
cover/
|
|
51
|
+
|
|
52
|
+
# Translations
|
|
53
|
+
*.mo
|
|
54
|
+
*.pot
|
|
55
|
+
|
|
56
|
+
# Django stuff:
|
|
57
|
+
*.log
|
|
58
|
+
local_settings.py
|
|
59
|
+
db.sqlite3
|
|
60
|
+
db.sqlite3-journal
|
|
61
|
+
|
|
62
|
+
# Flask stuff:
|
|
63
|
+
instance/
|
|
64
|
+
.webassets-cache
|
|
65
|
+
|
|
66
|
+
# Scrapy stuff:
|
|
67
|
+
.scrapy
|
|
68
|
+
|
|
69
|
+
# Sphinx documentation
|
|
70
|
+
docs/_build/
|
|
71
|
+
|
|
72
|
+
# PyBuilder
|
|
73
|
+
.pybuilder/
|
|
74
|
+
target/
|
|
75
|
+
|
|
76
|
+
# Jupyter Notebook
|
|
77
|
+
.ipynb_checkpoints
|
|
78
|
+
|
|
79
|
+
# IPython
|
|
80
|
+
profile_default/
|
|
81
|
+
ipython_config.py
|
|
82
|
+
|
|
83
|
+
# pyenv
|
|
84
|
+
.python-version
|
|
85
|
+
|
|
86
|
+
# pipenv
|
|
87
|
+
Pipfile.lock
|
|
88
|
+
|
|
89
|
+
# poetry
|
|
90
|
+
poetry.lock
|
|
91
|
+
|
|
92
|
+
# pdm
|
|
93
|
+
.pdm.toml
|
|
94
|
+
.pdm-python
|
|
95
|
+
.pdm-build/
|
|
96
|
+
|
|
97
|
+
# PEP 582
|
|
98
|
+
__pypackages__/
|
|
99
|
+
|
|
100
|
+
# Celery stuff
|
|
101
|
+
celerybeat-schedule
|
|
102
|
+
celerybeat.pid
|
|
103
|
+
|
|
104
|
+
# SageMath parsed files
|
|
105
|
+
*.sage.py
|
|
106
|
+
|
|
107
|
+
# Environments
|
|
108
|
+
.env
|
|
109
|
+
.venv
|
|
110
|
+
env/
|
|
111
|
+
venv/
|
|
112
|
+
ENV/
|
|
113
|
+
env.bak/
|
|
114
|
+
venv.bak/
|
|
115
|
+
|
|
116
|
+
# Spyder project settings
|
|
117
|
+
.spyderproject
|
|
118
|
+
.spyproject
|
|
119
|
+
|
|
120
|
+
# Rope project settings
|
|
121
|
+
.ropeproject
|
|
122
|
+
|
|
123
|
+
# mkdocs documentation
|
|
124
|
+
/site
|
|
125
|
+
|
|
126
|
+
# mypy
|
|
127
|
+
.mypy_cache/
|
|
128
|
+
.dmypy.json
|
|
129
|
+
dmypy.json
|
|
130
|
+
|
|
131
|
+
# Pyre type checker
|
|
132
|
+
.pyre/
|
|
133
|
+
|
|
134
|
+
# pytype static type analyzer
|
|
135
|
+
.pytype/
|
|
136
|
+
|
|
137
|
+
# Cython debug symbols
|
|
138
|
+
cython_debug/
|
|
139
|
+
|
|
140
|
+
# Ruff
|
|
141
|
+
.ruff_cache/
|
|
142
|
+
|
|
143
|
+
# IDEs and editors
|
|
144
|
+
.idea/
|
|
145
|
+
.vscode/
|
|
146
|
+
*.swp
|
|
147
|
+
*.swo
|
|
148
|
+
*~
|
|
149
|
+
|
|
150
|
+
# OS files
|
|
151
|
+
.DS_Store
|
|
152
|
+
Thumbs.db
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: neurocore-skill-math
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Math-proof toolchain skills for NeuroCore: CAS, SMT, ATP, and formal proof assistants
|
|
5
|
+
Author: NeuroCore Contributors
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Keywords: ai,lean,math,neurocore,proof,skill,sympy,theorem-proving,z3
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.13
|
|
17
|
+
Requires-Dist: mpmath>=1.3
|
|
18
|
+
Requires-Dist: neurocore-ai>=0.4.0
|
|
19
|
+
Requires-Dist: sympy>=1.12
|
|
20
|
+
Requires-Dist: z3-solver>=4.12
|
|
21
|
+
Provides-Extra: cvc5
|
|
22
|
+
Requires-Dist: cvc5>=1.1; extra == 'cvc5'
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=9.0.2; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
28
|
+
Provides-Extra: numeric
|
|
29
|
+
Requires-Dist: networkx>=3.0; extra == 'numeric'
|
|
30
|
+
Requires-Dist: numpy>=1.26; extra == 'numeric'
|
|
31
|
+
Requires-Dist: scipy>=1.11; extra == 'numeric'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# neurocore-skill-math
|
|
35
|
+
|
|
36
|
+
A **math-proof toolchain** for [NeuroCore](https://github.com/alexh-scrt/neurocore):
|
|
37
|
+
one package exposing many small, composable skills that wrap computer-algebra
|
|
38
|
+
systems, SMT solvers, automated theorem provers, and formal proof assistants. A
|
|
39
|
+
supervisor agent / FlowEngine blueprint composes them into proof-search and
|
|
40
|
+
proof-validation chains.
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install neurocore-skill-math # pulls sympy, mpmath, z3-solver
|
|
44
|
+
pip install "neurocore-skill-math[cvc5]" # + cvc5 SMT backend (optional)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Most external engines (Lean, Vampire, Isabelle, Coq, SageMath, GAP, PARI/GP, …)
|
|
48
|
+
are **not** Python packages — install them with the provided script
|
|
49
|
+
(`scripts/install_math_tools.sh`, Ubuntu 24.04) and check what's available:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
python -m neurocore_skill_math.check # prints a tool-availability report
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Every skill **detects whether its backend is installed** and degrades gracefully
|
|
56
|
+
(status `tool_unavailable`) rather than crashing a flow.
|
|
57
|
+
|
|
58
|
+
## Skills
|
|
59
|
+
|
|
60
|
+
| `type` | Group | Backend | Reads → writes |
|
|
61
|
+
|--------|-------|---------|----------------|
|
|
62
|
+
| `math_problem_parser` | prep | LLM | `problem` → `math.parsed` |
|
|
63
|
+
| `math_domain_classifier` | prep | LLM | `math.parsed` → `math.domain` |
|
|
64
|
+
| `math_statement_normalizer` | prep | LLM | `math.parsed` → `math.normalized` |
|
|
65
|
+
| `sympy_simplify` / `sympy_solve` / `sympy_calculus` | symbolic | SymPy | `math.normalized` → `evidence.sympy` |
|
|
66
|
+
| `mpmath_high_precision_check` | numeric | mpmath | `math.normalized` → `evidence.numeric` |
|
|
67
|
+
| `pari_gp_number_theory` | CAS | `gp` | `math.normalized` → `evidence.pari` |
|
|
68
|
+
| `gap_group_theory` | CAS | `gap` | `math.normalized` → `evidence.gap` |
|
|
69
|
+
| `sagemath_compute` | CAS | `sage`/Docker | `math.normalized` → `evidence.sage` |
|
|
70
|
+
| `z3_smt_check` | SMT | z3 | `math.normalized` → `counterexamples.z3` |
|
|
71
|
+
| `cvc5_smt_check` | SMT | cvc5 | `math.normalized` → `counterexamples.cvc5` |
|
|
72
|
+
| `mace4_countermodel` | counterexample | `mace4` | `math.normalized` → `counterexamples.mace4` |
|
|
73
|
+
| `vampire_prove_tptp` / `eprover_prove_tptp` | ATP | `vampire`/`eprover` | `math.normalized` → `proof.*` |
|
|
74
|
+
| `prover9_prove` | ATP | `prover9` | `math.normalized` → `proof.prover9` |
|
|
75
|
+
| `llm_proof_planner` | planning | LLM | evidence → `proof.strategy` |
|
|
76
|
+
| `theorem_retriever` | planning | LLM | `proof.strategy` → `proof.premises` |
|
|
77
|
+
| `lean4_formalize_statement` | formal | LLM | `math.normalized` → `formal.lean_candidate` |
|
|
78
|
+
| `lean4_check` | formal | `lean`/`lake` | `formal.lean_candidate` → `formal.lean_result` |
|
|
79
|
+
| `lean4_repair` | formal | LLM | `formal.lean_candidate` + errors → `formal.lean_candidate` |
|
|
80
|
+
| `isabelle_check_theory` | formal | `isabelle` | `formal.isabelle_candidate` → `formal.isabelle_result` |
|
|
81
|
+
| `coq_check` | formal | `coqc` | `formal.coq_candidate` → `formal.coq_result` |
|
|
82
|
+
| `proof_report_builder` | report | — | all envelopes → `validation_status` / `final_answer` / `proof_artifacts` |
|
|
83
|
+
|
|
84
|
+
### Result envelope & ports
|
|
85
|
+
|
|
86
|
+
Each skill writes a uniform envelope:
|
|
87
|
+
`{status, tool, available, result, log, error, duration_ms}` with
|
|
88
|
+
`status ∈ {ok, proved, refuted, unknown, tool_unavailable, error, timeout}`.
|
|
89
|
+
|
|
90
|
+
Skills set **output ports** so graph blueprints can route:
|
|
91
|
+
- SMT / Mace4: `counterexample_found` / `no_counterexample`
|
|
92
|
+
- ATP: `proof_found` / `no_proof`
|
|
93
|
+
- Lean/Isabelle/Coq check: `verified` / `repair_needed` / `failed`; Lean repair: `repaired`
|
|
94
|
+
- domain classifier: `number_theory` / `group_theory` / …
|
|
95
|
+
|
|
96
|
+
All skills take configurable `input_key` / `output_key` (the design doc's dotted-key
|
|
97
|
+
contract, e.g. `evidence.sympy`), so the same skill can be wired into different
|
|
98
|
+
positions in a chain.
|
|
99
|
+
|
|
100
|
+
## Blueprints
|
|
101
|
+
|
|
102
|
+
`blueprints/` ships two reference proof workers from the design doc:
|
|
103
|
+
- `lean-first-math-worker.flow.yaml` — a focused parse → explore → refute →
|
|
104
|
+
formalize → verify → repair → report loop.
|
|
105
|
+
- `math-proof-validation-worker.flow.yaml` — the full fan-out across CAS/SMT/ATP
|
|
106
|
+
then Lean/Isabelle/Coq validation.
|
|
107
|
+
|
|
108
|
+
> **Graph routing.** These workers use edge **ports**, edge **conditions**, and a
|
|
109
|
+
> Lean **repair loop**. With `neurocore-ai>=0.4.0` (on `flowengine>=0.6.0`),
|
|
110
|
+
> NeuroCore routes such graph flows through flowengine's `GraphExecutor`, which
|
|
111
|
+
> honors port/condition gating and cyclic `max_iterations` — so the conditional
|
|
112
|
+
> early-exits and the repair loop execute as drawn. Plain DAGs (no
|
|
113
|
+
> ports/conditions/cycles) still use the concurrent layer executor. On older
|
|
114
|
+
> NeuroCore the skills' ports are simply ignored (all reachable nodes run).
|
|
115
|
+
|
|
116
|
+
## Convention
|
|
117
|
+
|
|
118
|
+
Standard NeuroCore skill package: entry-point group `neurocore.skills`, import
|
|
119
|
+
package `neurocore_skill_math`, kebab-case distribution name `neurocore-skill-math`. See the
|
|
120
|
+
[skill-authoring guide](https://neurocore.readthedocs.io/skill-authoring.html).
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# neurocore-skill-math
|
|
2
|
+
|
|
3
|
+
A **math-proof toolchain** for [NeuroCore](https://github.com/alexh-scrt/neurocore):
|
|
4
|
+
one package exposing many small, composable skills that wrap computer-algebra
|
|
5
|
+
systems, SMT solvers, automated theorem provers, and formal proof assistants. A
|
|
6
|
+
supervisor agent / FlowEngine blueprint composes them into proof-search and
|
|
7
|
+
proof-validation chains.
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install neurocore-skill-math # pulls sympy, mpmath, z3-solver
|
|
11
|
+
pip install "neurocore-skill-math[cvc5]" # + cvc5 SMT backend (optional)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Most external engines (Lean, Vampire, Isabelle, Coq, SageMath, GAP, PARI/GP, …)
|
|
15
|
+
are **not** Python packages — install them with the provided script
|
|
16
|
+
(`scripts/install_math_tools.sh`, Ubuntu 24.04) and check what's available:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
python -m neurocore_skill_math.check # prints a tool-availability report
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Every skill **detects whether its backend is installed** and degrades gracefully
|
|
23
|
+
(status `tool_unavailable`) rather than crashing a flow.
|
|
24
|
+
|
|
25
|
+
## Skills
|
|
26
|
+
|
|
27
|
+
| `type` | Group | Backend | Reads → writes |
|
|
28
|
+
|--------|-------|---------|----------------|
|
|
29
|
+
| `math_problem_parser` | prep | LLM | `problem` → `math.parsed` |
|
|
30
|
+
| `math_domain_classifier` | prep | LLM | `math.parsed` → `math.domain` |
|
|
31
|
+
| `math_statement_normalizer` | prep | LLM | `math.parsed` → `math.normalized` |
|
|
32
|
+
| `sympy_simplify` / `sympy_solve` / `sympy_calculus` | symbolic | SymPy | `math.normalized` → `evidence.sympy` |
|
|
33
|
+
| `mpmath_high_precision_check` | numeric | mpmath | `math.normalized` → `evidence.numeric` |
|
|
34
|
+
| `pari_gp_number_theory` | CAS | `gp` | `math.normalized` → `evidence.pari` |
|
|
35
|
+
| `gap_group_theory` | CAS | `gap` | `math.normalized` → `evidence.gap` |
|
|
36
|
+
| `sagemath_compute` | CAS | `sage`/Docker | `math.normalized` → `evidence.sage` |
|
|
37
|
+
| `z3_smt_check` | SMT | z3 | `math.normalized` → `counterexamples.z3` |
|
|
38
|
+
| `cvc5_smt_check` | SMT | cvc5 | `math.normalized` → `counterexamples.cvc5` |
|
|
39
|
+
| `mace4_countermodel` | counterexample | `mace4` | `math.normalized` → `counterexamples.mace4` |
|
|
40
|
+
| `vampire_prove_tptp` / `eprover_prove_tptp` | ATP | `vampire`/`eprover` | `math.normalized` → `proof.*` |
|
|
41
|
+
| `prover9_prove` | ATP | `prover9` | `math.normalized` → `proof.prover9` |
|
|
42
|
+
| `llm_proof_planner` | planning | LLM | evidence → `proof.strategy` |
|
|
43
|
+
| `theorem_retriever` | planning | LLM | `proof.strategy` → `proof.premises` |
|
|
44
|
+
| `lean4_formalize_statement` | formal | LLM | `math.normalized` → `formal.lean_candidate` |
|
|
45
|
+
| `lean4_check` | formal | `lean`/`lake` | `formal.lean_candidate` → `formal.lean_result` |
|
|
46
|
+
| `lean4_repair` | formal | LLM | `formal.lean_candidate` + errors → `formal.lean_candidate` |
|
|
47
|
+
| `isabelle_check_theory` | formal | `isabelle` | `formal.isabelle_candidate` → `formal.isabelle_result` |
|
|
48
|
+
| `coq_check` | formal | `coqc` | `formal.coq_candidate` → `formal.coq_result` |
|
|
49
|
+
| `proof_report_builder` | report | — | all envelopes → `validation_status` / `final_answer` / `proof_artifacts` |
|
|
50
|
+
|
|
51
|
+
### Result envelope & ports
|
|
52
|
+
|
|
53
|
+
Each skill writes a uniform envelope:
|
|
54
|
+
`{status, tool, available, result, log, error, duration_ms}` with
|
|
55
|
+
`status ∈ {ok, proved, refuted, unknown, tool_unavailable, error, timeout}`.
|
|
56
|
+
|
|
57
|
+
Skills set **output ports** so graph blueprints can route:
|
|
58
|
+
- SMT / Mace4: `counterexample_found` / `no_counterexample`
|
|
59
|
+
- ATP: `proof_found` / `no_proof`
|
|
60
|
+
- Lean/Isabelle/Coq check: `verified` / `repair_needed` / `failed`; Lean repair: `repaired`
|
|
61
|
+
- domain classifier: `number_theory` / `group_theory` / …
|
|
62
|
+
|
|
63
|
+
All skills take configurable `input_key` / `output_key` (the design doc's dotted-key
|
|
64
|
+
contract, e.g. `evidence.sympy`), so the same skill can be wired into different
|
|
65
|
+
positions in a chain.
|
|
66
|
+
|
|
67
|
+
## Blueprints
|
|
68
|
+
|
|
69
|
+
`blueprints/` ships two reference proof workers from the design doc:
|
|
70
|
+
- `lean-first-math-worker.flow.yaml` — a focused parse → explore → refute →
|
|
71
|
+
formalize → verify → repair → report loop.
|
|
72
|
+
- `math-proof-validation-worker.flow.yaml` — the full fan-out across CAS/SMT/ATP
|
|
73
|
+
then Lean/Isabelle/Coq validation.
|
|
74
|
+
|
|
75
|
+
> **Graph routing.** These workers use edge **ports**, edge **conditions**, and a
|
|
76
|
+
> Lean **repair loop**. With `neurocore-ai>=0.4.0` (on `flowengine>=0.6.0`),
|
|
77
|
+
> NeuroCore routes such graph flows through flowengine's `GraphExecutor`, which
|
|
78
|
+
> honors port/condition gating and cyclic `max_iterations` — so the conditional
|
|
79
|
+
> early-exits and the repair loop execute as drawn. Plain DAGs (no
|
|
80
|
+
> ports/conditions/cycles) still use the concurrent layer executor. On older
|
|
81
|
+
> NeuroCore the skills' ports are simply ignored (all reachable nodes run).
|
|
82
|
+
|
|
83
|
+
## Convention
|
|
84
|
+
|
|
85
|
+
Standard NeuroCore skill package: entry-point group `neurocore.skills`, import
|
|
86
|
+
package `neurocore_skill_math`, kebab-case distribution name `neurocore-skill-math`. See the
|
|
87
|
+
[skill-authoring guide](https://neurocore.readthedocs.io/skill-authoring.html).
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# lean-first-math-worker — a focused proof-validation loop (design doc §6).
|
|
2
|
+
#
|
|
3
|
+
# parse → (sympy explore + z3 counterexample) → plan → retrieve premises
|
|
4
|
+
# → formalize in Lean → check → (repair loop) → report
|
|
5
|
+
#
|
|
6
|
+
# Run:
|
|
7
|
+
# neurocore run blueprints/lean-first-math-worker.flow.yaml \
|
|
8
|
+
# --data problem="Every integer n > 1 has a prime divisor."
|
|
9
|
+
#
|
|
10
|
+
# ROUTING: with neurocore-ai>=0.4.0 (flowengine>=0.6.0), graph flows that use
|
|
11
|
+
# edge ports/conditions or cycles route through flowengine's GraphExecutor, so the
|
|
12
|
+
# z3 counterexample early-exit and the lean check→repair loop execute as drawn.
|
|
13
|
+
# LLM skills require an `llm:` provider in neurocore.yaml; unavailable external
|
|
14
|
+
# tools degrade to status=tool_unavailable (run `python -m neurocore_skill_math.check`).
|
|
15
|
+
name: lean-first-math-worker
|
|
16
|
+
version: "0.1.0"
|
|
17
|
+
description: >
|
|
18
|
+
Parse a problem, explore with SymPy, search for a counterexample with Z3, plan a
|
|
19
|
+
proof, retrieve premises, formalize and check in Lean 4 (with a repair loop), and
|
|
20
|
+
emit a proof report.
|
|
21
|
+
components:
|
|
22
|
+
- name: parse
|
|
23
|
+
type: math_problem_parser
|
|
24
|
+
config: {input_key: problem, output_key: math.parsed}
|
|
25
|
+
- name: sympy
|
|
26
|
+
type: sympy_simplify
|
|
27
|
+
config: {input_key: math.parsed, output_key: evidence.sympy, timeout_seconds: 15}
|
|
28
|
+
- name: z3
|
|
29
|
+
type: z3_smt_check
|
|
30
|
+
config: {input_key: math.parsed, output_key: counterexamples.z3, timeout_seconds: 20}
|
|
31
|
+
- name: planner
|
|
32
|
+
type: llm_proof_planner
|
|
33
|
+
config: {input_key: math.parsed, output_key: proof.strategy}
|
|
34
|
+
- name: retrieve
|
|
35
|
+
type: theorem_retriever
|
|
36
|
+
config: {input_key: proof.strategy, output_key: proof.premises, backends: [lean_mathlib]}
|
|
37
|
+
- name: formalize
|
|
38
|
+
type: lean4_formalize_statement
|
|
39
|
+
config:
|
|
40
|
+
input_key: math.parsed
|
|
41
|
+
strategy_key: proof.strategy
|
|
42
|
+
premises_key: proof.premises
|
|
43
|
+
output_key: formal.lean_candidate
|
|
44
|
+
- name: check
|
|
45
|
+
type: lean4_check
|
|
46
|
+
config:
|
|
47
|
+
input_key: formal.lean_candidate
|
|
48
|
+
output_key: formal.lean_result
|
|
49
|
+
project_root: ./lean/MathAgent
|
|
50
|
+
timeout_seconds: 120
|
|
51
|
+
- name: repair
|
|
52
|
+
type: lean4_repair
|
|
53
|
+
config:
|
|
54
|
+
candidate_key: formal.lean_candidate
|
|
55
|
+
error_key: formal.lean_result
|
|
56
|
+
output_key: formal.lean_candidate
|
|
57
|
+
max_repair_attempts: 3
|
|
58
|
+
- name: report
|
|
59
|
+
type: proof_report_builder
|
|
60
|
+
config: {input_key: math.parsed}
|
|
61
|
+
flow:
|
|
62
|
+
type: graph
|
|
63
|
+
settings:
|
|
64
|
+
max_iterations: 3
|
|
65
|
+
on_max_iterations: exit
|
|
66
|
+
timeout_seconds: 300
|
|
67
|
+
fail_fast: false
|
|
68
|
+
nodes:
|
|
69
|
+
- {id: parse, component: parse}
|
|
70
|
+
- {id: sympy, component: sympy}
|
|
71
|
+
- {id: z3, component: z3}
|
|
72
|
+
- {id: planner, component: planner}
|
|
73
|
+
- {id: retrieve, component: retrieve}
|
|
74
|
+
- {id: formalize, component: formalize}
|
|
75
|
+
- {id: check, component: check}
|
|
76
|
+
- {id: repair, component: repair}
|
|
77
|
+
- {id: report, component: report}
|
|
78
|
+
edges:
|
|
79
|
+
- {source: parse, target: sympy}
|
|
80
|
+
- {source: parse, target: z3}
|
|
81
|
+
- {source: z3, target: report, port: counterexample_found}
|
|
82
|
+
- {source: sympy, target: planner}
|
|
83
|
+
- {source: z3, target: planner, port: no_counterexample}
|
|
84
|
+
- {source: planner, target: retrieve}
|
|
85
|
+
- {source: retrieve, target: formalize}
|
|
86
|
+
- {source: formalize, target: check}
|
|
87
|
+
- {source: check, target: report, port: verified}
|
|
88
|
+
- {source: check, target: repair, port: repair_needed}
|
|
89
|
+
- {source: repair, target: check, port: repaired}
|
|
90
|
+
- {source: check, target: report, port: failed}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# math-proof-validation-worker — the full proof toolchain (design doc §5).
|
|
2
|
+
#
|
|
3
|
+
# parse → classify → normalize
|
|
4
|
+
# → fan-out: sympy / sage / pari / gap / numeric / z3 / cvc5 / mace4
|
|
5
|
+
# → plan → retrieve premises
|
|
6
|
+
# → ATP: vampire / eprover / prover9
|
|
7
|
+
# → formalize in Lean → check (+ isabelle/coq checks) → repair loop
|
|
8
|
+
# → report
|
|
9
|
+
#
|
|
10
|
+
# Run:
|
|
11
|
+
# neurocore run blueprints/math-proof-validation-worker.flow.yaml \
|
|
12
|
+
# --data problem="<your statement>" --project-root <project-with-llm-config>
|
|
13
|
+
#
|
|
14
|
+
# ROUTING: with neurocore-ai>=0.4.0 (flowengine>=0.6.0), this graph routes through
|
|
15
|
+
# flowengine's GraphExecutor, which honors edge ports/conditions and cyclic
|
|
16
|
+
# max_iterations — so the early counterexample exits and the Lean repair loop
|
|
17
|
+
# execute as drawn. Heavy backends degrade to status=tool_unavailable when not
|
|
18
|
+
# installed (run `python -m neurocore_skill_math.check`). LLM skills need an
|
|
19
|
+
# `llm:` provider configured in neurocore.yaml.
|
|
20
|
+
name: math-proof-validation-worker
|
|
21
|
+
version: "0.1.0"
|
|
22
|
+
description: >
|
|
23
|
+
Explore, search for counterexamples, attempt proof search, formalize candidate
|
|
24
|
+
proofs, validate them with proof assistants, and emit a proof report.
|
|
25
|
+
components:
|
|
26
|
+
- {name: parse, type: math_problem_parser, config: {input_key: problem, output_key: math.parsed}}
|
|
27
|
+
- {name: classify, type: math_domain_classifier, config: {input_key: math.parsed, output_key: math.domain}}
|
|
28
|
+
- {name: normalize, type: math_statement_normalizer, config: {input_key: math.parsed, domain_key: math.domain, output_key: math.normalized}}
|
|
29
|
+
- {name: sympy, type: sympy_simplify, config: {input_key: math.normalized, output_key: evidence.sympy, timeout_seconds: 15}}
|
|
30
|
+
- {name: sage, type: sagemath_compute, config: {input_key: math.normalized, output_key: evidence.sage, timeout_seconds: 60}}
|
|
31
|
+
- {name: pari, type: pari_gp_number_theory, config: {input_key: math.normalized, output_key: evidence.pari, timeout_seconds: 20}}
|
|
32
|
+
- {name: gap, type: gap_group_theory, config: {input_key: math.normalized, output_key: evidence.gap, timeout_seconds: 20}}
|
|
33
|
+
- {name: numeric, type: mpmath_high_precision_check, config: {input_key: math.normalized, output_key: evidence.numeric, precision_digits: 100, timeout_seconds: 20}}
|
|
34
|
+
- {name: z3, type: z3_smt_check, config: {input_key: math.normalized, output_key: counterexamples.z3, timeout_seconds: 20}}
|
|
35
|
+
- {name: cvc5, type: cvc5_smt_check, config: {input_key: math.normalized, output_key: counterexamples.cvc5, timeout_seconds: 20}}
|
|
36
|
+
- {name: mace4, type: mace4_countermodel, config: {input_key: math.normalized, output_key: counterexamples.mace4, timeout_seconds: 30, max_domain_size: 8}}
|
|
37
|
+
- {name: plan, type: llm_proof_planner, config: {input_key: math.normalized, output_key: proof.strategy}}
|
|
38
|
+
- {name: retrieve, type: theorem_retriever, config: {input_key: proof.strategy, output_key: proof.premises, backends: [lean_mathlib, isabelle_afp]}}
|
|
39
|
+
- {name: vampire, type: vampire_prove_tptp, config: {input_key: math.normalized, premises_key: proof.premises, output_key: proof.vampire, timeout_seconds: 30}}
|
|
40
|
+
- {name: eprover, type: eprover_prove_tptp, config: {input_key: math.normalized, premises_key: proof.premises, output_key: proof.eprover, timeout_seconds: 30}}
|
|
41
|
+
- {name: prover9, type: prover9_prove, config: {input_key: math.normalized, premises_key: proof.premises, output_key: proof.prover9, timeout_seconds: 30}}
|
|
42
|
+
- {name: lean_formalize, type: lean4_formalize_statement, config: {input_key: math.normalized, strategy_key: proof.strategy, premises_key: proof.premises, output_key: formal.lean_candidate}}
|
|
43
|
+
- {name: lean_check, type: lean4_check, config: {input_key: formal.lean_candidate, output_key: formal.lean_result, project_root: ./lean/MathAgent, timeout_seconds: 120}}
|
|
44
|
+
- {name: lean_repair, type: lean4_repair, config: {candidate_key: formal.lean_candidate, error_key: formal.lean_result, output_key: formal.lean_candidate, max_repair_attempts: 3}}
|
|
45
|
+
- {name: isabelle_check, type: isabelle_check_theory, config: {input_key: formal.isabelle_candidate, output_key: formal.isabelle_result, timeout_seconds: 180}}
|
|
46
|
+
- {name: coq_check, type: coq_check, config: {input_key: formal.coq_candidate, output_key: formal.coq_result, timeout_seconds: 180}}
|
|
47
|
+
- {name: report, type: proof_report_builder, config: {input_key: math.normalized}}
|
|
48
|
+
flow:
|
|
49
|
+
type: graph
|
|
50
|
+
settings:
|
|
51
|
+
max_iterations: 4
|
|
52
|
+
on_max_iterations: exit
|
|
53
|
+
timeout_seconds: 600
|
|
54
|
+
fail_fast: false
|
|
55
|
+
nodes:
|
|
56
|
+
- {id: parse, component: parse}
|
|
57
|
+
- {id: classify, component: classify}
|
|
58
|
+
- {id: normalize, component: normalize}
|
|
59
|
+
- {id: sympy, component: sympy}
|
|
60
|
+
- {id: sage, component: sage}
|
|
61
|
+
- {id: pari, component: pari}
|
|
62
|
+
- {id: gap, component: gap}
|
|
63
|
+
- {id: numeric, component: numeric}
|
|
64
|
+
- {id: z3, component: z3}
|
|
65
|
+
- {id: cvc5, component: cvc5}
|
|
66
|
+
- {id: mace4, component: mace4}
|
|
67
|
+
- {id: plan, component: plan}
|
|
68
|
+
- {id: retrieve, component: retrieve}
|
|
69
|
+
- {id: vampire, component: vampire}
|
|
70
|
+
- {id: eprover, component: eprover}
|
|
71
|
+
- {id: prover9, component: prover9}
|
|
72
|
+
- {id: lean_formalize, component: lean_formalize}
|
|
73
|
+
- {id: lean_check, component: lean_check}
|
|
74
|
+
- {id: lean_repair, component: lean_repair}
|
|
75
|
+
- {id: isabelle_check, component: isabelle_check}
|
|
76
|
+
- {id: coq_check, component: coq_check}
|
|
77
|
+
- {id: report, component: report}
|
|
78
|
+
edges:
|
|
79
|
+
- {source: parse, target: classify}
|
|
80
|
+
- {source: classify, target: normalize}
|
|
81
|
+
# Exploration + counterexample fan-out.
|
|
82
|
+
- {source: normalize, target: sympy}
|
|
83
|
+
- {source: normalize, target: sage}
|
|
84
|
+
- {source: normalize, target: pari, port: number_theory}
|
|
85
|
+
- {source: normalize, target: gap, port: group_theory}
|
|
86
|
+
- {source: normalize, target: numeric}
|
|
87
|
+
- {source: normalize, target: z3}
|
|
88
|
+
- {source: normalize, target: cvc5}
|
|
89
|
+
- {source: normalize, target: mace4}
|
|
90
|
+
# Early refutation.
|
|
91
|
+
- {source: z3, target: report, port: counterexample_found}
|
|
92
|
+
- {source: cvc5, target: report, port: counterexample_found}
|
|
93
|
+
- {source: mace4, target: report, port: counterexample_found}
|
|
94
|
+
# Otherwise plan + retrieve.
|
|
95
|
+
- {source: sympy, target: plan}
|
|
96
|
+
- {source: numeric, target: plan}
|
|
97
|
+
- {source: z3, target: plan, port: no_counterexample}
|
|
98
|
+
- {source: plan, target: retrieve}
|
|
99
|
+
# ATP attempts.
|
|
100
|
+
- {source: retrieve, target: vampire}
|
|
101
|
+
- {source: retrieve, target: eprover}
|
|
102
|
+
- {source: retrieve, target: prover9}
|
|
103
|
+
# Formalization + validation.
|
|
104
|
+
- {source: vampire, target: lean_formalize, port: proof_found}
|
|
105
|
+
- {source: eprover, target: lean_formalize, port: proof_found}
|
|
106
|
+
- {source: prover9, target: lean_formalize, port: proof_found}
|
|
107
|
+
- {source: retrieve, target: lean_formalize}
|
|
108
|
+
- {source: lean_formalize, target: lean_check}
|
|
109
|
+
- {source: lean_formalize, target: isabelle_check}
|
|
110
|
+
- {source: lean_formalize, target: coq_check}
|
|
111
|
+
- {source: lean_check, target: report, port: verified}
|
|
112
|
+
- {source: lean_check, target: lean_repair, port: repair_needed}
|
|
113
|
+
- {source: lean_repair, target: lean_check, port: repaired}
|
|
114
|
+
- {source: lean_check, target: report, port: failed}
|
|
115
|
+
- {source: isabelle_check, target: report}
|
|
116
|
+
- {source: coq_check, target: report}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "neurocore-skill-math"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Math-proof toolchain skills for NeuroCore: CAS, SMT, ATP, and formal proof assistants"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.13"
|
|
12
|
+
authors = [{ name = "NeuroCore Contributors" }]
|
|
13
|
+
keywords = ["neurocore", "skill", "math", "proof", "sympy", "z3", "lean", "theorem-proving", "ai"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: Apache Software License",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
22
|
+
"Typing :: Typed",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"neurocore-ai>=0.4.0",
|
|
26
|
+
"sympy>=1.12",
|
|
27
|
+
"mpmath>=1.3",
|
|
28
|
+
"z3-solver>=4.12",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
# Optional backends; the matching skills detect availability and degrade gracefully.
|
|
33
|
+
cvc5 = ["cvc5>=1.1"]
|
|
34
|
+
numeric = ["numpy>=1.26", "scipy>=1.11", "networkx>=3.0"]
|
|
35
|
+
dev = ["pytest>=9.0.2", "pytest-asyncio>=0.24", "ruff>=0.8", "mypy>=1.8"]
|
|
36
|
+
|
|
37
|
+
[project.entry-points."neurocore.skills"]
|
|
38
|
+
# Group 1 — problem preparation (LLM)
|
|
39
|
+
math_problem_parser = "neurocore_skill_math:MathProblemParserSkill"
|
|
40
|
+
math_domain_classifier = "neurocore_skill_math:MathDomainClassifierSkill"
|
|
41
|
+
math_statement_normalizer = "neurocore_skill_math:MathStatementNormalizerSkill"
|
|
42
|
+
# Group 2 — symbolic / numeric / CAS
|
|
43
|
+
sympy_simplify = "neurocore_skill_math:SympySimplifySkill"
|
|
44
|
+
sympy_solve = "neurocore_skill_math:SympySolveSkill"
|
|
45
|
+
sympy_calculus = "neurocore_skill_math:SympyCalculusSkill"
|
|
46
|
+
mpmath_high_precision_check = "neurocore_skill_math:MpmathHighPrecisionCheckSkill"
|
|
47
|
+
sagemath_compute = "neurocore_skill_math:SagemathComputeSkill"
|
|
48
|
+
pari_gp_number_theory = "neurocore_skill_math:PariGpNumberTheorySkill"
|
|
49
|
+
gap_group_theory = "neurocore_skill_math:GapGroupTheorySkill"
|
|
50
|
+
# Group 3 — counterexample / SMT
|
|
51
|
+
z3_smt_check = "neurocore_skill_math:Z3SmtCheckSkill"
|
|
52
|
+
cvc5_smt_check = "neurocore_skill_math:Cvc5SmtCheckSkill"
|
|
53
|
+
mace4_countermodel = "neurocore_skill_math:Mace4CountermodelSkill"
|
|
54
|
+
# Group 4 — automated proof search (ATP)
|
|
55
|
+
vampire_prove_tptp = "neurocore_skill_math:VampireProveTptpSkill"
|
|
56
|
+
eprover_prove_tptp = "neurocore_skill_math:EproverProveTptpSkill"
|
|
57
|
+
prover9_prove = "neurocore_skill_math:Prover9ProveSkill"
|
|
58
|
+
# Group 4/5 — planning, retrieval (LLM)
|
|
59
|
+
llm_proof_planner = "neurocore_skill_math:LlmProofPlannerSkill"
|
|
60
|
+
theorem_retriever = "neurocore_skill_math:TheoremRetrieverSkill"
|
|
61
|
+
# Group 5 — formalization (Lean / Isabelle / Coq)
|
|
62
|
+
lean4_formalize_statement = "neurocore_skill_math:Lean4FormalizeStatementSkill"
|
|
63
|
+
lean4_check = "neurocore_skill_math:Lean4CheckSkill"
|
|
64
|
+
lean4_repair = "neurocore_skill_math:Lean4RepairSkill"
|
|
65
|
+
isabelle_check_theory = "neurocore_skill_math:IsabelleCheckTheorySkill"
|
|
66
|
+
coq_check = "neurocore_skill_math:CoqCheckSkill"
|
|
67
|
+
# Group 6 — reporting
|
|
68
|
+
proof_report_builder = "neurocore_skill_math:ProofReportBuilderSkill"
|
|
69
|
+
|
|
70
|
+
[tool.hatch.build.targets.wheel]
|
|
71
|
+
packages = ["src/neurocore_skill_math"]
|
|
72
|
+
|
|
73
|
+
[tool.hatch.build.targets.sdist]
|
|
74
|
+
include = ["src/neurocore_skill_math/", "tests/", "blueprints/", "scripts/", "README.md"]
|
|
75
|
+
|
|
76
|
+
[tool.pytest.ini_options]
|
|
77
|
+
testpaths = ["tests"]
|
|
78
|
+
pythonpath = ["src"]
|
|
79
|
+
asyncio_mode = "auto"
|
|
80
|
+
|
|
81
|
+
[tool.ruff]
|
|
82
|
+
target-version = "py313"
|
|
83
|
+
line-length = 100
|
|
84
|
+
src = ["src", "tests"]
|
|
85
|
+
|
|
86
|
+
[tool.ruff.lint]
|
|
87
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "C4", "SIM"]
|
|
88
|
+
ignore = ["E501"]
|
|
89
|
+
|
|
90
|
+
[tool.ruff.lint.per-file-ignores]
|
|
91
|
+
# Compact one-line setup (`ctx = FlowContext(); ctx.set(...)`) is fine in tests.
|
|
92
|
+
"tests/*" = ["E702"]
|
|
93
|
+
|
|
94
|
+
[tool.mypy]
|
|
95
|
+
python_version = "3.13"
|
|
96
|
+
# Math backends (sympy/mpmath/z3/cvc5) ship no type stubs.
|
|
97
|
+
ignore_missing_imports = true
|
|
98
|
+
disable_error_code = ["import-untyped"]
|