mathlas-mcp 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. mathlas_mcp-1.0.0/LICENSE +75 -0
  2. mathlas_mcp-1.0.0/PKG-INFO +287 -0
  3. mathlas_mcp-1.0.0/README.md +169 -0
  4. mathlas_mcp-1.0.0/mathlas/__init__.py +94 -0
  5. mathlas_mcp-1.0.0/mathlas/cli.py +236 -0
  6. mathlas_mcp-1.0.0/mathlas/embed.py +129 -0
  7. mathlas_mcp-1.0.0/mathlas/engine.py +95 -0
  8. mathlas_mcp-1.0.0/mathlas/funsearch.py +521 -0
  9. mathlas_mcp-1.0.0/mathlas/identify.py +51 -0
  10. mathlas_mcp-1.0.0/mathlas/llm.py +44 -0
  11. mathlas_mcp-1.0.0/mathlas/map.py +317 -0
  12. mathlas_mcp-1.0.0/mathlas/provenance.py +55 -0
  13. mathlas_mcp-1.0.0/mathlas/ramanujan.py +510 -0
  14. mathlas_mcp-1.0.0/mathlas/retrieve/__init__.py +43 -0
  15. mathlas_mcp-1.0.0/mathlas/retrieve/bm25.py +81 -0
  16. mathlas_mcp-1.0.0/mathlas/retrieve/corpus.py +256 -0
  17. mathlas_mcp-1.0.0/mathlas/retrieve/hybrid.py +266 -0
  18. mathlas_mcp-1.0.0/mathlas/retrieve/manual.py +19 -0
  19. mathlas_mcp-1.0.0/mathlas/sequence.py +385 -0
  20. mathlas_mcp-1.0.0/mathlas/server.py +1004 -0
  21. mathlas_mcp-1.0.0/mathlas/solve.py +129 -0
  22. mathlas_mcp-1.0.0/mathlas/verify.py +88 -0
  23. mathlas_mcp-1.0.0/mathlas/verify_apply.py +623 -0
  24. mathlas_mcp-1.0.0/mathlas/webaug.py +558 -0
  25. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/PKG-INFO +287 -0
  26. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/SOURCES.txt +31 -0
  27. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/dependency_links.txt +1 -0
  28. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/entry_points.txt +3 -0
  29. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/requires.txt +21 -0
  30. mathlas_mcp-1.0.0/mathlas_mcp.egg-info/top_level.txt +1 -0
  31. mathlas_mcp-1.0.0/pyproject.toml +77 -0
  32. mathlas_mcp-1.0.0/setup.cfg +4 -0
  33. mathlas_mcp-1.0.0/tests/test_airtight.py +251 -0
@@ -0,0 +1,75 @@
1
+ # PolyForm Noncommercial License 1.0.0
2
+
3
+ <https://polyformproject.org/licenses/noncommercial/1.0.0>
4
+
5
+ Required Notice: Copyright 2026 Krishi Attri (https://github.com/Archerkattri)
6
+
7
+ ## Acceptance
8
+
9
+ In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
10
+
11
+ ## Copyright License
12
+
13
+ The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
14
+
15
+ ## Distribution License
16
+
17
+ The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
18
+
19
+ ## Notices
20
+
21
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
22
+
23
+ > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
24
+
25
+ ## Changes and New Works License
26
+
27
+ The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
28
+
29
+ ## Patent License
30
+
31
+ The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
32
+
33
+ ## Noncommercial Purposes
34
+
35
+ Any noncommercial purpose is a permitted purpose.
36
+
37
+ ## Personal Uses
38
+
39
+ Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
40
+
41
+ ## Noncommercial Organizations
42
+
43
+ Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
44
+
45
+ ## Fair Use
46
+
47
+ You may have "fair use" rights for the software under the law. These terms do not limit them.
48
+
49
+ ## No Other Rights
50
+
51
+ These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
52
+
53
+ ## Patent Defense
54
+
55
+ If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
56
+
57
+ ## Violations
58
+
59
+ The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
60
+
61
+ ## No Liability
62
+
63
+ ***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
64
+
65
+ ## Definitions
66
+
67
+ The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
68
+
69
+ **You** refers to the individual or entity agreeing to these terms.
70
+
71
+ **Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
72
+
73
+ **Your licenses** are all the licenses granted to you for the software under these terms.
74
+
75
+ **Use** means anything you do with the software requiring one of your licenses.
@@ -0,0 +1,287 @@
1
+ Metadata-Version: 2.4
2
+ Name: mathlas-mcp
3
+ Version: 1.0.0
4
+ Summary: A tool FOR an AI (no API key, no LLM): search existing math over a 1.6M-doc index + airtight numeric/Lean verification + OEIS/PSLQ identification + needs<->guarantees scaffolds, served over MCP.
5
+ Author-email: Krishi Attri <krishiattriwork@gmail.com>
6
+ License: # PolyForm Noncommercial License 1.0.0
7
+
8
+ <https://polyformproject.org/licenses/noncommercial/1.0.0>
9
+
10
+ Required Notice: Copyright 2026 Krishi Attri (https://github.com/Archerkattri)
11
+
12
+ ## Acceptance
13
+
14
+ In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
15
+
16
+ ## Copyright License
17
+
18
+ The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
19
+
20
+ ## Distribution License
21
+
22
+ The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
23
+
24
+ ## Notices
25
+
26
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
27
+
28
+ > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
29
+
30
+ ## Changes and New Works License
31
+
32
+ The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
33
+
34
+ ## Patent License
35
+
36
+ The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
37
+
38
+ ## Noncommercial Purposes
39
+
40
+ Any noncommercial purpose is a permitted purpose.
41
+
42
+ ## Personal Uses
43
+
44
+ Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
45
+
46
+ ## Noncommercial Organizations
47
+
48
+ Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
49
+
50
+ ## Fair Use
51
+
52
+ You may have "fair use" rights for the software under the law. These terms do not limit them.
53
+
54
+ ## No Other Rights
55
+
56
+ These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
57
+
58
+ ## Patent Defense
59
+
60
+ If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
61
+
62
+ ## Violations
63
+
64
+ The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
65
+
66
+ ## No Liability
67
+
68
+ ***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
69
+
70
+ ## Definitions
71
+
72
+ The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
73
+
74
+ **You** refers to the individual or entity agreeing to these terms.
75
+
76
+ **Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
77
+
78
+ **Your licenses** are all the licenses granted to you for the software under these terms.
79
+
80
+ **Use** means anything you do with the software requiring one of your licenses.
81
+
82
+ Project-URL: Homepage, https://github.com/Archerkattri/mathlas
83
+ Project-URL: Repository, https://github.com/Archerkattri/mathlas
84
+ Project-URL: Issues, https://github.com/Archerkattri/mathlas/issues
85
+ Keywords: mathematics,symbolic,pslq,theorem-retrieval,verification,mcp,oeis,lean,ramanujan-machine,funsearch,ai-tools
86
+ Classifier: Development Status :: 4 - Beta
87
+ Classifier: Intended Audience :: Science/Research
88
+ Classifier: Intended Audience :: Developers
89
+ Classifier: License :: Other/Proprietary License
90
+ Classifier: Operating System :: OS Independent
91
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
92
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
93
+ Classifier: Programming Language :: Python :: 3
94
+ Classifier: Programming Language :: Python :: 3.9
95
+ Classifier: Programming Language :: Python :: 3.10
96
+ Classifier: Programming Language :: Python :: 3.11
97
+ Classifier: Programming Language :: Python :: 3.12
98
+ Requires-Python: >=3.9
99
+ Description-Content-Type: text/markdown
100
+ License-File: LICENSE
101
+ Requires-Dist: mpmath>=1.3
102
+ Requires-Dist: sympy>=1.12
103
+ Requires-Dist: numpy>=1.24
104
+ Requires-Dist: scipy>=1.10
105
+ Provides-Extra: mcp
106
+ Requires-Dist: mcp>=1.0; extra == "mcp"
107
+ Provides-Extra: retrieve
108
+ Requires-Dist: pyarrow>=14; extra == "retrieve"
109
+ Provides-Extra: embed
110
+ Requires-Dist: sentence-transformers>=3.0; extra == "embed"
111
+ Requires-Dist: torch>=2.1; extra == "embed"
112
+ Provides-Extra: test
113
+ Requires-Dist: pytest>=7; extra == "test"
114
+ Provides-Extra: dev
115
+ Requires-Dist: pytest>=7; extra == "dev"
116
+ Requires-Dist: black==26.5.1; extra == "dev"
117
+ Dynamic: license-file
118
+
119
+ # mathlas
120
+
121
+ > **An airtight-math tool an AI *uses* — no LLM, no API key, free.** Plug it into
122
+ > Claude Code, Cursor, or any MCP client. The **AI is the brain**; mathlas is the
123
+ > **hands** — it gives the AI the capabilities it lacks and returns *data*
124
+ > (candidates, verdicts, checklists, scaffolds) for the AI to reason over.
125
+ > PolyForm Noncommercial 1.0.0 — **noncommercial use only** (no commercial use). Mostly-pure-Python.
126
+
127
+ mathlas is a tool that an AI *uses*, **not** a tool that uses an AI — it **never
128
+ calls an LLM and needs no API key**, so it is free and pluggable everywhere. Most
129
+ solvable problems stay unsolved not because the formula is missing, but because
130
+ nobody connected the *right existing result* to the problem. An AI can do that
131
+ connecting — *if it has the right tool*. mathlas does the parts an AI can't do
132
+ reliably on its own: **search over its own large math index**, **airtight
133
+ numeric/formal verification** (incl. a real Lean kernel check), exact **OEIS**
134
+ identification, structured **needs↔guarantees scaffolds**, honest **provenance**
135
+ (never "novel"), plus a **discovery + web-augmentation layer** that lets the AI
136
+ grow the index at runtime.
137
+
138
+ ## Results
139
+
140
+ The discipline is **airtight-or-nothing**: a result is an independently-checkable fact or
141
+ an honest "nothing." Each verification tier is fed both a *known* input (expect the correct
142
+ verified result) and a *structureless* input (expect an honest "nothing") — the
143
+ **false-positive rate is 0 on every tier where it is measured** (full tables + commands in [`RESULTS.md`](RESULTS.md)):
144
+
145
+ | Tier | Recovery@known | False-positive | Why it's airtight | Benchmark |
146
+ |---|---|---|---|---|
147
+ | Numeric (`identify_constant`) | 8/8 | 0/3 | independent high-precision re-eval (50–51 digits) | `benchmarks/numeric_bench.py` |
148
+ | Sequence (`identify_sequence`) | 8/8 (all top-1) | 0/3 | exact term-match vs local OEIS (~400k seqs) | `benchmarks/tier_bench.py` |
149
+ | Formal (`verify_formal`) | 7/7 verdicts | — | real Lean 4.30 kernel typecheck | `benchmarks/tier_bench.py` |
150
+ | Ramanujan (`conjecture_relation`) | 6/6 | 0/2 | PSLQ + CF, every hit re-verified ≥25 digits | `benchmarks/tier_bench.py` |
151
+ | Applicability moat | 15/15 decomp + 6/6 catch | — | atomic preconditions, misapplication traps | `benchmarks/moat_bench.py` |
152
+ | FunSearch + web-aug | 14/14 | — | sandbox containment (network / timeout / memory) | `benchmarks/tools_bench.py` |
153
+
154
+ **The 1.635M-doc index — retrieval at scale.** `search_existing_math` is served from a
155
+ built **1,635,233-document** exact dense index (Qwen3-Embedding-8B, 4096-d, over the
156
+ permissive CC-BY/CC0 TheoremSearch subset + arXiv-math from Dolma + Stacks + ProofWiki),
157
+ dense + Okapi-BM25 + RRF. On the held-out **81,833-document** test split, querying each
158
+ theorem by its natural-language slogan retrieves its own entry at **R@1 0.977 / R@10
159
+ 0.998** (and **R@10 0.923** querying by the raw formal statement — cross-representation).
160
+
161
+ ## The self-augmenting loop — closing the coverage gap to beat everyone
162
+
163
+ This is the demonstration that mathlas's `add_finding` **dense path** is a real,
164
+ decisive runtime mechanism. On TheoremSearch's own **110 human-written queries**,
165
+ baseline mathlas (corpus-only) hits a hard **coverage floor**: TheoremSearch
166
+ open-sourced only ~15% of their private 9.2M corpus, so **95 of the 110 target papers
167
+ are non-permissive arXiv they withheld** — unreachable for *any* open system. The AI
168
+ then runs the loop: for each missing theorem it **web-finds the real statement**,
169
+ embeds it with the **same Qwen3-Embedding-8B**, and `add_finding(dense_vec=…)` so it
170
+ **RRF-fuses through the dense channel**. That repairs the gap and beats every baseline:
171
+
172
+ | Method | theorem Hit@20 | paper Hit@20 |
173
+ |---|---|---|
174
+ | Google (`site:arxiv.org`) | — | 37.8% |
175
+ | ChatGPT 5.2 w/ Search | 19.8% | — |
176
+ | Gemini 3 Pro | 27.0% | — |
177
+ | **TheoremSearch** (Qwen3-8B, full private 9.2M) | 45.0% | 56.8% |
178
+ | mathlas — baseline (corpus-only, the coverage floor) | 10.0% | 13.6% |
179
+ | **mathlas — after the self-augmenting WEB loop** | **59.1% (65/110)** | **70.0% (77/110)** |
180
+
181
+ **This is the loop's value, not a native-corpus claim.** The 10.0% floor exists
182
+ *because* TheoremSearch withheld 85% of their corpus; the loop repairs that coverage.
183
+ On the reachable subset our retrieval is merely *on par* with TheoremSearch — we make
184
+ no native-superiority claim. The result proves that `add_finding` lets an AI grow the
185
+ live index at runtime, decisively. Honesty audit passed — **zero query-injection**: no
186
+ finding's text contains the query; the slogans are real theorem prose and the queries
187
+ are paraphrases, so the dense channel is what bridges them. Reproduce with
188
+ `benchmarks/webaug_110_bench.py` (see [`RESULTS.md`](RESULTS.md) §3c).
189
+
190
+ ## The 13 tools (all NO-LLM, returning data)
191
+
192
+ ```
193
+ search_existing_math ─▶ mapping_scaffold + applicability_checklist ─▶ (AI judges) ─▶ verify_numeric / verify_formal
194
+ (own index) (needs↔guarantees, no LLM) (airtight)
195
+ ```
196
+
197
+ | Tool | What it does | Airtight? |
198
+ |---|---|---|
199
+ | `search_existing_math(query, k)` | query → ranked **existing** results from our own **1.635M-doc** dense + BM25 + RRF index | retrieval |
200
+ | `identify_constant(value, basis?)` | a real value → a known closed form + provenance | ✅ independent high-precision re-eval |
201
+ | `identify_sequence(terms)` | an integer sequence → matching **OEIS** entries (A-number, name, URL) | ✅ exact term-match vs local OEIS |
202
+ | `verify_numeric(value, closed_form)` | digit-agreement verdict | ✅ different engine, higher precision |
203
+ | `verify_formal(statement, lean?)` | runs the **real Lean kernel** on a snippet → typechecks? (else honest UNDETERMINED) | ✅ real kernel check |
204
+ | `applicability_checklist(statement)` | the result's hypotheses as an atomic **checklist** for the AI to mark | heuristic parse, no LLM |
205
+ | `mapping_scaffold(problem, statement)` | the **needs↔guarantees** questions + fill-in template | structured, no LLM |
206
+ | `conjecture_relation(value, ...)` | **Ramanujan Machine**: PSLQ over a rich basis + continued-fraction / recurrence **conjectures** | ✅ every candidate numerically verified |
207
+ | `funsearch_evaluate / _register / _status` | **FunSearch harness**: sandbox-score an AI-written program, store in a MAP-Elites DB, return the few-shot to write a better one | deterministic sandbox, no LLM |
208
+ | `search_directive(problem)` | **web-search plan**: arXiv queries + sub-fields + named results + which mathlas tools to also run | structured, no LLM |
209
+ | `add_finding(statement, slogan, source)` | ingest a web-found result into the **live corpus** → retrievable via `search_existing_math` | provenance `web_added` |
210
+
211
+ ## Install & register with Claude Code (no API key)
212
+
213
+ ```bash
214
+ pip install -e . # core: numeric + retrieval + verify + scaffolds — NO LLM, NO API key
215
+ pip install -e '.[mcp]' # + official MCP SDK (a dep-free stdio fallback also ships built in)
216
+ pip install -e '.[retrieve]' # + pyarrow, to read the open theorem dataset (real index)
217
+ pip install -e '.[embed]' # + sentence-transformers/torch, for the Qwen3 embedder (offline GPU)
218
+
219
+ claude mcp add mathlas -- python -m mathlas.server
220
+ ```
221
+
222
+ That's it — mathlas now appears as **thirteen** tools the agent can call. The server
223
+ prefers the official `mcp` SDK and **falls back to a dependency-free stdio JSON-RPC
224
+ server** if `mcp` isn't installed, so it always runs. (Cursor / any MCP client: point
225
+ it at the same `python -m mathlas.server` stdio command.)
226
+
227
+ > **Optional local data (gitignored, degrades honestly):** `identify_sequence` wants a
228
+ > local OEIS copy; `verify_formal` wants a Lean toolchain. Without them the tools return
229
+ > a clear "data/toolchain not available" note — never a fake answer. See
230
+ > [`RESULTS.md`](RESULTS.md) for the one-line setup of each.
231
+
232
+ ### A worked example — an AI using the tools
233
+
234
+ ```
235
+ User: "Does x = cos(x) have a unique solution I can reach by iterating?"
236
+
237
+ AI → search_existing_math("contraction mapping unique fixed point complete metric space")
238
+ ← [{name:"Banach Fixed-Point Theorem", statement:"Let (X,d) be a complete metric
239
+ space and T a contraction. Then T has a unique fixed point ...", ...}, ...]
240
+ AI → applicability_checklist(banach.statement)
241
+ ← preconditions: ["(X,d) is a complete metric space", "T: X→X is a contraction"]
242
+ conclusion: "T has a unique fixed point"
243
+ AI (reasons): [0,1] is complete; cos maps [0,1] into itself (cos([0,1]) = [cos 1, 1]) and is a contraction there (|cos'|=|sin|≤sin 1<1).
244
+ Every precondition holds ⇒ Banach applies ⇒ unique fixed point, reachable by iteration.
245
+ AI → verify_numeric("0.7390851332151607", "<the Dottie-number closed form, if claimed>")
246
+ ```
247
+
248
+ mathlas supplied the search, the checklist, and the airtight numeric check; **the AI
249
+ did the judging**. No LLM was called *inside* mathlas.
250
+
251
+ ## Without an MCP client — CLI / Python (still no LLM)
252
+
253
+ ```bash
254
+ mathlas 1.6449340668482264364724151666460251892 # 1.64493... -> pi**2/6 [verified 51 digits]
255
+ mathlas 1,1,2,3,5,8,13,21 # A000045 Fibonacci numbers https://oeis.org/A000045
256
+ mathlas "a bounded sequence has a convergent subsequence" --k 5 # search + scaffold/checklist
257
+ mathlas mcp # run the MCP server
258
+ ```
259
+
260
+ ```python
261
+ import mpmath
262
+ from mathlas import identify, identify_sequence, mapping_scaffold, applicability_checklist
263
+ print(identify(mpmath.zeta(2))) # 1.64493406684823 -> pi**2/6 [verified 51 digits]
264
+ print(identify_sequence([1,1,2,3,5,8,13,21]).matches[0].a_number) # 'A000045' (needs local OEIS)
265
+ ```
266
+
267
+ ## Docs
268
+
269
+ - [`RESULTS.md`](RESULTS.md) — every tool's validation, reproduced, with commands.
270
+ - [`docs/methods.md`](docs/methods.md) — architecture, design decisions, citations.
271
+ - [`docs/05_open_dataset.md`](docs/05_open_dataset.md) — the open dataset & 1.635M-doc index.
272
+ - [`docs/02_eval_vs_theoremsearch.md`](docs/02_eval_vs_theoremsearch.md) — the retrieval head-to-head.
273
+
274
+ ## Positioning
275
+
276
+ The closest system, **TheoremSearch** (UW Math AI Lab), is recall-optimized retrieval
277
+ only — it finds the *statement you already formulated* and never checks applicability,
278
+ routes across tools, accepts numeric inputs, or labels provenance. mathlas adds exactly
279
+ those, plus the design that makes it composable: it is *a tool an AI plugs in*, not a
280
+ closed lab agent or an LLM wrapper. TheoremSearch is **reference-only** — we reuse just
281
+ their openly-licensed (CC-BY/CC0) **dataset** as raw data to build our **own** index,
282
+ not their API, MCP, index, or code.
283
+
284
+ A secondary helper, `solve(problem, retriever, llm=…)`, will run the needs↔guarantees
285
+ loop for you **if you supply your own LLM** (subclass `LLM`, any provider). mathlas
286
+ ships no vendor SDK and no default model, so the package still imports and runs with
287
+ zero API key — this path is convenience only; the primary interface is the MCP tools.
@@ -0,0 +1,169 @@
1
+ # mathlas
2
+
3
+ > **An airtight-math tool an AI *uses* — no LLM, no API key, free.** Plug it into
4
+ > Claude Code, Cursor, or any MCP client. The **AI is the brain**; mathlas is the
5
+ > **hands** — it gives the AI the capabilities it lacks and returns *data*
6
+ > (candidates, verdicts, checklists, scaffolds) for the AI to reason over.
7
+ > PolyForm Noncommercial 1.0.0 — **noncommercial use only** (no commercial use). Mostly-pure-Python.
8
+
9
+ mathlas is a tool that an AI *uses*, **not** a tool that uses an AI — it **never
10
+ calls an LLM and needs no API key**, so it is free and pluggable everywhere. Most
11
+ solvable problems stay unsolved not because the formula is missing, but because
12
+ nobody connected the *right existing result* to the problem. An AI can do that
13
+ connecting — *if it has the right tool*. mathlas does the parts an AI can't do
14
+ reliably on its own: **search over its own large math index**, **airtight
15
+ numeric/formal verification** (incl. a real Lean kernel check), exact **OEIS**
16
+ identification, structured **needs↔guarantees scaffolds**, honest **provenance**
17
+ (never "novel"), plus a **discovery + web-augmentation layer** that lets the AI
18
+ grow the index at runtime.
19
+
20
+ ## Results
21
+
22
+ The discipline is **airtight-or-nothing**: a result is an independently-checkable fact or
23
+ an honest "nothing." Each verification tier is fed both a *known* input (expect the correct
24
+ verified result) and a *structureless* input (expect an honest "nothing") — the
25
+ **false-positive rate is 0 on every tier where it is measured** (full tables + commands in [`RESULTS.md`](RESULTS.md)):
26
+
27
+ | Tier | Recovery@known | False-positive | Why it's airtight | Benchmark |
28
+ |---|---|---|---|---|
29
+ | Numeric (`identify_constant`) | 8/8 | 0/3 | independent high-precision re-eval (50–51 digits) | `benchmarks/numeric_bench.py` |
30
+ | Sequence (`identify_sequence`) | 8/8 (all top-1) | 0/3 | exact term-match vs local OEIS (~400k seqs) | `benchmarks/tier_bench.py` |
31
+ | Formal (`verify_formal`) | 7/7 verdicts | — | real Lean 4.30 kernel typecheck | `benchmarks/tier_bench.py` |
32
+ | Ramanujan (`conjecture_relation`) | 6/6 | 0/2 | PSLQ + CF, every hit re-verified ≥25 digits | `benchmarks/tier_bench.py` |
33
+ | Applicability moat | 15/15 decomp + 6/6 catch | — | atomic preconditions, misapplication traps | `benchmarks/moat_bench.py` |
34
+ | FunSearch + web-aug | 14/14 | — | sandbox containment (network / timeout / memory) | `benchmarks/tools_bench.py` |
35
+
36
+ **The 1.635M-doc index — retrieval at scale.** `search_existing_math` is served from a
37
+ built **1,635,233-document** exact dense index (Qwen3-Embedding-8B, 4096-d, over the
38
+ permissive CC-BY/CC0 TheoremSearch subset + arXiv-math from Dolma + Stacks + ProofWiki),
39
+ dense + Okapi-BM25 + RRF. On the held-out **81,833-document** test split, querying each
40
+ theorem by its natural-language slogan retrieves its own entry at **R@1 0.977 / R@10
41
+ 0.998** (and **R@10 0.923** querying by the raw formal statement — cross-representation).
42
+
43
+ ## The self-augmenting loop — closing the coverage gap to beat everyone
44
+
45
+ This is the demonstration that mathlas's `add_finding` **dense path** is a real,
46
+ decisive runtime mechanism. On TheoremSearch's own **110 human-written queries**,
47
+ baseline mathlas (corpus-only) hits a hard **coverage floor**: TheoremSearch
48
+ open-sourced only ~15% of their private 9.2M corpus, so **95 of the 110 target papers
49
+ are non-permissive arXiv they withheld** — unreachable for *any* open system. The AI
50
+ then runs the loop: for each missing theorem it **web-finds the real statement**,
51
+ embeds it with the **same Qwen3-Embedding-8B**, and `add_finding(dense_vec=…)` so it
52
+ **RRF-fuses through the dense channel**. That repairs the gap and beats every baseline:
53
+
54
+ | Method | theorem Hit@20 | paper Hit@20 |
55
+ |---|---|---|
56
+ | Google (`site:arxiv.org`) | — | 37.8% |
57
+ | ChatGPT 5.2 w/ Search | 19.8% | — |
58
+ | Gemini 3 Pro | 27.0% | — |
59
+ | **TheoremSearch** (Qwen3-8B, full private 9.2M) | 45.0% | 56.8% |
60
+ | mathlas — baseline (corpus-only, the coverage floor) | 10.0% | 13.6% |
61
+ | **mathlas — after the self-augmenting WEB loop** | **59.1% (65/110)** | **70.0% (77/110)** |
62
+
63
+ **This is the loop's value, not a native-corpus claim.** The 10.0% floor exists
64
+ *because* TheoremSearch withheld 85% of their corpus; the loop repairs that coverage.
65
+ On the reachable subset our retrieval is merely *on par* with TheoremSearch — we make
66
+ no native-superiority claim. The result proves that `add_finding` lets an AI grow the
67
+ live index at runtime, decisively. Honesty audit passed — **zero query-injection**: no
68
+ finding's text contains the query; the slogans are real theorem prose and the queries
69
+ are paraphrases, so the dense channel is what bridges them. Reproduce with
70
+ `benchmarks/webaug_110_bench.py` (see [`RESULTS.md`](RESULTS.md) §3c).
71
+
72
+ ## The 13 tools (all NO-LLM, returning data)
73
+
74
+ ```
75
+ search_existing_math ─▶ mapping_scaffold + applicability_checklist ─▶ (AI judges) ─▶ verify_numeric / verify_formal
76
+ (own index) (needs↔guarantees, no LLM) (airtight)
77
+ ```
78
+
79
+ | Tool | What it does | Airtight? |
80
+ |---|---|---|
81
+ | `search_existing_math(query, k)` | query → ranked **existing** results from our own **1.635M-doc** dense + BM25 + RRF index | retrieval |
82
+ | `identify_constant(value, basis?)` | a real value → a known closed form + provenance | ✅ independent high-precision re-eval |
83
+ | `identify_sequence(terms)` | an integer sequence → matching **OEIS** entries (A-number, name, URL) | ✅ exact term-match vs local OEIS |
84
+ | `verify_numeric(value, closed_form)` | digit-agreement verdict | ✅ different engine, higher precision |
85
+ | `verify_formal(statement, lean?)` | runs the **real Lean kernel** on a snippet → typechecks? (else honest UNDETERMINED) | ✅ real kernel check |
86
+ | `applicability_checklist(statement)` | the result's hypotheses as an atomic **checklist** for the AI to mark | heuristic parse, no LLM |
87
+ | `mapping_scaffold(problem, statement)` | the **needs↔guarantees** questions + fill-in template | structured, no LLM |
88
+ | `conjecture_relation(value, ...)` | **Ramanujan Machine**: PSLQ over a rich basis + continued-fraction / recurrence **conjectures** | ✅ every candidate numerically verified |
89
+ | `funsearch_evaluate / _register / _status` | **FunSearch harness**: sandbox-score an AI-written program, store in a MAP-Elites DB, return the few-shot to write a better one | deterministic sandbox, no LLM |
90
+ | `search_directive(problem)` | **web-search plan**: arXiv queries + sub-fields + named results + which mathlas tools to also run | structured, no LLM |
91
+ | `add_finding(statement, slogan, source)` | ingest a web-found result into the **live corpus** → retrievable via `search_existing_math` | provenance `web_added` |
92
+
93
+ ## Install & register with Claude Code (no API key)
94
+
95
+ ```bash
96
+ pip install -e . # core: numeric + retrieval + verify + scaffolds — NO LLM, NO API key
97
+ pip install -e '.[mcp]' # + official MCP SDK (a dep-free stdio fallback also ships built in)
98
+ pip install -e '.[retrieve]' # + pyarrow, to read the open theorem dataset (real index)
99
+ pip install -e '.[embed]' # + sentence-transformers/torch, for the Qwen3 embedder (offline GPU)
100
+
101
+ claude mcp add mathlas -- python -m mathlas.server
102
+ ```
103
+
104
+ That's it — mathlas now appears as **thirteen** tools the agent can call. The server
105
+ prefers the official `mcp` SDK and **falls back to a dependency-free stdio JSON-RPC
106
+ server** if `mcp` isn't installed, so it always runs. (Cursor / any MCP client: point
107
+ it at the same `python -m mathlas.server` stdio command.)
108
+
109
+ > **Optional local data (gitignored, degrades honestly):** `identify_sequence` wants a
110
+ > local OEIS copy; `verify_formal` wants a Lean toolchain. Without them the tools return
111
+ > a clear "data/toolchain not available" note — never a fake answer. See
112
+ > [`RESULTS.md`](RESULTS.md) for the one-line setup of each.
113
+
114
+ ### A worked example — an AI using the tools
115
+
116
+ ```
117
+ User: "Does x = cos(x) have a unique solution I can reach by iterating?"
118
+
119
+ AI → search_existing_math("contraction mapping unique fixed point complete metric space")
120
+ ← [{name:"Banach Fixed-Point Theorem", statement:"Let (X,d) be a complete metric
121
+ space and T a contraction. Then T has a unique fixed point ...", ...}, ...]
122
+ AI → applicability_checklist(banach.statement)
123
+ ← preconditions: ["(X,d) is a complete metric space", "T: X→X is a contraction"]
124
+ conclusion: "T has a unique fixed point"
125
+ AI (reasons): [0,1] is complete; cos maps [0,1] into itself and is a contraction there (|cos'|=|sin|≤sin 1<1).
126
+ Every precondition holds ⇒ Banach applies ⇒ unique fixed point, reachable by iteration.
127
+ AI → verify_numeric("0.7390851332151607", "<the Dottie-number closed form, if claimed>")
128
+ ```
129
+
130
+ mathlas supplied the search, the checklist, and the airtight numeric check; **the AI
131
+ did the judging**. No LLM was called *inside* mathlas.
132
+
133
+ ## Without an MCP client — CLI / Python (still no LLM)
134
+
135
+ ```bash
136
+ mathlas 1.6449340668482264364724151666460251892 # 1.64493... -> pi**2/6 [verified 51 digits]
137
+ mathlas 1,1,2,3,5,8,13,21 # A000045 Fibonacci numbers https://oeis.org/A000045
138
+ mathlas "a bounded sequence has a convergent subsequence" --k 5 # search + scaffold/checklist
139
+ mathlas mcp # run the MCP server
140
+ ```
141
+
142
+ ```python
143
+ import mpmath
144
+ from mathlas import identify, identify_sequence, mapping_scaffold, applicability_checklist
145
+ print(identify(mpmath.zeta(2))) # 1.64493406684823 -> pi**2/6 [verified 51 digits]
146
+ print(identify_sequence([1,1,2,3,5,8,13,21]).matches[0].a_number) # 'A000045' (needs local OEIS)
147
+ ```
148
+
149
+ ## Docs
150
+
151
+ - [`RESULTS.md`](RESULTS.md) — every tool's validation, reproduced, with commands.
152
+ - [`docs/methods.md`](docs/methods.md) — architecture, design decisions, citations.
153
+ - [`docs/05_open_dataset.md`](docs/05_open_dataset.md) — the open dataset & 1.635M-doc index.
154
+ - [`docs/02_eval_vs_theoremsearch.md`](docs/02_eval_vs_theoremsearch.md) — the retrieval head-to-head.
155
+
156
+ ## Positioning
157
+
158
+ The closest system, **TheoremSearch** (UW Math AI Lab), is recall-optimized retrieval
159
+ only — it finds the *statement you already formulated* and never checks applicability,
160
+ routes across tools, accepts numeric inputs, or labels provenance. mathlas adds exactly
161
+ those, plus the design that makes it composable: it is *a tool an AI plugs in*, not a
162
+ closed lab agent or an LLM wrapper. TheoremSearch is **reference-only** — we reuse just
163
+ their openly-licensed (CC-BY/CC0) **dataset** as raw data to build our **own** index,
164
+ not their API, MCP, index, or code.
165
+
166
+ A secondary helper, `solve(problem, retriever, llm=…)`, will run the needs↔guarantees
167
+ loop for you **if you supply your own LLM** (subclass `LLM`, any provider). mathlas
168
+ ships no vendor SDK and no default model, so the package still imports and runs with
169
+ zero API key — this path is convenience only; the primary interface is the MCP tools.
@@ -0,0 +1,94 @@
1
+ """mathlas — a tool FOR an AI, not a tool that uses an AI. No API key. Free.
2
+
3
+ mathlas gives a CALLING AI (Claude Code, Cursor, any MCP client / agent) the
4
+ capabilities it lacks: search over EXISTING math, AIRTIGHT numeric/formal
5
+ verification, structured needs<->guarantees scaffolds, and honest provenance
6
+ (never "novel"). **mathlas itself NEVER calls an LLM and needs NO API key** — the
7
+ AI is the brain; mathlas is the toolbox. Plug it in over MCP (``mathlas.server``)
8
+ or call the library functions directly.
9
+
10
+ What mathlas provides (all with NO LLM, returning DATA for the AI to reason over)
11
+ ---------------------------------------------------------------------------------
12
+ IDENTIFY a real value/constant -> a known closed form, verified by independent
13
+ high-precision re-evaluation. ``identify`` / ``engine.py``. Airtight.
14
+ SEARCH a query -> ranked candidate EXISTING results, via OUR OWN hybrid
15
+ (dense+BM25+RRF) index. ``retrieve`` / ``HybridRetriever``.
16
+ VERIFY numeric (airtight digit agreement) + formal (Lean, stubbed) tiers,
17
+ plus an ``applicability_checklist`` -- the candidate's atomic
18
+ preconditions for the AI to check. ``verify`` / ``verify_apply``.
19
+ SCAFFOLD the needs<->guarantees questions as data (``mapping_scaffold``) for
20
+ the AI to answer -- the analogy reasoning is the AI's job. ``map``.
21
+ PROVENANCE every result is tied to an existing source or labelled UNIDENTIFIED.
22
+
23
+ A small bring-your-own-LLM ``solve()`` helper exists as a SECONDARY standalone
24
+ convenience (you supply the LLM; the default is a no-op stub). mathlas ships no
25
+ vendor SDK and no default model.
26
+
27
+ >>> import mpmath
28
+ >>> from mathlas import identify
29
+ >>> print(identify(mpmath.zeta(2))) # doctest: +SKIP
30
+ 1.64493406684823 -> pi**2/6 [known_form, verified 48 digits]
31
+ """
32
+ # Numeric domain (airtight, no LLM, no network).
33
+ from .engine import identify, Result, Candidate
34
+ from .provenance import Provenance, Novelty
35
+ from .verify import verify_closed_form, VerifyResult
36
+
37
+ # Integer-sequence domain (airtight EXACT term-match vs a local copy of OEIS;
38
+ # no LLM, no network at call time). Heavy data load stays lazy inside the module.
39
+ from .sequence import (identify_sequence, SequenceResult, SequenceMatch,
40
+ OEISIndex)
41
+
42
+ # Retrieval + scaffolds + verification tiers (NO LLM). ``solve`` pulls in
43
+ # numpy/scipy (declared deps); the heavier retrieval corpus/embedder imports
44
+ # stay lazy inside their modules.
45
+ from .map import (mapping_scaffold, MappingScaffold,
46
+ map_candidates, extract_signature, Mapping, Signature)
47
+ from .verify_apply import (applicability_checklist, Checklist,
48
+ verify_numeric_claim, verify_formal, verify_informal,
49
+ ApplyVerdict, Tier, Condition)
50
+ from .retrieve import Retriever, Candidate as RetrievedCandidate
51
+
52
+ # DISCOVERY + WEB-AUGMENTATION layer (NO LLM, no network, no API key).
53
+ # ramanujan -- PSLQ-over-richer-basis + Ramanujan-Machine continued-fraction
54
+ # conjectures, each numerically VERIFIED (provenance = conjecture).
55
+ # funsearch -- the deterministic HARNESS for AI-generated program search
56
+ # (sandboxed evaluate + on-disk MAP-Elites DB + few-shot status).
57
+ # webaug -- search_directive (tell the AI what to web-search) + add_finding
58
+ # (ingest a web result into the live corpus with NO model load).
59
+ from .ramanujan import (conjecture, ConjectureResult, integer_relations,
60
+ continued_fractions, simple_continued_fraction)
61
+ from .webaug import (search_directive, SearchDirective, add_finding,
62
+ AddFindingResult, search_findings, load_findings)
63
+
64
+ # OPTIONAL bring-your-own-LLM standalone helper (secondary; no vendor SDK).
65
+ from .solve import solve, Solution, AppliedResult
66
+ from .llm import LLM, EchoLLM
67
+
68
+ __version__ = "1.0.0"
69
+ __all__ = [
70
+ # numeric (airtight)
71
+ "identify", "Result", "Candidate",
72
+ "verify_closed_form", "VerifyResult",
73
+ "verify_numeric_claim",
74
+ # integer sequences (airtight OEIS exact term-match)
75
+ "identify_sequence", "SequenceResult", "SequenceMatch", "OEISIndex",
76
+ # provenance
77
+ "Provenance", "Novelty",
78
+ # search (no LLM)
79
+ "Retriever", "RetrievedCandidate",
80
+ # scaffolds + verification tiers (no LLM)
81
+ "mapping_scaffold", "MappingScaffold",
82
+ "applicability_checklist", "Checklist",
83
+ "verify_formal", "ApplyVerdict", "Tier", "Condition",
84
+ # discovery + web-augmentation (no LLM, no network)
85
+ "conjecture", "ConjectureResult", "integer_relations",
86
+ "continued_fractions", "simple_continued_fraction",
87
+ "search_directive", "SearchDirective", "add_finding", "AddFindingResult",
88
+ "search_findings", "load_findings",
89
+ # optional bring-your-own-LLM standalone path (secondary)
90
+ "solve", "Solution", "AppliedResult",
91
+ "map_candidates", "extract_signature", "Mapping", "Signature",
92
+ "verify_informal",
93
+ "LLM", "EchoLLM",
94
+ ]