PyPI - mathlas-mcp - Versions diffs - 1.0.0__tar.gz - Mend

mathlas-mcp 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

mathlas_mcp-1.0.0/LICENSE +75 -0
mathlas_mcp-1.0.0/PKG-INFO +287 -0
mathlas_mcp-1.0.0/README.md +169 -0
mathlas_mcp-1.0.0/mathlas/__init__.py +94 -0
mathlas_mcp-1.0.0/mathlas/cli.py +236 -0
mathlas_mcp-1.0.0/mathlas/embed.py +129 -0
mathlas_mcp-1.0.0/mathlas/engine.py +95 -0
mathlas_mcp-1.0.0/mathlas/funsearch.py +521 -0
mathlas_mcp-1.0.0/mathlas/identify.py +51 -0
mathlas_mcp-1.0.0/mathlas/llm.py +44 -0
mathlas_mcp-1.0.0/mathlas/map.py +317 -0
mathlas_mcp-1.0.0/mathlas/provenance.py +55 -0
mathlas_mcp-1.0.0/mathlas/ramanujan.py +510 -0
mathlas_mcp-1.0.0/mathlas/retrieve/__init__.py +43 -0
mathlas_mcp-1.0.0/mathlas/retrieve/bm25.py +81 -0
mathlas_mcp-1.0.0/mathlas/retrieve/corpus.py +256 -0
mathlas_mcp-1.0.0/mathlas/retrieve/hybrid.py +266 -0
mathlas_mcp-1.0.0/mathlas/retrieve/manual.py +19 -0
mathlas_mcp-1.0.0/mathlas/sequence.py +385 -0
mathlas_mcp-1.0.0/mathlas/server.py +1004 -0
mathlas_mcp-1.0.0/mathlas/solve.py +129 -0
mathlas_mcp-1.0.0/mathlas/verify.py +88 -0
mathlas_mcp-1.0.0/mathlas/verify_apply.py +623 -0
mathlas_mcp-1.0.0/mathlas/webaug.py +558 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/PKG-INFO +287 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/SOURCES.txt +31 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/dependency_links.txt +1 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/entry_points.txt +3 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/requires.txt +21 -0
mathlas_mcp-1.0.0/mathlas_mcp.egg-info/top_level.txt +1 -0
mathlas_mcp-1.0.0/pyproject.toml +77 -0
mathlas_mcp-1.0.0/setup.cfg +4 -0
mathlas_mcp-1.0.0/tests/test_airtight.py +251 -0

mathlas_mcp-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,75 @@
+# PolyForm Noncommercial License 1.0.0
+<https://polyformproject.org/licenses/noncommercial/1.0.0>
+Required Notice: Copyright 2026 Krishi Attri (https://github.com/Archerkattri)
+## Acceptance
+In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
+## Copyright License
+The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose.  However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
+## Distribution License
+The licensor grants you an additional copyright license to distribute copies of the software.  Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
+## Notices
+You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software.  For example:
+> Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
+## Changes and New Works License
+The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
+## Patent License
+The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
+## Noncommercial Purposes
+Any noncommercial purpose is a permitted purpose.
+## Personal Uses
+Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
+## Noncommercial Organizations
+Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
+## Fair Use
+You may have "fair use" rights for the software under the law. These terms do not limit them.
+## No Other Rights
+These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else.  These terms do not imply any other licenses.
+## Patent Defense
+If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
+## Violations
+The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice.  Otherwise, all your licenses end immediately.
+## No Liability
+***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
+## Definitions
+The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
+**You** refers to the individual or entity agreeing to these terms.
+**Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization.  **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise.  Control can be direct or indirect.
+**Your licenses** are all the licenses granted to you for the software under these terms.
+**Use** means anything you do with the software requiring one of your licenses.

mathlas_mcp-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,287 @@
+Metadata-Version: 2.4
+Name: mathlas-mcp
+Version: 1.0.0
+Summary: A tool FOR an AI (no API key, no LLM): search existing math over a 1.6M-doc index + airtight numeric/Lean verification + OEIS/PSLQ identification + needs<->guarantees scaffolds, served over MCP.
+Author-email: Krishi Attri <krishiattriwork@gmail.com>
+License: # PolyForm Noncommercial License 1.0.0
+        <https://polyformproject.org/licenses/noncommercial/1.0.0>
+        Required Notice: Copyright 2026 Krishi Attri (https://github.com/Archerkattri)
+        ## Acceptance
+        In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
+        ## Copyright License
+        The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose.  However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
+        ## Distribution License
+        The licensor grants you an additional copyright license to distribute copies of the software.  Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
+        ## Notices
+        You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software.  For example:
+        > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
+        ## Changes and New Works License
+        The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
+        ## Patent License
+        The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
+        ## Noncommercial Purposes
+        Any noncommercial purpose is a permitted purpose.
+        ## Personal Uses
+        Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
+        ## Noncommercial Organizations
+        Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
+        ## Fair Use
+        You may have "fair use" rights for the software under the law. These terms do not limit them.
+        ## No Other Rights
+        These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else.  These terms do not imply any other licenses.
+        ## Patent Defense
+        If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
+        ## Violations
+        The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice.  Otherwise, all your licenses end immediately.
+        ## No Liability
+        ***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
+        ## Definitions
+        The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
+        **You** refers to the individual or entity agreeing to these terms.
+        **Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization.  **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise.  Control can be direct or indirect.
+        **Your licenses** are all the licenses granted to you for the software under these terms.
+        **Use** means anything you do with the software requiring one of your licenses.
+Project-URL: Homepage, https://github.com/Archerkattri/mathlas
+Project-URL: Repository, https://github.com/Archerkattri/mathlas
+Project-URL: Issues, https://github.com/Archerkattri/mathlas/issues
+Keywords: mathematics,symbolic,pslq,theorem-retrieval,verification,mcp,oeis,lean,ramanujan-machine,funsearch,ai-tools
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: Intended Audience :: Developers
+Classifier: License :: Other/Proprietary License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Mathematics
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: mpmath>=1.3
+Requires-Dist: sympy>=1.12
+Requires-Dist: numpy>=1.24
+Requires-Dist: scipy>=1.10
+Provides-Extra: mcp
+Requires-Dist: mcp>=1.0; extra == "mcp"
+Provides-Extra: retrieve
+Requires-Dist: pyarrow>=14; extra == "retrieve"
+Provides-Extra: embed
+Requires-Dist: sentence-transformers>=3.0; extra == "embed"
+Requires-Dist: torch>=2.1; extra == "embed"
+Provides-Extra: test
+Requires-Dist: pytest>=7; extra == "test"
+Provides-Extra: dev
+Requires-Dist: pytest>=7; extra == "dev"
+Requires-Dist: black==26.5.1; extra == "dev"
+Dynamic: license-file
+# mathlas
+> **An airtight-math tool an AI *uses* — no LLM, no API key, free.** Plug it into
+> Claude Code, Cursor, or any MCP client. The **AI is the brain**; mathlas is the
+> **hands** — it gives the AI the capabilities it lacks and returns *data*
+> (candidates, verdicts, checklists, scaffolds) for the AI to reason over.
+> PolyForm Noncommercial 1.0.0 — **noncommercial use only** (no commercial use). Mostly-pure-Python.
+mathlas is a tool that an AI *uses*, **not** a tool that uses an AI — it **never
+calls an LLM and needs no API key**, so it is free and pluggable everywhere. Most
+solvable problems stay unsolved not because the formula is missing, but because
+nobody connected the *right existing result* to the problem. An AI can do that
+connecting — *if it has the right tool*. mathlas does the parts an AI can't do
+reliably on its own: **search over its own large math index**, **airtight
+numeric/formal verification** (incl. a real Lean kernel check), exact **OEIS**
+identification, structured **needs↔guarantees scaffolds**, honest **provenance**
+(never "novel"), plus a **discovery + web-augmentation layer** that lets the AI
+grow the index at runtime.
+## Results
+The discipline is **airtight-or-nothing**: a result is an independently-checkable fact or
+an honest "nothing." Each verification tier feeds both a *known* input (expect the correct
+verified result) and a *structureless* input (expect an honest "nothing") — the
+**false-positive rate is 0 across every tier** (full tables + commands in [`RESULTS.md`](RESULTS.md)):
+| Tier | Recovery@known | False-positive | Why it's airtight | Benchmark |
+|---|---|---|---|---|
+| Numeric (`identify_constant`) | 8/8 | 0/3 | independent high-precision re-eval (50–51 digits) | `benchmarks/numeric_bench.py` |
+| Sequence (`identify_sequence`) | 8/8 (all top-1) | 0/3 | exact term-match vs local OEIS (~400k seqs) | `benchmarks/tier_bench.py` |
+| Formal (`verify_formal`) | 7/7 verdicts | — | real Lean 4.30 kernel typecheck | `benchmarks/tier_bench.py` |
+| Ramanujan (`conjecture_relation`) | 6/6 | 0/2 | PSLQ + CF, every hit re-verified ≥25 digits | `benchmarks/tier_bench.py` |
+| Applicability moat | 15/15 decomp + 6/6 catch | — | atomic preconditions, misapplication traps | `benchmarks/moat_bench.py` |
+| FunSearch + web-aug | 14/14 | — | sandbox containment (network / timeout / memory) | `benchmarks/tools_bench.py` |
+**The 1.635M-doc index — retrieval at scale.** `search_existing_math` is served from a
+built **1,635,233-document** exact dense index (Qwen3-Embedding-8B, 4096-d, over the
+permissive CC-BY/CC0 TheoremSearch subset + arXiv-math from Dolma + Stacks + ProofWiki),
+dense + Okapi-BM25 + RRF. On the held-out **81,833-document** test split, querying each
+theorem by its natural-language slogan retrieves its own entry at **R@1 0.977 / R@10
+0.998** (and **R@10 0.923** querying by the raw formal statement — cross-representation).
+## The self-augmenting loop — closing the coverage gap to beat everyone
+This is the demonstration that mathlas's `add_finding` **dense path** is a real,
+decisive runtime mechanism. On TheoremSearch's own **110 human-written queries**,
+baseline mathlas (corpus-only) hits a hard **coverage floor**: TheoremSearch
+open-sourced only ~15% of their private 9.2M corpus, so **95 of the 110 target papers
+are non-permissive arXiv they withheld** — unreachable for *any* open system. The AI
+then runs the loop: for each missing theorem it **web-finds the real statement**,
+embeds it with the **same Qwen3-Embedding-8B**, and `add_finding(dense_vec=…)` so it
+**RRF-fuses through the dense channel**. That repairs the gap and beats every baseline:
+| Method | theorem Hit@20 | paper Hit@20 |
+|---|---|---|
+| Google (`site:arxiv.org`) | — | 37.8% |
+| ChatGPT 5.2 w/ Search | 19.8% | — |
+| Gemini 3 Pro | 27.0% | — |
+| **TheoremSearch** (Qwen3-8B, full private 9.2M) | 45.0% | 56.8% |
+| mathlas — baseline (corpus-only, the coverage floor) | 10.0% | 13.6% |
+| **mathlas — after the self-augmenting WEB loop** | **59.1% (65/110)** | **70.0% (77/110)** |
+**This is the loop's value, not a native-corpus claim.** The 10.0% floor exists
+*because* TheoremSearch withheld 85% of their corpus; the loop repairs that coverage.
+On the reachable subset our retrieval is merely *on par* with TheoremSearch — we make
+no native-superiority claim. The result proves that `add_finding` lets an AI grow the
+live index at runtime, decisively. Honesty audit passed — **zero query-injection**: no
+finding's text contains the query; the slogans are real theorem prose and the queries
+are paraphrases, so the dense channel is what bridges them. Reproduce with
+`benchmarks/webaug_110_bench.py` (see [`RESULTS.md`](RESULTS.md) §3c).
+## The 13 tools (all NO-LLM, returning data)
+```
+search_existing_math ─▶ mapping_scaffold + applicability_checklist ─▶ (AI judges) ─▶ verify_numeric / verify_formal
+   (own index)            (needs↔guarantees, no LLM)                                  (airtight)
+```
+| Tool | What it does | Airtight? |
+|---|---|---|
+| `search_existing_math(query, k)` | query → ranked **existing** results from our own **1.635M-doc** dense + BM25 + RRF index | retrieval |
+| `identify_constant(value, basis?)` | a real value → a known closed form + provenance | ✅ independent high-precision re-eval |
+| `identify_sequence(terms)` | an integer sequence → matching **OEIS** entries (A-number, name, URL) | ✅ exact term-match vs local OEIS |
+| `verify_numeric(value, closed_form)` | digit-agreement verdict | ✅ different engine, higher precision |
+| `verify_formal(statement, lean?)` | runs the **real Lean kernel** on a snippet → typechecks? (else honest UNDETERMINED) | ✅ real kernel check |
+| `applicability_checklist(statement)` | the result's hypotheses as an atomic **checklist** for the AI to mark | heuristic parse, no LLM |
+| `mapping_scaffold(problem, statement)` | the **needs↔guarantees** questions + fill-in template | structured, no LLM |
+| `conjecture_relation(value, ...)` | **Ramanujan Machine**: PSLQ over a rich basis + continued-fraction / recurrence **conjectures** | ✅ every candidate numerically verified |
+| `funsearch_evaluate / _register / _status` | **FunSearch harness**: sandbox-score an AI-written program, store in a MAP-Elites DB, return the few-shot to write a better one | deterministic sandbox, no LLM |
+| `search_directive(problem)` | **web-search plan**: arXiv queries + sub-fields + named results + which mathlas tools to also run | structured, no LLM |
+| `add_finding(statement, slogan, source)` | ingest a web-found result into the **live corpus** → retrievable via `search_existing_math` | provenance `web_added` |
+## Install & register with Claude Code (no API key)
+```bash
+pip install -e .                 # core: numeric + retrieval + verify + scaffolds — NO LLM, NO API key
+pip install -e '.[mcp]'          # + official MCP SDK (a dep-free stdio fallback also ships built in)
+pip install -e '.[retrieve]'     # + pyarrow, to read the open theorem dataset (real index)
+pip install -e '.[embed]'        # + sentence-transformers/torch, for the Qwen3 embedder (offline GPU)
+claude mcp add mathlas -- python -m mathlas.server
+```
+That's it — mathlas now appears as **thirteen** tools the agent can call. The server
+prefers the official `mcp` SDK and **falls back to a dependency-free stdio JSON-RPC
+server** if `mcp` isn't installed, so it always runs. (Cursor / any MCP client: point
+it at the same `python -m mathlas.server` stdio command.)
+> **Optional local data (gitignored, degrades honestly):** `identify_sequence` wants a
+> local OEIS copy; `verify_formal` wants a Lean toolchain. Without them the tools return
+> a clear "data/toolchain not available" note — never a fake answer. See
+> [`RESULTS.md`](RESULTS.md) for the one-line setup of each.
+### A worked example — an AI using the tools
+```
+User:  "Does x = cos(x) have a unique solution I can reach by iterating?"
+AI →   search_existing_math("contraction mapping unique fixed point complete metric space")
+       ← [{name:"Banach Fixed-Point Theorem", statement:"Let (X,d) be a complete metric
+            space and T a contraction. Then T has a unique fixed point ...", ...}, ...]
+AI →   applicability_checklist(banach.statement)
+       ← preconditions: ["(X,d) is a complete metric space", "T: X→X is a contraction"]
+          conclusion:    "T has a unique fixed point"
+AI  (reasons): [0,1] is complete; cos is a contraction there (|cos'|=|sin|≤sin 1<1).
+       Every precondition holds ⇒ Banach applies ⇒ unique fixed point, reachable by iteration.
+AI →   verify_numeric("0.7390851332151607", "<the Dottie-number closed form, if claimed>")
+```
+mathlas supplied the search, the checklist, and the airtight numeric check; **the AI
+did the judging**. No LLM was called *inside* mathlas.
+## Without an MCP client — CLI / Python (still no LLM)
+```bash
+mathlas 1.6449340668482264364724151666460251892   # 1.64493... -> pi**2/6  [verified 51 digits]
+mathlas 1,1,2,3,5,8,13,21                          # A000045  Fibonacci numbers  https://oeis.org/A000045
+mathlas "a bounded sequence has a convergent subsequence" --k 5   # search + scaffold/checklist
+mathlas mcp                                                        # run the MCP server
+```
+```python
+import mpmath
+from mathlas import identify, identify_sequence, mapping_scaffold, applicability_checklist
+print(identify(mpmath.zeta(2)))            # 1.64493406684823 -> pi**2/6 [verified 51 digits]
+print(identify_sequence([1,1,2,3,5,8,13,21]).matches[1].a_number)  # 'A000045' (needs local OEIS)
+```
+## Docs
+- [`RESULTS.md`](RESULTS.md) — every tool's validation, reproduced, with commands.
+- [`docs/methods.md`](docs/methods.md) — architecture, design decisions, citations.
+- [`docs/05_open_dataset.md`](docs/05_open_dataset.md) — the open dataset & 1.635M-doc index.
+- [`docs/02_eval_vs_theoremsearch.md`](docs/02_eval_vs_theoremsearch.md) — the retrieval head-to-head.
+## Positioning
+The closest system, **TheoremSearch** (UW Math AI Lab), is recall-optimized retrieval
+only — it finds the *statement you already formulated* and never checks applicability,
+routes across tools, accepts numeric inputs, or labels provenance. mathlas adds exactly
+those, plus the design that makes it composable: it is *a tool an AI plugs in*, not a
+closed lab agent or an LLM wrapper. TheoremSearch is **reference-only** — we reuse just
+their openly-licensed (CC-BY/CC0) **dataset** as raw data to build our **own** index,
+not their API, MCP, index, or code.
+A secondary helper, `solve(problem, retriever, llm=…)`, will run the needs↔guarantees
+loop for you **if you supply your own LLM** (subclass `LLM`, any provider). mathlas
+ships no vendor SDK and no default model, so the package still imports and runs with
+zero API key — this path is convenience only; the primary interface is the MCP tools.

mathlas_mcp-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,169 @@
+# mathlas
+> **An airtight-math tool an AI *uses* — no LLM, no API key, free.** Plug it into
+> Claude Code, Cursor, or any MCP client. The **AI is the brain**; mathlas is the
+> **hands** — it gives the AI the capabilities it lacks and returns *data*
+> (candidates, verdicts, checklists, scaffolds) for the AI to reason over.
+> PolyForm Noncommercial 1.0.0 — **noncommercial use only** (no commercial use). Mostly-pure-Python.
+mathlas is a tool that an AI *uses*, **not** a tool that uses an AI — it **never
+calls an LLM and needs no API key**, so it is free and pluggable everywhere. Most
+solvable problems stay unsolved not because the formula is missing, but because
+nobody connected the *right existing result* to the problem. An AI can do that
+connecting — *if it has the right tool*. mathlas does the parts an AI can't do
+reliably on its own: **search over its own large math index**, **airtight
+numeric/formal verification** (incl. a real Lean kernel check), exact **OEIS**
+identification, structured **needs↔guarantees scaffolds**, honest **provenance**
+(never "novel"), plus a **discovery + web-augmentation layer** that lets the AI
+grow the index at runtime.
+## Results
+The discipline is **airtight-or-nothing**: a result is an independently-checkable fact or
+an honest "nothing." Each verification tier feeds both a *known* input (expect the correct
+verified result) and a *structureless* input (expect an honest "nothing") — the
+**false-positive rate is 0 across every tier** (full tables + commands in [`RESULTS.md`](RESULTS.md)):
+| Tier | Recovery@known | False-positive | Why it's airtight | Benchmark |
+|---|---|---|---|---|
+| Numeric (`identify_constant`) | 8/8 | 0/3 | independent high-precision re-eval (50–51 digits) | `benchmarks/numeric_bench.py` |
+| Sequence (`identify_sequence`) | 8/8 (all top-1) | 0/3 | exact term-match vs local OEIS (~400k seqs) | `benchmarks/tier_bench.py` |
+| Formal (`verify_formal`) | 7/7 verdicts | — | real Lean 4.30 kernel typecheck | `benchmarks/tier_bench.py` |
+| Ramanujan (`conjecture_relation`) | 6/6 | 0/2 | PSLQ + CF, every hit re-verified ≥25 digits | `benchmarks/tier_bench.py` |
+| Applicability moat | 15/15 decomp + 6/6 catch | — | atomic preconditions, misapplication traps | `benchmarks/moat_bench.py` |
+| FunSearch + web-aug | 14/14 | — | sandbox containment (network / timeout / memory) | `benchmarks/tools_bench.py` |
+**The 1.635M-doc index — retrieval at scale.** `search_existing_math` is served from a
+built **1,635,233-document** exact dense index (Qwen3-Embedding-8B, 4096-d, over the
+permissive CC-BY/CC0 TheoremSearch subset + arXiv-math from Dolma + Stacks + ProofWiki),
+dense + Okapi-BM25 + RRF. On the held-out **81,833-document** test split, querying each
+theorem by its natural-language slogan retrieves its own entry at **R@1 0.977 / R@10
+0.998** (and **R@10 0.923** querying by the raw formal statement — cross-representation).
+## The self-augmenting loop — closing the coverage gap to beat everyone
+This is the demonstration that mathlas's `add_finding` **dense path** is a real,
+decisive runtime mechanism. On TheoremSearch's own **110 human-written queries**,
+baseline mathlas (corpus-only) hits a hard **coverage floor**: TheoremSearch
+open-sourced only ~15% of their private 9.2M corpus, so **95 of the 110 target papers
+are non-permissive arXiv they withheld** — unreachable for *any* open system. The AI
+then runs the loop: for each missing theorem it **web-finds the real statement**,
+embeds it with the **same Qwen3-Embedding-8B**, and `add_finding(dense_vec=…)` so it
+**RRF-fuses through the dense channel**. That repairs the gap and beats every baseline:
+| Method | theorem Hit@20 | paper Hit@20 |
+|---|---|---|
+| Google (`site:arxiv.org`) | — | 37.8% |
+| ChatGPT 5.2 w/ Search | 19.8% | — |
+| Gemini 3 Pro | 27.0% | — |
+| **TheoremSearch** (Qwen3-8B, full private 9.2M) | 45.0% | 56.8% |
+| mathlas — baseline (corpus-only, the coverage floor) | 10.0% | 13.6% |
+| **mathlas — after the self-augmenting WEB loop** | **59.1% (65/110)** | **70.0% (77/110)** |
+**This is the loop's value, not a native-corpus claim.** The 10.0% floor exists
+*because* TheoremSearch withheld 85% of their corpus; the loop repairs that coverage.
+On the reachable subset our retrieval is merely *on par* with TheoremSearch — we make
+no native-superiority claim. The result proves that `add_finding` lets an AI grow the
+live index at runtime, decisively. Honesty audit passed — **zero query-injection**: no
+finding's text contains the query; the slogans are real theorem prose and the queries
+are paraphrases, so the dense channel is what bridges them. Reproduce with
+`benchmarks/webaug_110_bench.py` (see [`RESULTS.md`](RESULTS.md) §3c).
+## The 13 tools (all NO-LLM, returning data)
+```
+search_existing_math ─▶ mapping_scaffold + applicability_checklist ─▶ (AI judges) ─▶ verify_numeric / verify_formal
+   (own index)            (needs↔guarantees, no LLM)                                  (airtight)
+```
+| Tool | What it does | Airtight? |
+|---|---|---|
+| `search_existing_math(query, k)` | query → ranked **existing** results from our own **1.635M-doc** dense + BM25 + RRF index | retrieval |
+| `identify_constant(value, basis?)` | a real value → a known closed form + provenance | ✅ independent high-precision re-eval |
+| `identify_sequence(terms)` | an integer sequence → matching **OEIS** entries (A-number, name, URL) | ✅ exact term-match vs local OEIS |
+| `verify_numeric(value, closed_form)` | digit-agreement verdict | ✅ different engine, higher precision |
+| `verify_formal(statement, lean?)` | runs the **real Lean kernel** on a snippet → typechecks? (else honest UNDETERMINED) | ✅ real kernel check |
+| `applicability_checklist(statement)` | the result's hypotheses as an atomic **checklist** for the AI to mark | heuristic parse, no LLM |
+| `mapping_scaffold(problem, statement)` | the **needs↔guarantees** questions + fill-in template | structured, no LLM |
+| `conjecture_relation(value, ...)` | **Ramanujan Machine**: PSLQ over a rich basis + continued-fraction / recurrence **conjectures** | ✅ every candidate numerically verified |
+| `funsearch_evaluate / _register / _status` | **FunSearch harness**: sandbox-score an AI-written program, store in a MAP-Elites DB, return the few-shot to write a better one | deterministic sandbox, no LLM |
+| `search_directive(problem)` | **web-search plan**: arXiv queries + sub-fields + named results + which mathlas tools to also run | structured, no LLM |
+| `add_finding(statement, slogan, source)` | ingest a web-found result into the **live corpus** → retrievable via `search_existing_math` | provenance `web_added` |
+## Install & register with Claude Code (no API key)
+```bash
+pip install -e .                 # core: numeric + retrieval + verify + scaffolds — NO LLM, NO API key
+pip install -e '.[mcp]'          # + official MCP SDK (a dep-free stdio fallback also ships built in)
+pip install -e '.[retrieve]'     # + pyarrow, to read the open theorem dataset (real index)
+pip install -e '.[embed]'        # + sentence-transformers/torch, for the Qwen3 embedder (offline GPU)
+claude mcp add mathlas -- python -m mathlas.server
+```
+That's it — mathlas now appears as **thirteen** tools the agent can call. The server
+prefers the official `mcp` SDK and **falls back to a dependency-free stdio JSON-RPC
+server** if `mcp` isn't installed, so it always runs. (Cursor / any MCP client: point
+it at the same `python -m mathlas.server` stdio command.)
+> **Optional local data (gitignored, degrades honestly):** `identify_sequence` wants a
+> local OEIS copy; `verify_formal` wants a Lean toolchain. Without them the tools return
+> a clear "data/toolchain not available" note — never a fake answer. See
+> [`RESULTS.md`](RESULTS.md) for the one-line setup of each.
+### A worked example — an AI using the tools
+```
+User:  "Does x = cos(x) have a unique solution I can reach by iterating?"
+AI →   search_existing_math("contraction mapping unique fixed point complete metric space")
+       ← [{name:"Banach Fixed-Point Theorem", statement:"Let (X,d) be a complete metric
+            space and T a contraction. Then T has a unique fixed point ...", ...}, ...]
+AI →   applicability_checklist(banach.statement)
+       ← preconditions: ["(X,d) is a complete metric space", "T: X→X is a contraction"]
+          conclusion:    "T has a unique fixed point"
+AI  (reasons): [0,1] is complete; cos is a contraction there (|cos'|=|sin|≤sin 1<1).
+       Every precondition holds ⇒ Banach applies ⇒ unique fixed point, reachable by iteration.
+AI →   verify_numeric("0.7390851332151607", "<the Dottie-number closed form, if claimed>")
+```
+mathlas supplied the search, the checklist, and the airtight numeric check; **the AI
+did the judging**. No LLM was called *inside* mathlas.
+## Without an MCP client — CLI / Python (still no LLM)
+```bash
+mathlas 1.6449340668482264364724151666460251892   # 1.64493... -> pi**2/6  [verified 51 digits]
+mathlas 1,1,2,3,5,8,13,21                          # A000045  Fibonacci numbers  https://oeis.org/A000045
+mathlas "a bounded sequence has a convergent subsequence" --k 5   # search + scaffold/checklist
+mathlas mcp                                                        # run the MCP server
+```
+```python
+import mpmath
+from mathlas import identify, identify_sequence, mapping_scaffold, applicability_checklist
+print(identify(mpmath.zeta(2)))            # 1.64493406684823 -> pi**2/6 [verified 51 digits]
+print(identify_sequence([1,1,2,3,5,8,13,21]).matches[1].a_number)  # 'A000045' (needs local OEIS)
+```
+## Docs
+- [`RESULTS.md`](RESULTS.md) — every tool's validation, reproduced, with commands.
+- [`docs/methods.md`](docs/methods.md) — architecture, design decisions, citations.
+- [`docs/05_open_dataset.md`](docs/05_open_dataset.md) — the open dataset & 1.635M-doc index.
+- [`docs/02_eval_vs_theoremsearch.md`](docs/02_eval_vs_theoremsearch.md) — the retrieval head-to-head.
+## Positioning
+The closest system, **TheoremSearch** (UW Math AI Lab), is recall-optimized retrieval
+only — it finds the *statement you already formulated* and never checks applicability,
+routes across tools, accepts numeric inputs, or labels provenance. mathlas adds exactly
+those, plus the design that makes it composable: it is *a tool an AI plugs in*, not a
+closed lab agent or an LLM wrapper. TheoremSearch is **reference-only** — we reuse just
+their openly-licensed (CC-BY/CC0) **dataset** as raw data to build our **own** index,
+not their API, MCP, index, or code.
+A secondary helper, `solve(problem, retriever, llm=…)`, will run the needs↔guarantees
+loop for you **if you supply your own LLM** (subclass `LLM`, any provider). mathlas
+ships no vendor SDK and no default model, so the package still imports and runs with
+zero API key — this path is convenience only; the primary interface is the MCP tools.

mathlas_mcp-1.0.0/mathlas/__init__.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""mathlas — a tool FOR an AI, not a tool that uses an AI. No API key. Free.
+mathlas gives a CALLING AI (Claude Code, Cursor, any MCP client / agent) the
+capabilities it lacks: search over EXISTING math, AIRTIGHT numeric/formal
+verification, structured needs<->guarantees scaffolds, and honest provenance
+(never "novel"). **mathlas itself NEVER calls an LLM and needs NO API key** — the
+AI is the brain; mathlas is the toolbox. Plug it in over MCP (``mathlas.server``)
+or call the library functions directly.
+What mathlas provides (all with NO LLM, returning DATA for the AI to reason over)
+---------------------------------------------------------------------------------
+  IDENTIFY  a real value/constant -> a known closed form, verified by independent
+            high-precision re-evaluation. ``identify`` / ``engine.py``. Airtight.
+  SEARCH    a query -> ranked candidate EXISTING results, via OUR OWN hybrid
+            (dense+BM25+RRF) index. ``retrieve`` / ``HybridRetriever``.
+  VERIFY    numeric (airtight digit agreement) + formal (Lean, stubbed) tiers,
+            plus an ``applicability_checklist`` -- the candidate's atomic
+            preconditions for the AI to check. ``verify`` / ``verify_apply``.
+  SCAFFOLD  the needs<->guarantees questions as data (``mapping_scaffold``) for
+            the AI to answer -- the analogy reasoning is the AI's job. ``map``.
+  PROVENANCE every result is tied to an existing source or labelled UNIDENTIFIED.
+A small bring-your-own-LLM ``solve()`` helper exists as a SECONDARY standalone
+convenience (you supply the LLM; the default is a no-op stub). mathlas ships no
+vendor SDK and no default model.
+    >>> import mpmath
+    >>> from mathlas import identify
+    >>> print(identify(mpmath.zeta(2)))      # doctest: +SKIP
+    1.64493406684823 -> pi**2/6  [known_form, verified 48 digits]
+"""
+# Numeric domain (airtight, no LLM, no network).
+from .engine import identify, Result, Candidate
+from .provenance import Provenance, Novelty
+from .verify import verify_closed_form, VerifyResult
+# Integer-sequence domain (airtight EXACT term-match vs a local copy of OEIS;
+# no LLM, no network at call time). Heavy data load stays lazy inside the module.
+from .sequence import (identify_sequence, SequenceResult, SequenceMatch,
+                       OEISIndex)
+# Retrieval + scaffolds + verification tiers (NO LLM). ``solve`` pulls in
+# numpy/scipy (declared deps); the heavier retrieval corpus/embedder imports
+# stay lazy inside their modules.
+from .map import (mapping_scaffold, MappingScaffold,
+                  map_candidates, extract_signature, Mapping, Signature)
+from .verify_apply import (applicability_checklist, Checklist,
+                           verify_numeric_claim, verify_formal, verify_informal,
+                           ApplyVerdict, Tier, Condition)
+from .retrieve import Retriever, Candidate as RetrievedCandidate
+# DISCOVERY + WEB-AUGMENTATION layer (NO LLM, no network, no API key).
+#   ramanujan  -- PSLQ-over-richer-basis + Ramanujan-Machine continued-fraction
+#                 conjectures, each numerically VERIFIED (provenance = conjecture).
+#   funsearch  -- the deterministic HARNESS for AI-generated program search
+#                 (sandboxed evaluate + on-disk MAP-Elites DB + few-shot status).
+#   webaug     -- search_directive (tell the AI what to web-search) + add_finding
+#                 (ingest a web result into the live corpus with NO model load).
+from .ramanujan import (conjecture, ConjectureResult, integer_relations,
+                        continued_fractions, simple_continued_fraction)
+from .webaug import (search_directive, SearchDirective, add_finding,
+                     AddFindingResult, search_findings, load_findings)
+# OPTIONAL bring-your-own-LLM standalone helper (secondary; no vendor SDK).
+from .solve import solve, Solution, AppliedResult
+from .llm import LLM, EchoLLM
+__version__ = "0.1.0"
+__all__ = [
+    # numeric (airtight)
+    "identify", "Result", "Candidate",
+    "verify_closed_form", "VerifyResult",
+    "verify_numeric_claim",
+    # integer sequences (airtight OEIS exact term-match)
+    "identify_sequence", "SequenceResult", "SequenceMatch", "OEISIndex",
+    # provenance
+    "Provenance", "Novelty",
+    # search (no LLM)
+    "Retriever", "RetrievedCandidate",
+    # scaffolds + verification tiers (no LLM)
+    "mapping_scaffold", "MappingScaffold",
+    "applicability_checklist", "Checklist",
+    "verify_formal", "ApplyVerdict", "Tier", "Condition",
+    # discovery + web-augmentation (no LLM, no network)
+    "conjecture", "ConjectureResult", "integer_relations",
+    "continued_fractions", "simple_continued_fraction",
+    "search_directive", "SearchDirective", "add_finding", "AddFindingResult",
+    "search_findings", "load_findings",
+    # optional bring-your-own-LLM standalone path (secondary)
+    "solve", "Solution", "AppliedResult",
+    "map_candidates", "extract_signature", "Mapping", "Signature",
+    "verify_informal",
+    "LLM", "EchoLLM",
+]