mathlas-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mathlas/__init__.py ADDED
@@ -0,0 +1,94 @@
1
+ """mathlas — a tool FOR an AI, not a tool that uses an AI. No API key. Free.
2
+
3
+ mathlas gives a CALLING AI (Claude Code, Cursor, any MCP client / agent) the
4
+ capabilities it lacks: search over EXISTING math, AIRTIGHT numeric/formal
5
+ verification, structured needs<->guarantees scaffolds, and honest provenance
6
+ (never "novel"). **mathlas itself NEVER calls an LLM and needs NO API key** — the
7
+ AI is the brain; mathlas is the toolbox. Plug it in over MCP (``mathlas.server``)
8
+ or call the library functions directly.
9
+
10
+ What mathlas provides (all with NO LLM, returning DATA for the AI to reason over)
11
+ ---------------------------------------------------------------------------------
12
+ IDENTIFY a real value/constant -> a known closed form, verified by independent
13
+ high-precision re-evaluation. ``identify`` / ``engine.py``. Airtight.
14
+ SEARCH a query -> ranked candidate EXISTING results, via OUR OWN hybrid
15
+ (dense+BM25+RRF) index. ``retrieve`` / ``HybridRetriever``.
16
+ VERIFY numeric (airtight digit agreement) + formal (Lean, stubbed) tiers,
17
+ plus an ``applicability_checklist`` -- the candidate's atomic
18
+ preconditions for the AI to check. ``verify`` / ``verify_apply``.
19
+ SCAFFOLD the needs<->guarantees questions as data (``mapping_scaffold``) for
20
+ the AI to answer -- the analogy reasoning is the AI's job. ``map``.
21
+ PROVENANCE every result is tied to an existing source or labelled UNIDENTIFIED.
22
+
23
+ A small bring-your-own-LLM ``solve()`` helper exists as a SECONDARY standalone
24
+ convenience (you supply the LLM; the default is a no-op stub). mathlas ships no
25
+ vendor SDK and no default model.
26
+
27
+ >>> import mpmath
28
+ >>> from mathlas import identify
29
+ >>> print(identify(mpmath.zeta(2))) # doctest: +SKIP
30
+ 1.64493406684823 -> pi**2/6 [known_form, verified 48 digits]
31
+ """
32
+ # Numeric domain (airtight, no LLM, no network).
33
+ from .engine import identify, Result, Candidate
34
+ from .provenance import Provenance, Novelty
35
+ from .verify import verify_closed_form, VerifyResult
36
+
37
+ # Integer-sequence domain (airtight EXACT term-match vs a local copy of OEIS;
38
+ # no LLM, no network at call time). Heavy data load stays lazy inside the module.
39
+ from .sequence import (identify_sequence, SequenceResult, SequenceMatch,
40
+ OEISIndex)
41
+
42
+ # Retrieval + scaffolds + verification tiers (NO LLM). ``solve`` pulls in
43
+ # numpy/scipy (declared deps); the heavier retrieval corpus/embedder imports
44
+ # stay lazy inside their modules.
45
+ from .map import (mapping_scaffold, MappingScaffold,
46
+ map_candidates, extract_signature, Mapping, Signature)
47
+ from .verify_apply import (applicability_checklist, Checklist,
48
+ verify_numeric_claim, verify_formal, verify_informal,
49
+ ApplyVerdict, Tier, Condition)
50
+ from .retrieve import Retriever, Candidate as RetrievedCandidate
51
+
52
+ # DISCOVERY + WEB-AUGMENTATION layer (NO LLM, no network, no API key).
53
+ # ramanujan -- PSLQ-over-richer-basis + Ramanujan-Machine continued-fraction
54
+ # conjectures, each numerically VERIFIED (provenance = conjecture).
55
+ # funsearch -- the deterministic HARNESS for AI-generated program search
56
+ # (sandboxed evaluate + on-disk MAP-Elites DB + few-shot status).
57
+ # webaug -- search_directive (tell the AI what to web-search) + add_finding
58
+ # (ingest a web result into the live corpus with NO model load).
59
+ from .ramanujan import (conjecture, ConjectureResult, integer_relations,
60
+ continued_fractions, simple_continued_fraction)
61
+ from .webaug import (search_directive, SearchDirective, add_finding,
62
+ AddFindingResult, search_findings, load_findings)
63
+
64
+ # OPTIONAL bring-your-own-LLM standalone helper (secondary; no vendor SDK).
65
+ from .solve import solve, Solution, AppliedResult
66
+ from .llm import LLM, EchoLLM
67
+
68
# Version string exposed as ``mathlas.__version__``.
# NOTE(review): the wheel this module ships in is labelled 1.0.0 while this
# constant says 0.1.0 -- confirm which is intended and keep the two in sync.
__version__ = "0.1.0"
# Names exported by ``from mathlas import *``, grouped by capability. Every
# entry is imported above in this module.
__all__ = [
    # numeric (airtight)
    "identify", "Result", "Candidate",
    "verify_closed_form", "VerifyResult",
    "verify_numeric_claim",
    # integer sequences (airtight OEIS exact term-match)
    "identify_sequence", "SequenceResult", "SequenceMatch", "OEISIndex",
    # provenance
    "Provenance", "Novelty",
    # search (no LLM)
    "Retriever", "RetrievedCandidate",
    # scaffolds + verification tiers (no LLM)
    "mapping_scaffold", "MappingScaffold",
    "applicability_checklist", "Checklist",
    "verify_formal", "ApplyVerdict", "Tier", "Condition",
    # discovery + web-augmentation (no LLM, no network)
    "conjecture", "ConjectureResult", "integer_relations",
    "continued_fractions", "simple_continued_fraction",
    "search_directive", "SearchDirective", "add_finding", "AddFindingResult",
    "search_findings", "load_findings",
    # optional bring-your-own-LLM standalone path (secondary)
    "solve", "Solution", "AppliedResult",
    "map_candidates", "extract_signature", "Mapping", "Signature",
    "verify_informal",
    "LLM", "EchoLLM",
]
mathlas/cli.py ADDED
@@ -0,0 +1,236 @@
1
+ """``mathlas`` command-line entry point — NO LLM, NO API key.
2
+
3
+ mathlas is a tool an AI *uses*; the CLI is a thin, human-facing view of the same
4
+ no-LLM capabilities the AI gets over MCP. Modes:
5
+
6
+ NUMERIC ``mathlas 1.6449340668482264`` -> the airtight constant->closed-form
7
+ path (engine.identify). Airtight, no LLM, no network. The default when
8
+ the argument parses as a single number.
9
+
10
+ SEQUENCE ``mathlas 1,1,2,3,5,8,13,21`` (or ``mathlas 1 1 2 3 5 8 13 21``)
11
+ -> identify an integer sequence against a LOCAL copy of OEIS by EXACT
12
+ term match (sequence.identify_sequence). Airtight, no LLM. The default
13
+ when the argument is two or more integers. Prints A-number/name/URL.
14
+
15
+ PROBLEM ``mathlas "Banach contraction gives a unique fixed point"``
16
+ -> search EXISTING math (our own index) and, for the top candidate,
17
+ print the needs<->guarantees SCAFFOLD + applicability CHECKLIST as
18
+ data — the structure an AI (or you) reasons over. NO LLM is called.
19
+ Uses a small built-in seed corpus by default; point ``--corpus DIR`` at
20
+ the open theorem dataset for the real index.
21
+
22
+ MCP ``mathlas mcp`` (or ``python -m mathlas.server``)
23
+ -> run the MCP server so an AI client can call the tools. Register in
24
+ Claude Code with: ``claude mcp add mathlas -- python -m mathlas.server``
25
+
26
+ Examples
27
+ --------
28
+ mathlas 1.6449340668482264364724151666460251892
29
+ mathlas --basis pi,e,catalan 0.915965594177219
30
+ mathlas 1,1,2,3,5,8,13,21 # OEIS: Fibonacci (A000045)
31
+ mathlas 2 3 5 7 11 13 # OEIS: the primes (A000040)
32
+ mathlas "a bounded sequence has a convergent subsequence" --k 5
33
+ mathlas "<problem>" --corpus reference/theorem-search-dataset --limit 5000
34
+ mathlas mcp
35
+ """
36
+ from __future__ import annotations
37
+
38
+ import argparse
39
+ import json
40
+ import re
41
+ import sys
42
+ from typing import List, Optional
43
+
44
+
45
+ def _looks_numeric(s: str) -> bool:
46
+ try:
47
+ float(s)
48
+ return True
49
+ except ValueError:
50
+ return False
51
+
52
+
53
+ def _parse_sequence(s: str) -> Optional[List[int]]:
54
+ """Parse ``s`` as an integer sequence if it looks like one (>= 2 comma- or
55
+ whitespace-separated integers, optionally wrapped in [] or ()), else None.
56
+
57
+ A single number is NOT a sequence (that is numeric mode). Any non-integer
58
+ token (e.g. a decimal or word) disqualifies the whole string."""
59
+ body = s.strip().strip("[]()").strip()
60
+ # split on commas and/or whitespace
61
+ toks = [t for t in re.split(r"[,\s]+", body) if t]
62
+ if len(toks) < 2:
63
+ return None
64
+ out: List[int] = []
65
+ for t in toks:
66
+ if not re.fullmatch(r"[+-]?\d+", t):
67
+ return None
68
+ out.append(int(t))
69
+ return out
70
+
71
+
72
def _looks_sequence(s: str) -> bool:
    """Return True when ``_parse_sequence`` accepts ``s``."""
    parsed = _parse_sequence(s)
    return parsed is not None
74
+
75
+
76
def _run_numeric(value: str, args) -> int:
    """Numeric mode: look for a closed form of ``value`` and print the result.

    ``args`` is the parsed CLI namespace; ``basis``, ``dps_search``,
    ``dps_verify``, ``min_digits`` and ``verbose`` are read from it.

    Side effect: sets the global ``mpmath.mp.dps``.

    Returns 0 when a closed form was identified, 1 when none was, and 2 when
    ``value`` cannot be read as a number.
    """
    import mpmath
    from .engine import identify
    from .identify import DEFAULT_BASIS

    basis = DEFAULT_BASIS
    if args.basis:
        # Accept "pi, e" and stray commas: trim each name and drop empties so
        # the basis never contains " e" or "".
        names = tuple(n.strip() for n in args.basis.split(",") if n.strip())
        if names:
            basis = names

    mpmath.mp.dps = max(args.dps_verify + 10, 60)
    try:
        # Parsing the string (not a float) keeps every digit the user typed.
        v = mpmath.mpf(value)
    except (ValueError, TypeError):
        # Reachable via an explicit --mode numeric; report the problem the
        # same way a bad sequence is reported instead of dumping a traceback.
        print(f"error: {value!r} is not a number", file=sys.stderr)
        return 2

    res = identify(v, dps_search=args.dps_search, dps_verify=args.dps_verify,
                   min_digits=args.min_digits, basis=basis)
    print(res)
    if args.verbose and res.candidates:
        for c in res.candidates:
            print(f" candidate {c.expr}: verified {c.verify.digits_agreed} digits "
                  f"[{c.provenance.novelty.value}]")
    return 0 if res.identified else 1
91
+
92
+
93
def _run_sequence(seq: List[int], args) -> int:
    """Sequence mode: look ``seq`` up via ``tool_identify_sequence`` and print
    the outcome, as JSON when ``--json`` is set, otherwise as text listing each
    match's A-number, URL and name.

    Returns 0 when the lookup reports the sequence as identified, else 1.
    """
    from .server import tool_identify_sequence

    result = tool_identify_sequence(seq, max_results=args.k,
                                    data_dir=args.oeis_dir)

    if args.json:
        print(json.dumps(result, indent=2, default=str))
        return 0 if result["identified"] else 1

    print(f"# integer sequence {result['query']} — OEIS exact term-match:")
    if not result["identified"]:
        print(f" (no match) {result['note']}")
        return 1

    for match in result["matches"]:
        if match["exact_prefix"]:
            where = "prefix"
        else:
            where = f"offset {match['offset']}"
        print(f" {match['a_number']} [{where}] {match['url']}")
        if match["name"]:
            print(f" {match['name']}")
    print(f"\n({result['note']})")
    return 0
112
+
113
+
114
def _run_problem(problem: str, args) -> int:
    """Problem mode: search for candidate existing results, then show the
    mapping scaffold and applicability checklist for the top candidate.

    Only the ``tool_*`` helpers from ``.server`` are called; their output is
    printed as JSON (``--json``) or as text. Returns 0 when the search
    produced at least one candidate, else 1.
    """
    from .server import (tool_search_existing_math, tool_mapping_scaffold,
                         tool_applicability_checklist)

    search = tool_search_existing_math(problem, k=args.k,
                                       corpus_dir=args.corpus,
                                       corpus_limit=args.limit)

    if args.json:
        payload = {"search": search}
        if search["candidates"]:
            statement = search["candidates"][0]["statement"]
            payload["mapping_scaffold"] = tool_mapping_scaffold(problem, statement)
            payload["applicability_checklist"] = tool_applicability_checklist(statement)
        print(json.dumps(payload, indent=2, default=str))
        return 0 if search["candidates"] else 1

    # Text output: one header, then two lines per candidate.
    print(f"# search ({search['corpus']}) — top {args.k} candidate existing results:")
    candidates = search["candidates"]
    for cand in candidates:
        score = cand["score"]
        print(f" [{score:.4f}] {cand['name'] or '(unnamed)'} -- {cand['source'] or ''}")
        print(f" {(cand['slogan'] or cand['statement'])[:160]}")
    if not candidates:
        print(" (no candidates; try --corpus DIR for a larger index)")
        return 1

    top = candidates[0]
    print(f"\n# needs<->guarantees SCAFFOLD for the top candidate "
          f"({top['name'] or 'unnamed'}) — the AI reasons over this (NO LLM):")
    scaffold = tool_mapping_scaffold(problem, top["statement"])
    for question in scaffold["questions"]:
        print(f" - {question}")

    print("\n# applicability CHECKLIST (mark each against your problem):")
    checklist = tool_applicability_checklist(top["statement"])
    for precondition in checklist["preconditions"]:
        print(f" [ ] {precondition['text']}")
    print(f" => guarantees: {checklist['conclusion']}")

    print("\n(mathlas provided search + scaffold + checklist; an AI does the "
          "judging. Run `mathlas mcp` to expose these as MCP tools.)")
    return 0
153
+
154
+
155
def _run_mcp(_args) -> int:
    """MCP mode: hand control to ``mathlas.server.main`` and return its result.

    The parsed CLI namespace is accepted for signature parity with the other
    ``_run_*`` helpers and is not used.
    """
    # Imported here so the server module loads only when this mode is chosen.
    from .server import main as serve

    status = serve()
    return status
158
+
159
+
160
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``mathlas`` argument parser.

    One positional ``query`` (zero or more tokens), the general ``--mode``,
    ``-v/--verbose`` and ``--json`` switches, and one option group per mode
    (numeric, sequence, problem).
    """
    parser = argparse.ArgumentParser(
        prog="mathlas",
        description=("A tool FOR an AI: search existing math + airtight "
                     "verification + needs<->guarantees scaffolds. No API key."),
    )

    query_help = ("a number (numeric mode), an integer sequence such as "
                  "1,1,2,3,5,8 or '1 1 2 3 5 8' (sequence mode), a problem "
                  "description (problem mode), or 'mcp' to run the MCP server")
    parser.add_argument("query", nargs="*", help=query_help)
    parser.add_argument(
        "--mode",
        choices=["auto", "numeric", "sequence", "problem", "mcp"],
        default="auto",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument(
        "--json", action="store_true",
        help="emit machine-readable JSON (problem/sequence mode)",
    )

    numeric = parser.add_argument_group("numeric mode")
    numeric.add_argument(
        "--basis", help="comma-separated constant basis (e.g. pi,e,catalan)")
    # The three precision knobs differ only in flag name and default.
    for flag, default in (("--dps-search", 30),
                          ("--dps-verify", 50),
                          ("--min-digits", 20)):
        numeric.add_argument(flag, type=int, default=default)

    sequence = parser.add_argument_group("sequence mode")
    sequence.add_argument(
        "--oeis-dir",
        help=("dir holding OEIS stripped.gz/names.gz "
              "(omit to use the standard search path)"),
    )

    problem = parser.add_argument_group("problem mode")
    problem.add_argument(
        "--corpus",
        help=("dir with the open theorem dataset parquets "
              "(omit to use the built-in seed corpus)"),
    )
    problem.add_argument(
        "--limit", type=int, default=5000,
        help="cap corpus size (keep small without a GPU)",
    )
    problem.add_argument(
        "--k", type=int, default=10,
        help="candidates/matches to return",
    )
    return parser
194
+
195
+
196
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: parse ``argv``, pick a mode, run it, return its status.

    With ``--mode auto`` the mode is inferred from the query text: the literal
    ``mcp``, then a single number, then an integer sequence, otherwise a
    problem description. Returns 2 for usage errors (no query, or a query that
    is not a sequence in sequence mode); otherwise the selected runner's
    return value.
    """
    args = build_parser().parse_args(argv)
    # The positional is a token list (nargs="*"); joining restores what the
    # user typed, so "1 1 2 3" becomes a space-separated sequence and a
    # multi-word problem stays one string.
    query = " ".join(args.query or []).strip()
    mode = args.mode

    if mode == "auto":
        if not query:
            print("error: provide a number, an integer sequence, a problem "
                  "description, or 'mcp'", file=sys.stderr)
            return 2
        # First matching detector wins; order matters (a bare number would
        # otherwise never reach numeric mode).
        detectors = (
            ("mcp", lambda text: text == "mcp"),
            ("numeric", _looks_numeric),
            ("sequence", _looks_sequence),
        )
        for name, accepts in detectors:
            if accepts(query):
                mode = name
                break
        else:
            mode = "problem"

    if mode == "mcp":
        return _run_mcp(args)

    if not query:
        print("error: a query is required for this mode", file=sys.stderr)
        return 2

    if mode == "numeric":
        return _run_numeric(query, args)

    if mode == "sequence":
        seq = _parse_sequence(query)
        if seq is not None:
            return _run_sequence(seq, args)
        print(f"error: {query!r} is not an integer sequence", file=sys.stderr)
        return 2

    return _run_problem(query, args)
233
+
234
+
235
# Script entry point: run the CLI and exit with whatever ``main`` returns.
if __name__ == "__main__":
    raise SystemExit(main())
mathlas/embed.py ADDED
@@ -0,0 +1,129 @@
1
+ """Pluggable dense-embedding backend for semantic retrieval.
2
+
3
+ Mirrors the ``llm.py`` design: the embedder is an interface, not a hardwired
4
+ vendor. The PRODUCTION path is an open-weights instruction-tuned text embedder
5
+ (Qwen3-Embedding, the current open MTEB SOTA — Zhang et al., arXiv:2506.05176);
6
+ the DEFAULT path needs no model download so validation runs CPU-only/offline.
7
+
8
+ Why Qwen3-Embedding for the production index
9
+ --------------------------------------------
10
+ Qwen3-Embedding (0.6B/4B/8B; 1024/2560/4096-dim, Matryoshka-truncatable) tops
11
+ MTEB as of mid-2026 and is exactly what TheoremSearch independently converged on
12
+ for the same corpus — but we build OUR OWN index with it, over the open dataset.
13
+ It is *instruction-aware*: prefixing the query with a task instruction gives a
14
+ documented +1-5% on retrieval, so ``Qwen3Embedder`` adds that prefix to queries
15
+ (never to documents), per the model card.
16
+
17
+ The corpus unit we embed is the natural-language *slogan/denotation* of a
18
+ theorem, NOT raw LaTeX (the load-bearing lesson: symbol-heavy LaTeX drowns dense
19
+ embedders; mathematicians query in prose). See ``retrieve/corpus.py``.
20
+
21
+ References
22
+ ----------
23
+ - Qwen3-Embedding: Zhang et al., "Qwen3 Embedding," arXiv:2506.05176 (2025).
24
+ - MTEB: Muennighoff et al., "MTEB: Massive Text Embedding Benchmark," 2023.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ from abc import ABC, abstractmethod
29
+ from typing import List, Optional, Sequence
30
+
31
+ import numpy as np
32
+
33
+
34
class Embedder(ABC):
    """Interface for text -> dense-vector backends.

    Implementations set ``dim`` and provide ``encode``. Documents and queries
    are embedded the same way unless a backend chooses to treat queries
    differently (signalled through ``is_query``), e.g. by adding an
    instruction prefix.
    """

    #: dimensionality of the returned vectors
    dim: int

    @abstractmethod
    def encode(self, texts: Sequence[str], *, is_query: bool = False) -> np.ndarray:
        """Embed ``texts``.

        Returns an ``(len(texts), dim)`` float32 array whose rows are
        L2-normalised. ``is_query`` is keyword-only and tells the backend the
        texts are search queries rather than corpus documents.
        """
        ...
45
+
46
+
47
+ def _l2norm(x: np.ndarray) -> np.ndarray:
48
+ n = np.linalg.norm(x, axis=1, keepdims=True)
49
+ n[n == 0] = 1.0
50
+ return (x / n).astype(np.float32)
51
+
52
+
53
class HashingEmbedder(Embedder):
    """Zero-download fallback embedder (the DEFAULT).

    A signed feature-hashing bag of words: each token is hashed to one of
    ``dim`` columns with a +1/-1 sign, counts are accumulated per text, and
    rows are L2-normalised. It is intentionally weak -- it exists so the
    retrieval + fusion + map + verify pipeline can be exercised end-to-end on
    CPU with no model weights. For any real corpus, plug in ``Qwen3Embedder``
    (production) or another open embedder.

    The column and sign come from a keyed BLAKE2b digest of the token, NOT
    from the builtin ``hash()``: ``hash()`` of a ``str`` is salted per
    interpreter process, so vectors built in one run would not match queries
    embedded in another. With the digest, the same text always maps to the
    same vector.
    """

    def __init__(self, dim: int = 256, ngram: int = 1) -> None:
        """``dim`` is the vector width (must be >= 1); ``ngram`` is the longest
        word n-gram to emit as a token (1 = single words only).

        Raises ``ValueError`` when ``dim`` is smaller than 1.
        """
        self.dim = int(dim)
        if self.dim < 1:
            # Fail here rather than with a modulo-by-zero inside encode().
            raise ValueError(f"dim must be >= 1, got {dim!r}")
        self.ngram = int(ngram)

    def _toks(self, text: str) -> List[str]:
        """Lower-case ``text``, turn every character that is neither
        alphanumeric nor whitespace into a space, and split into words. When
        ``ngram`` > 1, also append the ``_``-joined word n-grams for every
        n in 2..ngram."""
        words = "".join(c.lower() if (c.isalnum() or c.isspace()) else " "
                        for c in text).split()
        if self.ngram <= 1:
            return words
        grams = words[:]
        for n in range(2, self.ngram + 1):
            grams += ["_".join(words[i:i + n]) for i in range(len(words) - n + 1)]
        return grams

    def encode(self, texts: Sequence[str], *, is_query: bool = False) -> np.ndarray:
        """Return an ``(len(texts), dim)`` float32 array of L2-normalised rows.

        ``is_query`` is accepted for interface compatibility and ignored:
        queries and documents are embedded identically.
        """
        import hashlib  # function-scope: not imported at module level

        out = np.zeros((len(texts), self.dim), dtype=np.float32)
        # token -> (column, sign); each distinct token is digested once per call
        slots = {}
        for i, t in enumerate(texts):
            for tok in self._toks(t):
                slot = slots.get(tok)
                if slot is None:
                    digest = hashlib.blake2b(tok.encode("utf-8"), digest_size=8,
                                             person=b"mathlas-embed").digest()
                    h = int.from_bytes(digest, "little")
                    # Lowest bit picks the sign, the remaining bits the column.
                    slot = ((h >> 1) % self.dim, 1.0 if (h & 1) else -1.0)
                    slots[tok] = slot
                col, sign = slot
                out[i, col] += sign
        return _l2norm(out)
87
+
88
+
89
class Qwen3Embedder(Embedder):
    """Open-weights production embedder (Qwen3-Embedding via sentence-transformers).

    ``sentence_transformers`` is imported inside ``__init__``, so importing
    this module does not load it; only constructing this class does.
    """

    # Query-side instruction (documents are embedded bare): ``encode`` adds it
    # only when ``is_query`` is true.
    DEFAULT_INSTRUCT = (
        "Given a mathematical problem or a description of a result, retrieve the "
        "statement of the existing theorem or lemma that applies to it."
    )

    def __init__(self, model: str = "Qwen/Qwen3-Embedding-0.6B",
                 dim: Optional[int] = None, device: Optional[str] = None,
                 instruct: Optional[str] = DEFAULT_INSTRUCT) -> None:
        """Load ``model`` through ``SentenceTransformer``.

        ``dim``, when given, is passed as ``truncate_dim`` and becomes
        ``self.dim``; otherwise ``self.dim`` is the model's own embedding
        dimension. ``device`` is forwarded unchanged. ``instruct`` is the
        query instruction; pass ``None`` or ``""`` to disable the prefix.

        Raises ``ImportError`` (with install instructions) when
        ``sentence_transformers`` is not available.
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as e:  # pragma: no cover - optional heavy dep
            raise ImportError(
                "pip install 'mathlas[embed]' (sentence-transformers + torch) "
                "for the Qwen3 production embedder; the default HashingEmbedder "
                "needs no extra deps."
            ) from e
        self._st = SentenceTransformer(model, device=device,
                                       truncate_dim=dim)  # MRL truncation if set
        self.dim = int(dim or self._st.get_sentence_embedding_dimension())
        self._instruct = instruct

    def encode(self, texts: Sequence[str], *, is_query: bool = False) -> np.ndarray:
        """Return an ``(len(texts), dim)`` float32 array of normalised rows.

        Queries (``is_query=True``) are prefixed with the configured
        instruction; documents are embedded as given.
        """
        batch = list(texts)
        if not batch:
            # Build the empty result here so the (len(texts), dim) contract
            # holds without relying on what the backend returns for [].
            return np.zeros((0, self.dim), dtype=np.float32)
        kw = {}
        if is_query and self._instruct:
            kw["prompt"] = f"Instruct: {self._instruct}\nQuery: "
        vecs = self._st.encode(batch, normalize_embeddings=True,
                               convert_to_numpy=True, **kw)
        return vecs.astype(np.float32)
127
+
128
+
129
+ __all__ = ["Embedder", "HashingEmbedder", "Qwen3Embedder"]
mathlas/engine.py ADDED
@@ -0,0 +1,95 @@
1
+ """math_engine v0 facade -- the numeric beachhead.
2
+
3
+ ``identify(value)`` runs the full loop:
4
+
5
+ route (implicit: real constant)
6
+ -> identify (find an existing closed form)
7
+ -> verify (independent high-precision re-evaluation)
8
+ -> provenance (label the source; never "novel")
9
+
10
+ and returns a ``Result`` you can inspect or print.
11
+
12
+ Precision caveat: pass high-precision inputs as ``mpmath.mpf`` or as strings.
13
+ A Python ``float`` carries only ~15 digits, which limits PSLQ to the simplest
14
+ relations.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass
19
+ from typing import List, Optional
20
+
21
+ import mpmath
22
+
23
+ from .identify import identify_constant, DEFAULT_BASIS
24
+ from .verify import verify_closed_form, VerifyResult
25
+ from .provenance import Provenance, Novelty
26
+
27
+
28
+ def _pretty(expr: str) -> str:
29
+ """A human-readable form of a verified PSLQ expression for DISPLAY only.
30
+ Runs after verification, so it can never weaken the airtight check; on any
31
+ failure it falls back to the raw expression. ``pi**2*(sqrt(24)/12)**2`` ->
32
+ ``pi**2/6``; ``(1*catalan)`` -> ``Catalan``."""
33
+ try:
34
+ import sympy
35
+ from .verify import _to_sympy_expr
36
+ return str(sympy.simplify(_to_sympy_expr(expr)))
37
+ except Exception:
38
+ return expr
39
+
40
+
41
@dataclass(frozen=True)
class Candidate:
    """One closed-form proposal together with its verification outcome and
    provenance record. Immutable."""

    # Raw expression string, exactly as it was verified.
    expr: str
    # Result of checking ``expr`` against the queried value.
    verify: VerifyResult
    # Where the expression came from and how it was obtained.
    provenance: Provenance

    @property
    def display(self) -> str:
        """``expr`` prettified for humans; verification itself ran on ``expr``."""
        raw = self.expr
        return _pretty(raw)
51
+
52
+
53
@dataclass(frozen=True)
class Result:
    """Outcome of one ``identify`` call. Immutable."""

    # Printable form of the value that was looked up.
    query: str
    # Top-ranked verified candidate, or None when nothing verified.
    best: Optional[Candidate]
    # Every verified candidate, best first.
    candidates: List[Candidate]

    @property
    def identified(self) -> bool:
        """True when a verified closed form was found."""
        return self.best is not None

    def __str__(self) -> str:
        """One-line summary: the query, then either the best candidate with
        its novelty label and agreed digits, or ``UNIDENTIFIED``."""
        top = self.best
        if top is not None:
            shown = top.display
            label = top.provenance.novelty.value
            digits = top.verify.digits_agreed
            return f"{self.query} -> {shown} [{label}, verified {digits} digits]"
        return f"{self.query} -> UNIDENTIFIED (no verified closed form)"
69
+
70
+
71
def identify(value, dps_search: int = 30, dps_verify: int = 50,
             min_digits: int = 20, basis=DEFAULT_BASIS) -> Result:
    """Look for a closed form of the real ``value`` and verify each proposal.

    ``value`` may be an ``mpmath.mpf`` or anything ``mpmath.mpf`` accepts; it
    is converted at ``dps_verify`` digits of working precision. Proposals
    come from ``identify_constant`` (using ``dps_search`` and ``basis``); each
    is checked with ``verify_closed_form`` (using ``dps_verify`` and
    ``min_digits``) and only those whose check is ``ok`` are kept.

    Returns a ``Result`` whose candidates are ordered by digits agreed,
    highest first; ``best`` is the first of them, or None if none verified.
    """
    with mpmath.workdps(dps_verify):
        if isinstance(value, mpmath.mpf):
            v = value
        else:
            v = mpmath.mpf(value)
        query = mpmath.nstr(v, 15)

    verified: List[Candidate] = []
    for expr in identify_constant(v, dps_search=dps_search, basis=basis):
        check = verify_closed_form(v, expr, dps_verify=dps_verify,
                                   min_digits=min_digits)
        if not check.ok:
            continue
        origin = Provenance(
            novelty=Novelty.KNOWN_FORM,
            method="mpmath.identify+sympy-verify",
            source=expr,
            basis=tuple(basis),
        )
        verified.append(Candidate(expr=expr, verify=check, provenance=origin))

    # Stable sort: proposals with equal digit counts keep their proposal order.
    ranked = sorted(verified, key=lambda cand: cand.verify.digits_agreed,
                    reverse=True)
    top = ranked[0] if ranked else None
    return Result(query=query, best=top, candidates=ranked)