catapult-mcp 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write # trusted publishing (OIDC)
9
+ contents: read
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: astral-sh/setup-uv@v4
17
+ with:
18
+ version: "latest"
19
+ - run: uv venv
20
+ - run: uv pip install -e ".[dev]"
21
+ - run: uv run --no-sync pytest
22
+
23
+ publish:
24
+ needs: test
25
+ runs-on: ubuntu-latest
26
+ environment: pypi
27
+ steps:
28
+ - uses: actions/checkout@v4
29
+ - uses: astral-sh/setup-uv@v4
30
+ with:
31
+ version: "latest"
32
+ - run: uv build
33
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,37 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+
14
+ # Testing
15
+ .pytest_cache/
16
+ .coverage
17
+ htmlcov/
18
+
19
+ # IDE
20
+ .idea/
21
+ .vscode/
22
+ *.swp
23
+ *.swo
24
+ *~
25
+
26
+ # OS
27
+ .DS_Store
28
+ Thumbs.db
29
+
30
+ # UV
31
+ uv.lock
32
+
33
+ # Data
34
+ *.acatome/
35
+ inbox/
36
+ completed/
37
+ errors/
@@ -0,0 +1,11 @@
1
+ # Changelog
2
+
3
+ All notable changes to **catapult-mcp** will be documented in this file.
4
+
5
+ Format follows [Keep a Changelog](https://keepachangelog.com/).
6
+
7
+ ## [0.1.0] — 2026-03-11
8
+
9
+ ### Added
10
+
11
+ - Initial release.
@@ -0,0 +1,17 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2024-2026 Reto Stamm
5
+
6
+ This program is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ This program is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: catapult-mcp
3
+ Version: 0.2.0
4
+ Summary: MCP server for querying heterogeneous catalysis databases
5
+ Author-email: Reto Stamm <reto@retostamm.com>
6
+ License-Expression: GPL-3.0-or-later
7
+ License-File: LICENSE
8
+ Keywords: catalysis,chemistry,database,materials-science,mcp
9
+ Requires-Python: >=3.11
10
+ Requires-Dist: chemdb-common>=0.1.0
11
+ Requires-Dist: mcp>=1.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: black>=24.0; extra == 'dev'
14
+ Requires-Dist: pytest>=8.0; extra == 'dev'
15
+ Requires-Dist: ruff>=0.5; extra == 'dev'
16
+ Provides-Extra: postgres
17
+ Requires-Dist: psycopg[binary]>=3.0; extra == 'postgres'
@@ -0,0 +1,32 @@
1
+ # catapult-mcp
2
+
3
+ MCP server for querying heterogeneous catalysis databases. Exposes reaction data, catalysts, and conditions via the Model Context Protocol.
4
+
5
+ ## Features
6
+
7
+ - **Reaction search** — query by catalyst, reactant, product, or conditions
8
+ - **SQLAlchemy backend** — SQLite (default) or PostgreSQL
9
+ - **MCP protocol** — compatible with any MCP-aware LLM client
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ uv pip install -e .
15
+ # With PostgreSQL support:
16
+ uv pip install -e ".[postgres]"
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```bash
22
+ catapult-mcp # starts the MCP server
23
+ ```
24
+
25
+ ## Dependencies
26
+
27
+ - **chemdb-common** — shared database models and CLI
28
+ - **mcp** — Model Context Protocol server framework
29
+
30
+ ## License
31
+
32
+ GPL-3.0-or-later — see [LICENSE](LICENSE).
@@ -0,0 +1,57 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "catapult-mcp"
7
+ version = "0.2.0"
8
+ description = "MCP server for querying heterogeneous catalysis databases"
9
+ requires-python = ">=3.11"
10
+ license = "GPL-3.0-or-later"
11
+ authors = [{name = "Reto Stamm", email = "reto@retostamm.com"}]
12
+ keywords = ["mcp", "catalysis", "chemistry", "materials-science", "database"]
13
+ dependencies = [
14
+ "chemdb-common>=0.1.0",
15
+ "mcp>=1.0",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ postgres = [
20
+ "psycopg[binary]>=3.0",
21
+ ]
22
+ dev = [
23
+ "pytest>=8.0",
24
+ "black>=24.0",
25
+ "ruff>=0.5",
26
+ ]
27
+
28
+ [project.scripts]
29
+ catapult-mcp = "catapult.server:main"
30
+
31
+ [tool.hatch.build.targets.wheel]
32
+ packages = ["src/catapult"]
33
+
34
+ [tool.pytest.ini_options]
35
+ testpaths = ["tests"]
36
+
37
+ [tool.black]
38
+ line-length = 88
39
+
40
+ [tool.ruff]
41
+ line-length = 88
42
+
43
+ [tool.bumpversion]
44
+ current_version = "0.2.0"
45
+ commit = true
46
+ tag = true
47
+ tag_name = "v{new_version}"
48
+
49
+ [[tool.bumpversion.files]]
50
+ filename = "pyproject.toml"
51
+ search = 'version = "{current_version}"'
52
+ replace = 'version = "{new_version}"'
53
+
54
+ [[tool.bumpversion.files]]
55
+ filename = "src/catapult/__init__.py"
56
+ search = '__version__ = "{current_version}"'
57
+ replace = '__version__ = "{new_version}"'
@@ -0,0 +1,3 @@
1
+ """CataPult — MCP server for heterogeneous catalysis databases."""
2
+
3
+ __version__ = "0.2.0"
File without changes
@@ -0,0 +1,238 @@
1
+ """Read-only queries for the catapult_get tool.
2
+
3
+ All queries are local SQL — no runtime API calls.
4
+ Shape is computed on-the-fly via SQL aggregation.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from sqlalchemy import func, text
12
+ from sqlalchemy.orm import Session
13
+
14
+ from chemdb.errors import IdNotFoundError, InvalidRangeError, NoResultsError
15
+ from chemdb.ranges import parse_range
16
+ from chemdb.sort import parse_sort
17
+
18
+ from catapult.db.schema import Reaction
19
+
20
+ PAGE_SIZE = 10
21
+
22
+ SORTABLE_FIELDS = {"energy", "barrier", "catalyst", "facet", "relevance"}
23
+
24
+
25
def get_by_id(session: Session, id_str: str, page: int = 1) -> dict[str, Any]:
    """Look up reactions by identifier.

    ``id_str`` has the form ``"<scheme>:<value>"``:

    * ``doi:`` — exact match on the DOI column
    * ``pub:`` — case-insensitive substring match on the publication ID
    * ``sys:`` — exact match on the system ID

    A string without a ``:`` (or with nothing after it) is treated as a
    ``pub`` search on the whole string.

    Args:
        session: Open SQLAlchemy session.
        id_str: Identifier to look up.
        page: 1-based result page; values below 1 are treated as 1.

    Returns:
        Dict with ``type``, ``id`` (the identifier as given), ``total``
        (number of matching reactions) and ``results`` (at most ``PAGE_SIZE``
        summary rows for the requested page).

    Raises:
        IdNotFoundError: Empty identifier, unknown scheme, or no match.
    """
    scheme, _, ident = id_str.partition(":")
    if not ident:
        # Bare string → publication ID search
        ident = scheme
        scheme = "pub"
    if not ident:
        # An empty pattern would degrade to "%%" and match every publication.
        raise IdNotFoundError(id_str)

    q = session.query(Reaction)
    if scheme == "doi":
        q = q.filter(Reaction.doi == ident)
    elif scheme == "pub":
        q = q.filter(Reaction.pub_id.ilike(f"%{ident}%"))
    elif scheme == "sys":
        q = q.filter(Reaction.sys_id == ident)
    else:
        raise IdNotFoundError(id_str)

    # Count in SQL rather than loading every matching row into memory.
    total = q.count()
    if total == 0:
        raise IdNotFoundError(id_str)

    offset = (max(page, 1) - 1) * PAGE_SIZE
    # Order by primary key so repeated calls return the same rows.
    rows = q.order_by(Reaction.id.asc()).offset(offset).limit(PAGE_SIZE).all()

    return {
        "type": "publication",
        "id": id_str,
        "total": total,
        "results": [_rxn_to_row(r) for r in rows],
    }
53
+
54
+
55
def search(
    session: Session,
    *,
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> dict[str, Any]:
    """Filter search with shape + paginated results.

    Args:
        session: Open SQLAlchemy session.
        query: Accepted for interface compatibility.
            NOTE(review): this function never reads ``query``, so no
            free-text filtering is applied here.
        catalyst: One name, or several comma-separated names combined with
            OR; each is a case-insensitive substring match. More than one
            name turns on comparison mode.
        facet: Exact facet match.
        reactants: Case-insensitive substring match on the reactants column.
        products: Case-insensitive substring match on the products column.
        energy: Range expression parsed by ``parse_range``.
        barrier: Range expression parsed by ``parse_range``.
        functional: Case-insensitive match on the functional column.
        database: Case-insensitive match on the database column.
        sort: Sort expression parsed by ``parse_sort``; defaults to ascending
            energy when empty.
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        Dict with ``total``, ``page``, ``page_size``, ``shape`` (page 1 only),
        ``comparison`` (page 1 in comparison mode only) and ``results``.

    Raises:
        InvalidRangeError: ``energy`` or ``barrier`` could not be parsed.
        NoResultsError: Nothing matches the filters.
    """
    from sqlalchemy import or_

    # A page below 1 would produce a negative OFFSET.
    page = max(page, 1)

    q = session.query(Reaction)

    # Catalyst filter (OR logic for comparison). Blank entries are dropped, so
    # an input like "," adds no filter instead of an empty OR.
    catalysts = [c.strip() for c in catalyst.split(",") if c.strip()]
    if catalysts:
        q = q.filter(or_(*(Reaction.catalyst.ilike(f"%{c}%") for c in catalysts)))

    if facet:
        q = q.filter(Reaction.facet == facet)

    if reactants:
        q = q.filter(Reaction.reactants.ilike(f"%{reactants}%"))

    if products:
        q = q.filter(Reaction.products.ilike(f"%{products}%"))

    # Range filters
    for field_name, raw in (("energy", energy), ("barrier", barrier)):
        if not raw:
            continue
        try:
            parsed = parse_range(raw)
        except ValueError as exc:
            raise InvalidRangeError(field_name, raw) from exc
        clause, params = parsed.to_sql_clause(field_name)
        q = q.filter(text(clause).bindparams(**params))

    if functional:
        q = q.filter(Reaction.functional.ilike(functional))

    if database:
        q = q.filter(Reaction.database.ilike(database))

    total = q.count()
    if total == 0:
        raise NoResultsError()

    # Keep the filtered-but-unordered query for the aggregate helpers, so the
    # user's sort order does not leak into them.
    filtered = q

    # Sort
    sort_fields = parse_sort(sort, SORTABLE_FIELDS) if sort else []
    if sort_fields:
        for sf in sort_fields:
            if sf.name == "relevance":
                continue
            col = getattr(Reaction, sf.name, None)
            if col is not None:
                q = q.order_by(col.desc() if sf.descending else col.asc())
    else:
        q = q.order_by(Reaction.energy.asc())
    # Primary-key tie-break: without it, rows with equal sort keys can move
    # between pages.
    q = q.order_by(Reaction.id.asc())

    results = q.offset((page - 1) * PAGE_SIZE).limit(PAGE_SIZE).all()

    first_page = page == 1
    shape = _compute_shape(session, filtered) if first_page else None

    # Comparison mode (multiple catalysts): also compute per-catalyst best.
    comparison = None
    if first_page and len(catalysts) > 1:
        comparison = _compute_comparison(session, filtered, catalysts)

    return {
        "total": total,
        "page": page,
        "page_size": PAGE_SIZE,
        "shape": shape,
        "comparison": comparison,
        "results": [_rxn_to_row(r) for r in results],
    }
146
+
147
+
148
def get_shape(session: Session) -> dict[str, Any]:
    """Summarize the whole reactions table (used for the no-argument call).

    Returns ``{"total": 0}`` for an empty table; otherwise the statistics from
    ``_compute_shape`` with a ``total`` row count added.
    """
    row_count = session.query(Reaction).count()
    if not row_count:
        return {"total": 0}

    everything = session.query(Reaction)
    return {**_compute_shape(session, everything), "total": row_count}
157
+
158
+
159
def _compute_shape(session: Session, q) -> dict[str, Any]:
    """Compute shape statistics for the rows selected by ``q``.

    All aggregates are derived from ``q`` itself, so filters applied by the
    caller are reflected in the result.

    Args:
        session: Kept for interface compatibility; the aggregates run on the
            session that ``q`` is already bound to.
        q: A ``Reaction`` query. Any ORDER BY on it is discarded.

    Returns:
        Dict with ``energy`` and ``barrier`` (each ``min``/``max``/``avg``),
        plus ``catalysts``, ``facets`` and ``functionals``, each mapping a
        value to its row count (ten most frequent, most frequent first).
    """
    # Ordering is meaningless for aggregates and can make grouped queries
    # invalid, so drop whatever the caller applied.
    base = q.order_by(None)

    e_min, e_max, e_avg, b_min, b_max, b_avg = base.with_entities(
        func.min(Reaction.energy),
        func.max(Reaction.energy),
        func.avg(Reaction.energy),
        func.min(Reaction.barrier),
        func.max(Reaction.barrier),
        func.avg(Reaction.barrier),
    ).one()

    def _top_counts(column, *, skip_blank: bool) -> dict[Any, int]:
        """Return the ten most frequent values of ``column`` with row counts."""
        n_rows = func.count(Reaction.id)
        counted = base.with_entities(column, n_rows)
        if skip_blank:
            counted = counted.filter(column.isnot(None), column != "")
        rows = counted.group_by(column).order_by(n_rows.desc()).limit(10).all()
        return {value: count for value, count in rows}

    return {
        "energy": {"min": e_min, "max": e_max, "avg": e_avg},
        "barrier": {"min": b_min, "max": b_max, "avg": b_avg},
        "catalysts": _top_counts(Reaction.catalyst, skip_blank=False),
        "facets": _top_counts(Reaction.facet, skip_blank=True),
        "functionals": _top_counts(Reaction.functional, skip_blank=True),
    }
207
+
208
+
209
def _compute_comparison(
    session: Session, q, catalysts: list[str]
) -> list[dict[str, Any]]:
    """Best (lowest energy) row per catalyst for comparison table.

    Args:
        session: Kept for interface compatibility; unused.
        q: Filtered ``Reaction`` query. Any ORDER BY on it is discarded.
        catalysts: Catalyst names; each is a case-insensitive substring match.

    Returns:
        One summary row per catalyst that has a match, in the order given.
    """
    # Drop inherited ordering so "best" is decided by energy alone.
    unordered = q.order_by(None)

    rows = []
    for cat in catalysts:
        best = (
            unordered.filter(Reaction.catalyst.ilike(f"%{cat}%"))
            # Sort rows without an energy last on every backend (SQLite puts
            # NULLs first in ascending order); they are only picked when a
            # catalyst has no row with an energy at all.
            .order_by(Reaction.energy.is_(None), Reaction.energy.asc())
            .first()
        )
        if best is not None:
            rows.append(_rxn_to_row(best))
    return rows
223
+
224
+
225
+ def _rxn_to_row(r: Reaction) -> dict[str, Any]:
226
+ """Summary row for a reaction."""
227
+ return {
228
+ "equation": r.equation,
229
+ "catalyst": r.catalyst,
230
+ "facet": r.facet,
231
+ "energy": r.energy,
232
+ "barrier": r.barrier,
233
+ "site": r.site,
234
+ "functional": r.functional,
235
+ "database": r.database,
236
+ "doi": r.doi,
237
+ "pub_id": r.pub_id,
238
+ }
@@ -0,0 +1,65 @@
1
+ """SQLAlchemy models for local catalysis reaction data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+
7
+ from sqlalchemy import (
8
+ Column,
9
+ DateTime,
10
+ Float,
11
+ Index,
12
+ Integer,
13
+ String,
14
+ Text,
15
+ )
16
+ from sqlalchemy.orm import DeclarativeBase
17
+
18
+ SCHEMA = "catapult"
19
+
20
+
21
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
23
+
24
+
25
class Reaction(Base):
    """ORM row for one DFT-computed catalytic reaction.

    Mapped to the ``reactions`` table inside the ``SCHEMA`` schema.
    """

    __tablename__ = "reactions"
    __table_args__ = {"schema": SCHEMA}

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)

    # Reaction description.
    equation = Column(Text, nullable=False)
    catalyst = Column(String(128), nullable=False, index=True)
    facet = Column(String(64), nullable=True, index=True)
    reactants = Column(Text, nullable=True)  # comma-separated
    products = Column(Text, nullable=True)  # comma-separated

    # Energetics.
    energy = Column(Float, nullable=True)  # reaction energy ΔE, eV
    barrier = Column(Float, nullable=True)  # activation barrier Ea, eV
    site = Column(Text, nullable=True)

    # Calculation and source information.
    functional = Column(String(64), nullable=True)
    dft_code = Column(String(64), nullable=True)
    database = Column(String(32), nullable=False)  # cathub, mp

    # Publication and upstream identifiers.
    doi = Column(Text, nullable=True)
    pub_id = Column(String(128), nullable=True, index=True)
    sys_id = Column(String(128), nullable=True)
46
+
47
+
48
class SyncLog(Base):
    """One row per sync run: what was synced, when, how much, how long.

    Mapped to the ``sync_log`` table inside the ``SCHEMA`` schema.
    """

    __tablename__ = "sync_log"
    __table_args__ = {"schema": SCHEMA}

    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Label of the upstream source that was synced.
    source = Column(String(64), nullable=False)
    # Defaults to the current UTC time (naive datetime) when not supplied.
    synced_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Number of rows handled by the run.
    row_count = Column(Integer, nullable=False, default=0)
    # Duration of the run (the ``_s`` suffix suggests seconds).
    duration_s = Column(Float, nullable=True)
59
+
60
+
61
# Secondary indexes on the columns used for range filters and exact-match
# filters; each is named ``idx_reactions_<column>``.
for _indexed in ("energy", "barrier", "functional", "database"):
    Index(f"idx_reactions_{_indexed}", getattr(Reaction, _indexed))
del _indexed
@@ -0,0 +1,202 @@
1
+ """Sync upstream catalysis databases to local storage.
2
+
3
+ Fetches data from Catalysis-Hub GraphQL and (optionally) Materials Project,
4
+ then inserts the rows into the local DB (Postgres schema or SQLite file).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import time
11
+ from datetime import datetime
12
+
13
+ from chemdb.config import ChemdbConfig
14
+ from chemdb.db import ensure_schema, make_engine, make_session
15
+ from catapult.db.schema import SCHEMA, Base, Reaction, SyncLog
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+ CATHUB_GRAPHQL = "https://api.catalysis-hub.org/graphql"
20
+ CATHUB_PAGE_SIZE = 200
21
+
22
+
23
class CatapultSyncer:
    """Downloads catalysis data from upstream sources into local DB."""

    # Commit once at least this many rows have been added since the last commit.
    _COMMIT_EVERY = 5000

    def __init__(self, config: ChemdbConfig):
        """Create the engine and session factory for the catapult schema."""
        self.config = config
        self.engine = make_engine(config, SCHEMA)
        self.Session = make_session(self.engine)

    def run(self, *, force: bool = False):
        """Run the full sync.

        Args:
            force: Drop all tables before recreating them, so the sync starts
                from an empty database.
        """
        ensure_schema(self.engine, SCHEMA)

        if force:
            Base.metadata.drop_all(self.engine)

        Base.metadata.create_all(self.engine)

        # Monotonic clock: the duration is unaffected by wall-clock changes.
        started = time.monotonic()

        count = self._sync_cathub()
        if self.config.mp_api_key:
            count += self._sync_mp()

        duration = time.monotonic() - started
        self._log_sync("all", count, duration)
        log.info("Sync complete: %d rows in %.1fs", count, duration)

    @staticmethod
    def _decode_node_id(raw_id) -> int | None:
        """Extract the numeric id from a base64-encoded ``"Reaction:NNN"``.

        Returns ``None`` when ``raw_id`` cannot be decoded into that form.
        """
        import base64

        try:
            decoded = base64.b64decode(raw_id).decode()
            return int(decoded.split(":", 1)[1])
        except (ValueError, IndexError, TypeError):
            # ValueError also covers binascii.Error and UnicodeDecodeError.
            return None

    def _sync_cathub(self) -> int:
        """Fetch reactions from Catalysis-Hub GraphQL API.

        Uses keyset pagination: id > last_id, ordered by id. Fetching is
        best-effort: on a request or GraphQL error the loop stops and the
        rows collected so far are committed.

        Returns:
            Number of reactions added to the session.
        """
        import httpx

        log.info("Syncing from Catalysis-Hub...")
        count = 0
        committed = 0
        last_id = 0

        with self.Session() as session:
            while True:
                query = _cathub_query(last_id, CATHUB_PAGE_SIZE)
                try:
                    resp = httpx.post(
                        CATHUB_GRAPHQL,
                        json={"query": query},
                        timeout=60,
                    )
                    resp.raise_for_status()
                    # Parsed inside the try so a malformed body is treated as
                    # a failed fetch rather than aborting the sync.
                    data = resp.json()
                except Exception as exc:  # deliberate best-effort boundary
                    log.error("Catalysis-Hub fetch failed: %s", exc)
                    break

                # A response may carry "data": null next to "errors", so the
                # value must be checked, not just the key.
                payload = data.get("data") if isinstance(data, dict) else None
                reactions = (payload or {}).get("reactions")
                if reactions is None:
                    errors = data.get("errors", []) if isinstance(data, dict) else data
                    log.error("GraphQL error: %s", errors)
                    break

                edges = reactions.get("edges") or []
                if not edges:
                    break

                page_last_id = last_id
                for edge in edges:
                    node = edge.get("node") or {}
                    node_id = self._decode_node_id(node.get("id", ""))
                    if node_id is None:
                        log.warning(
                            "Could not decode Catalysis-Hub id %r; "
                            "not used as pagination cursor",
                            node.get("id"),
                        )
                    else:
                        page_last_id = max(page_last_id, node_id)

                    rxn = self._cathub_to_model(node)
                    if rxn is not None:
                        session.add(rxn)
                        count += 1

                session.flush()
                if count - committed >= self._COMMIT_EVERY:
                    session.commit()
                    committed = count
                    log.info(" %d reactions (last_id=%d)...", count, page_last_id)

                if len(edges) < CATHUB_PAGE_SIZE:
                    break

                if page_last_id <= last_id:
                    # Without a new cursor the same page would be fetched and
                    # inserted again forever.
                    log.error(
                        "Catalysis-Hub pagination cursor did not advance "
                        "past id %d; stopping",
                        last_id,
                    )
                    break
                last_id = page_last_id

            session.commit()

        return count

    def _sync_mp(self) -> int:
        """Fetch bulk properties from Materials Project (placeholder, returns 0)."""
        log.info("MP sync: not yet implemented (needs mp-api client)")
        return 0

    def _cathub_to_model(self, node: dict) -> Reaction | None:
        """Convert a Catalysis-Hub GraphQL node to a Reaction model.

        Returns ``None`` when the node has no equation.
        """
        equation = node.get("Equation") or ""
        if not equation:
            return None

        # Parse reactants/products from equation
        reactants = ""
        products = ""
        if " -> " in equation:
            left, right = equation.split(" -> ", 1)
            reactants = left.strip()
            products = right.strip()

        # Prefer JSON reactants/products fields if available, else parse equation
        raw_reactants = node.get("reactants") or ""
        raw_products = node.get("products") or ""
        if raw_reactants and isinstance(raw_reactants, str) and raw_reactants != "{}":
            reactants = raw_reactants
        if raw_products and isinstance(raw_products, str) and raw_products != "{}":
            products = raw_products

        return Reaction(
            equation=equation,
            # The catalyst column is NOT NULL; an explicit null from the API
            # must not reach the insert.
            catalyst=node.get("chemicalComposition") or "",
            facet=node.get("facet", ""),
            reactants=reactants,
            products=products,
            energy=_float(node.get("reactionEnergy")),
            barrier=_float(node.get("activationEnergy")),
            site=node.get("sites", ""),
            functional=node.get("dftFunctional", ""),
            dft_code=node.get("dftCode", ""),
            database="cathub",
            pub_id=node.get("pubId"),
        )

    def _log_sync(self, source: str, count: int, duration: float):
        """Record one sync run in the ``SyncLog`` table."""
        from datetime import timezone

        # datetime.utcnow() is deprecated; this produces the same naive UTC
        # value for the timezone-less DateTime column.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        with self.Session() as session:
            session.add(
                SyncLog(
                    source=source,
                    synced_at=now_utc,
                    row_count=count,
                    duration_s=duration,
                )
            )
            session.commit()
167
+
168
+
169
+ def _cathub_query(last_id: int, page_size: int) -> str:
170
+ """Build a Catalysis-Hub GraphQL query with keyset pagination."""
171
+ id_filter = f', id: {last_id}, op: ">"' if last_id > 0 else ""
172
+ return f"""{{
173
+ reactions(first: {page_size}, order: "id"{id_filter}) {{
174
+ totalCount
175
+ edges {{
176
+ node {{
177
+ id
178
+ Equation
179
+ reactionEnergy
180
+ activationEnergy
181
+ chemicalComposition
182
+ surfaceComposition
183
+ facet
184
+ sites
185
+ reactants
186
+ products
187
+ pubId
188
+ dftCode
189
+ dftFunctional
190
+ }}
191
+ }}
192
+ }}
193
+ }}"""
194
+
195
+
196
+ def _float(v) -> float | None:
197
+ if v is None:
198
+ return None
199
+ try:
200
+ return float(v)
201
+ except (ValueError, TypeError):
202
+ return None
File without changes
@@ -0,0 +1,73 @@
1
+ """MCP server for CataPult — one tool: catapult_get.
2
+
3
+ Queries heterogeneous catalysis databases (Catalysis-Hub, Materials Project).
4
+ All data is local (Postgres or SQLite). Run ``chemdb sync catapult`` first.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from mcp.server.fastmcp import FastMCP
10
+
11
+ from catapult import tool
12
+
13
+ mcp = FastMCP("catapult")
14
+
15
+
16
@mcp.tool()
def catapult_get(
    id: str = "",
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> str:
    """Query heterogeneous catalysis databases.

    id: identifier — "doi:10.1021/...", "pub:xyz", "sys:abc"
    query: FTS5 full-text search (always AND with filters)
    catalyst: composition, OR/comparison logic ("Pd", "Pd,Pt,Cu" = compare)
    facet: Miller index ("111", "100")
    reactants: reactant formula ("CO", "H2O")
    products: product formula
    energy: reaction energy range, eV ("-1.5..0", "<-0.5")
    barrier: activation barrier range, eV ("<1.0")
    functional: DFT method filter ("BEEF-vdW", "PBE")
    database: source filter ("cathub", "mp")
    sort: sort order ("energy", "!barrier", "energy,catalyst")
    page: page number (10 results/page)

    No args → shape of entire database (catalyst counts, energy ranges, top facets).
    id provided → all reactions from that publication.
    Any filter → search with results + shape on page 1.
    Multi-catalyst → comparison table (best per catalyst).
    """
    # NOTE(review): the docstring above is presumably what MCP clients see as
    # the tool description, so its wording is left untouched.
    # Thin pass-through: every argument goes to the implementation unchanged.
    arguments = {
        "id": id,
        "query": query,
        "catalyst": catalyst,
        "facet": facet,
        "reactants": reactants,
        "products": products,
        "energy": energy,
        "barrier": barrier,
        "functional": functional,
        "database": database,
        "sort": sort,
        "page": page,
    }
    return tool.catapult_get(**arguments)
65
+
66
+
67
def main() -> None:
    """Console entry point: start the MCP server (stdio transport)."""
    mcp.run()
70
+
71
+
72
+ if __name__ == "__main__":
73
+ main()
@@ -0,0 +1,279 @@
1
+ """catapult_get implementation — formats query results as markdown strings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from chemdb.cite import format_citation
8
+ from chemdb.config import ChemdbConfig, load_config
9
+ from chemdb.db import make_engine, make_session
10
+ from chemdb.errors import ChemdbError
11
+
12
+ from catapult.db.query import PAGE_SIZE, get_by_id, get_shape, search
13
+ from catapult.db.schema import SCHEMA
14
+
15
+ _config: ChemdbConfig | None = None
16
+ _SessionFactory = None
17
+
18
+
19
def _get_session():
    """Return a new session, building the shared session factory on first use.

    The loaded configuration and the factory are cached in the module globals
    ``_config`` and ``_SessionFactory``.
    """
    global _config, _SessionFactory
    if _SessionFactory is not None:
        return _SessionFactory()

    _config = load_config()
    _SessionFactory = make_session(make_engine(_config, SCHEMA))
    return _SessionFactory()
26
+
27
+
28
def catapult_get(
    id: str = "",
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> str:
    """Query heterogeneous catalysis databases.

    Dispatches to one of three modes:

    * ``id`` given → identifier lookup (all other arguments are ignored);
    * no filter and ``page == 1`` → shape of the entire database;
    * otherwise → filtered, paginated search.

    Returns:
        Formatted markdown with results, shape, and hints. Errors are also
        returned as text: ``ChemdbError`` via its ``to_markdown()``, anything
        else as a one-line "Internal error" message.
    """
    try:
        # The context manager closes the session on every path; previously a
        # new session was opened per call and never closed.
        with _get_session() as session:
            # ID lookup mode
            if id:
                return _format_publication(get_by_id(session, id))

            # No args → shape of entire database
            has_filters = any(
                (
                    query,
                    catalyst,
                    facet,
                    reactants,
                    products,
                    energy,
                    barrier,
                    functional,
                    database,
                    sort,
                )
            )
            if not has_filters and page == 1:
                return _format_shape(get_shape(session))

            # Filter search
            data = search(
                session,
                query=query,
                catalyst=catalyst,
                facet=facet,
                reactants=reactants,
                products=products,
                energy=energy,
                barrier=barrier,
                functional=functional,
                database=database,
                sort=sort,
                page=page,
            )
            return _format_search(data, catalyst=catalyst)

    except ChemdbError as exc:
        return exc.to_markdown()
    except Exception as exc:
        # Tool boundary: report the failure as text instead of raising.
        return f"⚠ Internal error: {exc}"
94
+
95
+
96
+ # ── Formatters ──────────────────────────────────────────────────────
97
+
98
+
99
def _format_publication(data: dict[str, Any]) -> str:
    """Format publication/ID lookup results as markdown.

    Args:
        data: Lookup result with ``id``, ``total`` and ``results`` (summary rows).

    Returns:
        A heading, one numbered line per result, and a truncation notice when
        more reactions matched than are shown.
    """
    total = data["total"]
    results = data["results"]

    lines = [f"## {total} reactions for {data['id']}", ""]
    lines.extend(_format_rxn_line(i, r) for i, r in enumerate(results, 1))

    if total > len(results):
        lines.append("")
        # The previous hint suggested page=2, but ID lookups ignore the page
        # argument, so following it only returned the same rows again.
        lines.append(
            f"→ showing first {len(results)} of {total}; ID lookups are not paginated"
        )

    return "\n".join(lines)
115
+
116
+
117
+ def _format_shape(data: dict[str, Any]) -> str:
118
+ """Format full database shape (no-arg call)."""
119
+ total = data.get("total", 0)
120
+
121
+ lines = [f"## CataPult — ~{total:,} reactions"]
122
+ lines.append("")
123
+
124
+ cats = data.get("catalysts", {})
125
+ if cats:
126
+ lines.append("### Top catalysts (by reaction count)")
127
+ parts = [f"{cat} {count:,}" for cat, count in list(cats.items())[:8]]
128
+ lines.append(" | ".join(parts))
129
+
130
+ facets = data.get("facets", {})
131
+ if facets:
132
+ lines.append("")
133
+ lines.append("### Facets")
134
+ parts = [f"{f} {count:,}" for f, count in list(facets.items())[:8]]
135
+ lines.append(" | ".join(parts))
136
+
137
+ e = data.get("energy", {})
138
+ b = data.get("barrier", {})
139
+ if e.get("min") is not None:
140
+ lines.append("")
141
+ lines.append("### Energy ranges")
142
+ lines.append("| Property | Min | Avg | Max | Unit |")
143
+ lines.append("|----------|-----|-----|-----|------|")
144
+ if e.get("min") is not None:
145
+ lines.append(
146
+ f"| ΔE (reaction) | {e['min']:.1f} | {e['avg']:.1f} | {e['max']:.1f} | eV |"
147
+ )
148
+ if b.get("min") is not None:
149
+ lines.append(
150
+ f"| Ea (barrier) | {b['min']:.1f} | {b['avg']:.1f} | {b['max']:.1f} | eV |"
151
+ )
152
+
153
+ funcs = data.get("functionals", {})
154
+ if funcs:
155
+ lines.append("")
156
+ lines.append("### Functionals")
157
+ parts = [f"{f} {count:,}" for f, count in list(funcs.items())[:6]]
158
+ lines.append(" | ".join(parts))
159
+
160
+ lines.append("")
161
+ lines.append("### Sortable fields")
162
+ lines.append("energy, barrier, catalyst, facet")
163
+ lines.append("")
164
+ lines.append('→ catapult_get(catalyst="Pd") to browse palladium reactions')
165
+ lines.append(
166
+ '→ catapult_get(reactants="CO", catalyst="Pd,Pt,Cu", facet="111") to compare metals'
167
+ )
168
+ lines.append('→ catapult_get(query="oxygen evolution") for free-text search')
169
+
170
+ return "\n".join(lines)
171
+
172
+
173
def _format_search(data: dict[str, Any], catalyst: str = "") -> str:
    """Format search results with shape + hints.

    Args:
        data: Search result with ``total``, ``page``, ``results`` and
            optionally ``shape`` and ``comparison``.
        catalyst: The caller's raw catalyst argument, forwarded to the
            comparison formatter.

    Returns:
        Markdown text. A non-empty ``comparison`` yields the comparison table
        instead of the regular listing.
    """
    total = data["total"]
    page = data["page"]
    results = data["results"]
    start = (page - 1) * PAGE_SIZE + 1
    end = min(start + PAGE_SIZE - 1, total)

    # Comparison mode
    if data.get("comparison"):
        return _format_comparison(data, catalyst)

    if results:
        heading = f"## {total} reactions (showing {start}–{end})"
    else:
        # A page beyond the last result would otherwise print an inverted
        # range such as "showing 41–12".
        heading = f"## {total} reactions (page {page} is past the last result)"
    lines = [heading, ""]

    # Shape on page 1
    shape = data.get("shape")
    if shape:
        lines.append("### Shape")
        e = shape.get("energy", {})
        b = shape.get("barrier", {})
        if e.get("min") is not None:
            lines.append(
                f" ΔE {e['min']:.1f} to {e['max']:.1f} eV (mean {e['avg']:.2f})"
            )
        if b.get("min") is not None:
            lines.append(
                f" Ea {b['min']:.1f}–{b['max']:.1f} eV (mean {b['avg']:.2f})"
            )
        facets = shape.get("facets", {})
        if facets:
            parts = [f"{f} ×{count}" for f, count in list(facets.items())[:5]]
            lines.append(f" Facets: {' | '.join(parts)}")
        funcs = shape.get("functionals", {})
        if funcs:
            parts = [f"{f} {count}" for f, count in list(funcs.items())[:4]]
            lines.append(f" Functionals: {' | '.join(parts)}")
        lines.append("")

    # Result numbering continues across pages.
    lines.extend(_format_rxn_line(i, r) for i, r in enumerate(results, start))

    # Hints
    lines.append("")
    if results and end < total:
        lines.append(f"→ catapult_get(..., page={page + 1}) for next {PAGE_SIZE}")
    if results and results[0].get("doi"):
        doi = results[0]["doi"]
        lines.append(f'→ catapult_get(id="doi:{doi}") for full pub details')

    return "\n".join(lines)
224
+
225
+
226
+ def _format_comparison(data: dict[str, Any], catalyst: str) -> str:
227
+ """Format multi-catalyst comparison table."""
228
+ comparison = data["comparison"]
229
+ total = data["total"]
230
+
231
+ lines = [f"## Comparison — {len(comparison)} catalysts ({total} total reactions)"]
232
+ lines.append("")
233
+ lines.append("| Surface | ΔE (eV) | Ea (eV) | Site | Functional | Source |")
234
+ lines.append("|---------|---------|---------|------|------------|--------|")
235
+
236
+ for r in comparison:
237
+ cat_facet = r.get("catalyst", "?")
238
+ if r.get("facet"):
239
+ cat_facet += f"({r['facet']})"
240
+ energy_s = f"{r['energy']:.2f}" if r.get("energy") is not None else "?"
241
+ barrier_s = f"{r['barrier']:.2f}" if r.get("barrier") is not None else "?"
242
+ cite = r.get("doi", "")
243
+ if cite:
244
+ cite = f"[@{cite}]"
245
+ lines.append(
246
+ f"| {cat_facet} | {energy_s} | {barrier_s} | {r.get('site', '')} | {r.get('functional', '')} | {cite} |"
247
+ )
248
+
249
+ lines.append("")
250
+ cats = catalyst.split(",")
251
+ if cats:
252
+ lines.append(
253
+ f'→ catapult_get(catalyst="{cats[0].strip()}") for all {cats[0].strip()} reactions'
254
+ )
255
+
256
+ return "\n".join(lines)
257
+
258
+
259
+ def _format_rxn_line(i: int, r: dict[str, Any]) -> str:
260
+ """Format a single reaction result line."""
261
+ cat_facet = r.get("catalyst", "?")
262
+ if r.get("facet"):
263
+ cat_facet += f"({r['facet']})"
264
+
265
+ parts = [f"{cat_facet} — {r.get('equation', '?')}"]
266
+ if r.get("energy") is not None:
267
+ parts.append(f"ΔE {r['energy']:.2f} eV")
268
+ if r.get("barrier") is not None:
269
+ parts.append(f"Ea {r['barrier']:.2f} eV")
270
+ if r.get("functional"):
271
+ parts.append(r["functional"])
272
+
273
+ line = f"{i:>2}. {parts[0]} {' | '.join(parts[1:])}"
274
+
275
+ if r.get("doi"):
276
+ cite = format_citation(r["doi"])
277
+ line += f"\n {cite}"
278
+
279
+ return line
@@ -0,0 +1,114 @@
1
+ """Shared fixtures for catapult tests."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+ from sqlalchemy import create_engine, event
7
+ from sqlalchemy.orm import sessionmaker
8
+
9
+ from catapult.db.schema import Base, Reaction
10
+
11
+
12
@pytest.fixture
def engine():
    """Yield an in-memory SQLite engine with the schema tables created.

    The ``catapult`` schema name is translated to ``None`` through
    ``schema_translate_map`` (presumably because the models are declared
    under that schema, which SQLite does not have; confirm against
    ``catapult.db.schema``). The engine is disposed after the test so its
    pooled connection does not outlive the fixture.
    """
    eng = create_engine(
        "sqlite:///:memory:",
        execution_options={"schema_translate_map": {"catapult": None}},
    )

    @event.listens_for(eng, "connect")
    def _set_pragma(dbapi_conn, connection_record):
        # Runs for every new DBAPI connection opened by the pool.
        cursor = dbapi_conn.cursor()
        cursor.execute("PRAGMA journal_mode=WAL")
        cursor.close()

    Base.metadata.create_all(eng)
    yield eng
    # Teardown: close the pooled connection(s) instead of leaving them to
    # the garbage collector.
    eng.dispose()
28
+
29
+
30
@pytest.fixture
def session(engine):
    """Yield a session pre-loaded with five sample reactions.

    Three CO oxidation rows (Pd 111, Pd 100, Pt 111) share one publication,
    one water dissociation row on Cu has its own, and the O2 dissociation
    row on Pt is stored without ``doi`` or ``pub_id``. The session is
    closed once the test has finished with it.
    """
    medford = {"doi": "10.1021/acscatal.7b02335", "pub_id": "MedfordEtAl2017"}
    co_oxidation = {
        "equation": "CO* -> CO2",
        "reactants": "CO",
        "products": "CO2",
        "functional": "BEEF-vdW",
        **medford,
    }
    rows = [
        {
            **co_oxidation,
            "catalyst": "Pd",
            "facet": "111",
            "energy": -0.72,
            "barrier": 0.89,
            "site": "fcc",
            "sys_id": "rxn_001",
        },
        {
            **co_oxidation,
            "catalyst": "Pd",
            "facet": "100",
            "energy": -0.58,
            "barrier": 1.02,
            "site": "bridge",
            "sys_id": "rxn_002",
        },
        {
            **co_oxidation,
            "catalyst": "Pt",
            "facet": "111",
            "energy": -0.91,
            "barrier": 0.78,
            "site": "atop",
            "sys_id": "rxn_003",
        },
        {
            "equation": "H2O* -> OH* + H*",
            "catalyst": "Cu",
            "facet": "111",
            "reactants": "H2O",
            "products": "OH,H",
            "energy": -0.33,
            "barrier": 1.35,
            "site": "fcc",
            "functional": "PBE",
            "doi": "10.1021/ja5088237",
            "pub_id": "NorskovEtAl2014",
            "sys_id": "rxn_004",
        },
        {
            # Deliberately carries no doi / pub_id.
            "equation": "O2* -> 2O*",
            "catalyst": "Pt",
            "facet": "111",
            "reactants": "O2",
            "products": "O",
            "energy": -1.45,
            "barrier": 0.52,
            "site": "bridge",
            "functional": "RPBE",
            "sys_id": "rxn_005",
        },
    ]

    make_session = sessionmaker(bind=engine)
    db = make_session()
    # Every sample row comes from the same source database.
    db.add_all([Reaction(database="cathub", **fields) for fields in rows])
    db.commit()
    yield db
    db.close()
@@ -0,0 +1,84 @@
1
+ """Tests for catapult.db.query."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from chemdb.errors import IdNotFoundError, NoResultsError
8
+ from catapult.db.query import get_by_id, get_shape, search
9
+
10
+
11
class TestGetById:
    """Identifier lookups through ``get_by_id`` against the sample session."""

    def test_doi_prefix(self, session):
        """A ``doi:`` id returns a publication result with three reactions."""
        found = get_by_id(session, "doi:10.1021/acscatal.7b02335")
        assert found["type"] == "publication"
        assert found["total"] == 3  # 3 reactions from Medford

    def test_pub_prefix(self, session):
        """A ``pub:`` id returns the same three reactions."""
        assert get_by_id(session, "pub:MedfordEtAl2017")["total"] == 3

    def test_sys_prefix(self, session):
        """A ``sys:`` id returns exactly one reaction."""
        assert get_by_id(session, "sys:rxn_001")["total"] == 1

    def test_bare_string_as_pub(self, session):
        """An id without a prefix yields the publication's three reactions."""
        assert get_by_id(session, "MedfordEtAl2017")["total"] == 3

    def test_not_found(self, session):
        """An unknown DOI raises ``IdNotFoundError``."""
        unknown = "doi:10.9999/nonexistent"
        with pytest.raises(IdNotFoundError):
            get_by_id(session, unknown)
32
+
33
+
34
class TestSearch:
    """Filter behaviour of ``search`` against the five sample reactions."""

    def test_catalyst_single(self, session):
        """One catalyst name returns only that catalyst's rows."""
        assert search(session, catalyst="Pd")["total"] == 2

    def test_catalyst_multi_comparison(self, session):
        """Comma-separated catalysts return all matches plus a comparison."""
        found = search(session, catalyst="Pd,Pt")
        assert found["total"] == 4  # 2 Pd + 2 Pt
        assert found["comparison"] is not None

    def test_facet_filter(self, session):
        """Filtering on facet 111 matches four rows."""
        assert search(session, facet="111")["total"] == 4

    def test_reactants_filter(self, session):
        """Filtering on the CO reactant matches three rows."""
        assert search(session, reactants="CO")["total"] == 3

    def test_energy_range(self, session):
        """The ``-1..0`` energy range matches four rows."""
        found = search(session, energy="-1..0")
        assert found["total"] == 4  # all except O2 dissociation at -1.45

    def test_barrier_range(self, session):
        """The ``<1.0`` barrier bound matches three rows."""
        found = search(session, barrier="<1.0")
        assert found["total"] == 3  # Pd(111) 0.89, Pt(111) 0.78, Pt(111) 0.52

    def test_functional_filter(self, session):
        """Filtering on the PBE functional matches a single row."""
        found = search(session, functional="PBE")
        assert found["total"] == 1  # Cu only

    def test_database_filter(self, session):
        """Filtering on the cathub database matches every sample row."""
        assert search(session, database="cathub")["total"] == 5

    def test_no_results(self, session):
        """A catalyst absent from the data raises ``NoResultsError``."""
        with pytest.raises(NoResultsError):
            search(session, catalyst="Au")

    def test_shape_on_page_1(self, session):
        """A default search carries a shape summary that includes energy."""
        shape = search(session, catalyst="Pd")["shape"]
        assert shape is not None
        assert "energy" in shape
76
+
77
+
78
class TestGetShape:
    """Summary returned by ``get_shape`` for the sample session."""

    def test_full_shape(self, session):
        """The shape reports five reactions and the expected sections."""
        summary = get_shape(session)
        assert summary["total"] == 5
        for section in ("catalysts", "facets", "energy"):
            assert section in summary
@@ -0,0 +1,38 @@
1
+ """Tests for catapult.tool — formatted output."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from catapult.db.query import get_by_id, get_shape, search
6
+ from catapult.tool import _format_publication, _format_search, _format_shape
7
+
8
+
9
class TestFormatPublication:
    """Text produced by ``_format_publication``."""

    def test_pub_output(self, session):
        """The rendered publication mentions reactions and CO."""
        rendered = _format_publication(
            get_by_id(session, "doi:10.1021/acscatal.7b02335")
        )
        assert "reactions" in rendered
        assert "CO" in rendered
15
+
16
+
17
class TestFormatShape:
    """Text produced by ``_format_shape``."""

    def test_shape_output(self, session):
        """The rendered shape contains each expected marker string."""
        rendered = _format_shape(get_shape(session))
        expected_markers = (
            "CataPult",
            "Top catalysts",
            "Sortable fields",
            "→ catapult_get",
        )
        for marker in expected_markers:
            assert marker in rendered
25
+
26
+
27
class TestFormatSearch:
    """Text produced by ``_format_search`` for list and comparison results."""

    def test_search_output(self, session):
        """A single-catalyst search renders reactions and a shape section."""
        found = search(session, catalyst="Pd")
        rendered = _format_search(found, catalyst="Pd")
        assert "reactions" in rendered
        assert "Shape" in rendered

    def test_comparison_output(self, session):
        """A multi-catalyst search renders the comparison table."""
        found = search(session, catalyst="Pd,Pt", facet="111")
        rendered = _format_search(found, catalyst="Pd,Pt")
        assert "Comparison" in rendered
        assert "Surface" in rendered