catapult-mcp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- catapult_mcp-0.2.0/.github/workflows/publish.yml +33 -0
- catapult_mcp-0.2.0/.gitignore +37 -0
- catapult_mcp-0.2.0/CHANGELOG.md +11 -0
- catapult_mcp-0.2.0/LICENSE +17 -0
- catapult_mcp-0.2.0/PKG-INFO +17 -0
- catapult_mcp-0.2.0/README.md +32 -0
- catapult_mcp-0.2.0/pyproject.toml +57 -0
- catapult_mcp-0.2.0/src/catapult/__init__.py +3 -0
- catapult_mcp-0.2.0/src/catapult/db/__init__.py +0 -0
- catapult_mcp-0.2.0/src/catapult/db/query.py +238 -0
- catapult_mcp-0.2.0/src/catapult/db/schema.py +65 -0
- catapult_mcp-0.2.0/src/catapult/db/sync.py +202 -0
- catapult_mcp-0.2.0/src/catapult/py.typed +0 -0
- catapult_mcp-0.2.0/src/catapult/server.py +73 -0
- catapult_mcp-0.2.0/src/catapult/tool.py +279 -0
- catapult_mcp-0.2.0/tests/conftest.py +114 -0
- catapult_mcp-0.2.0/tests/test_query.py +84 -0
- catapult_mcp-0.2.0/tests/test_tool.py +38 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write # trusted publishing (OIDC)
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: astral-sh/setup-uv@v4
|
|
17
|
+
with:
|
|
18
|
+
version: "latest"
|
|
19
|
+
- run: uv venv
|
|
20
|
+
- run: uv pip install -e ".[dev]"
|
|
21
|
+
- run: uv run --no-sync pytest
|
|
22
|
+
|
|
23
|
+
publish:
|
|
24
|
+
needs: test
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
environment: pypi
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
- uses: astral-sh/setup-uv@v4
|
|
30
|
+
with:
|
|
31
|
+
version: "latest"
|
|
32
|
+
- run: uv build
|
|
33
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
|
|
14
|
+
# Testing
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
htmlcov/
|
|
18
|
+
|
|
19
|
+
# IDE
|
|
20
|
+
.idea/
|
|
21
|
+
.vscode/
|
|
22
|
+
*.swp
|
|
23
|
+
*.swo
|
|
24
|
+
*~
|
|
25
|
+
|
|
26
|
+
# OS
|
|
27
|
+
.DS_Store
|
|
28
|
+
Thumbs.db
|
|
29
|
+
|
|
30
|
+
# UV
|
|
31
|
+
uv.lock
|
|
32
|
+
|
|
33
|
+
# Data
|
|
34
|
+
*.acatome/
|
|
35
|
+
inbox/
|
|
36
|
+
completed/
|
|
37
|
+
errors/
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2024-2026 Reto Stamm
|
|
5
|
+
|
|
6
|
+
This program is free software: you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
This program is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: catapult-mcp
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: MCP server for querying heterogeneous catalysis databases
|
|
5
|
+
Author-email: Reto Stamm <reto@retostamm.com>
|
|
6
|
+
License-Expression: GPL-3.0-or-later
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: catalysis,chemistry,database,materials-science,mcp
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Requires-Dist: chemdb-common>=0.1.0
|
|
11
|
+
Requires-Dist: mcp>=1.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: black>=24.0; extra == 'dev'
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
16
|
+
Provides-Extra: postgres
|
|
17
|
+
Requires-Dist: psycopg[binary]>=3.0; extra == 'postgres'
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# catapult-mcp
|
|
2
|
+
|
|
3
|
+
MCP server for querying heterogeneous catalysis databases. Exposes reaction data, catalysts, and conditions via the Model Context Protocol.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Reaction search** — query by catalyst, reactant, product, or conditions
|
|
8
|
+
- **SQLAlchemy backend** — SQLite (default) or PostgreSQL
|
|
9
|
+
- **MCP protocol** — compatible with any MCP-aware LLM client
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv pip install -e .
|
|
15
|
+
# With PostgreSQL support:
|
|
16
|
+
uv pip install -e ".[postgres]"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
catapult-mcp # starts the MCP server
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Dependencies
|
|
26
|
+
|
|
27
|
+
- **chemdb-common** — shared database models and CLI
|
|
28
|
+
- **mcp** — Model Context Protocol server framework
|
|
29
|
+
|
|
30
|
+
## License
|
|
31
|
+
|
|
32
|
+
GPL-3.0-or-later — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "catapult-mcp"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "MCP server for querying heterogeneous catalysis databases"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = "GPL-3.0-or-later"
|
|
11
|
+
authors = [{name = "Reto Stamm", email = "reto@retostamm.com"}]
|
|
12
|
+
keywords = ["mcp", "catalysis", "chemistry", "materials-science", "database"]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"chemdb-common>=0.1.0",
|
|
15
|
+
"mcp>=1.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
postgres = [
|
|
20
|
+
"psycopg[binary]>=3.0",
|
|
21
|
+
]
|
|
22
|
+
dev = [
|
|
23
|
+
"pytest>=8.0",
|
|
24
|
+
"black>=24.0",
|
|
25
|
+
"ruff>=0.5",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.scripts]
|
|
29
|
+
catapult-mcp = "catapult.server:main"
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.wheel]
|
|
32
|
+
packages = ["src/catapult"]
|
|
33
|
+
|
|
34
|
+
[tool.pytest.ini_options]
|
|
35
|
+
testpaths = ["tests"]
|
|
36
|
+
|
|
37
|
+
[tool.black]
|
|
38
|
+
line-length = 88
|
|
39
|
+
|
|
40
|
+
[tool.ruff]
|
|
41
|
+
line-length = 88
|
|
42
|
+
|
|
43
|
+
[tool.bumpversion]
|
|
44
|
+
current_version = "0.2.0"
|
|
45
|
+
commit = true
|
|
46
|
+
tag = true
|
|
47
|
+
tag_name = "v{new_version}"
|
|
48
|
+
|
|
49
|
+
[[tool.bumpversion.files]]
|
|
50
|
+
filename = "pyproject.toml"
|
|
51
|
+
search = 'version = "{current_version}"'
|
|
52
|
+
replace = 'version = "{new_version}"'
|
|
53
|
+
|
|
54
|
+
[[tool.bumpversion.files]]
|
|
55
|
+
filename = "src/catapult/__init__.py"
|
|
56
|
+
search = '__version__ = "{current_version}"'
|
|
57
|
+
replace = '__version__ = "{new_version}"'
|
|
File without changes
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Read-only queries for the catapult_get tool.
|
|
2
|
+
|
|
3
|
+
All queries are local SQL — no runtime API calls.
|
|
4
|
+
Shape is computed on-the-fly via SQL aggregation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import func, text
|
|
12
|
+
from sqlalchemy.orm import Session
|
|
13
|
+
|
|
14
|
+
from chemdb.errors import IdNotFoundError, InvalidRangeError, NoResultsError
|
|
15
|
+
from chemdb.ranges import parse_range
|
|
16
|
+
from chemdb.sort import parse_sort
|
|
17
|
+
|
|
18
|
+
from catapult.db.schema import Reaction
|
|
19
|
+
|
|
20
|
+
# Number of result rows returned per page by the query helpers in this module.
PAGE_SIZE = 10

# Field names accepted in a ``sort`` expression; handed to parse_sort() by search().
# "relevance" is accepted but skipped when the ORDER BY is built.
SORTABLE_FIELDS = {"energy", "barrier", "catalyst", "facet", "relevance"}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_by_id(session: Session, id_str: str, page: int = 1) -> dict[str, Any]:
    """Look up reactions by identifier (doi:, pub:, sys:, or bare string).

    Args:
        session: Open SQLAlchemy session.
        id_str: ``"doi:<doi>"``, ``"pub:<fragment>"``, ``"sys:<id>"`` or a bare
            string, which is treated as a publication-id fragment.
        page: 1-based page of results to return (values below 1 are treated
            as 1). Defaults to the first page.

    Returns:
        A dict with ``type``, ``id``, ``total`` (all matching rows) and
        ``results`` (at most ``PAGE_SIZE`` summary rows for the page).

    Raises:
        IdNotFoundError: Unknown scheme, empty identifier, or no matching rows.
    """
    scheme, sep, ident = id_str.partition(":")
    if not sep:
        # Bare string → publication ID search
        scheme, ident = "pub", scheme
    if not ident:
        # "pub:" / "doi:" / "" carry no identifier; do not fall back to
        # searching for the scheme name itself.
        raise IdNotFoundError(id_str)

    q = session.query(Reaction)
    if scheme == "doi":
        q = q.filter(Reaction.doi == ident)
    elif scheme == "pub":
        q = q.filter(Reaction.pub_id.ilike(f"%{ident}%"))
    elif scheme == "sys":
        q = q.filter(Reaction.sys_id == ident)
    else:
        raise IdNotFoundError(id_str)

    # Count in SQL instead of materialising every matching row.
    total = q.count()
    if total == 0:
        raise IdNotFoundError(id_str)

    offset = (max(page, 1) - 1) * PAGE_SIZE
    # Order by primary key so pages are stable across calls.
    results = q.order_by(Reaction.id.asc()).offset(offset).limit(PAGE_SIZE).all()

    return {
        "type": "publication",
        "id": id_str,
        "total": total,
        "results": [_rxn_to_row(r) for r in results],
    }
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def search(
    session: Session,
    *,
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> dict[str, Any]:
    """Filter search with shape + paginated results.

    Args:
        session: Open SQLAlchemy session.
        query: Accepted for interface compatibility.
            NOTE(review): no filter is derived from ``query`` in this function.
        catalyst: Comma-separated catalyst fragments, OR-ed together; more
            than one fragment switches on comparison mode.
        facet: Exact facet match.
        reactants, products: Case-insensitive substring matches.
        energy, barrier: Range expressions understood by ``parse_range``.
        functional, database: Case-insensitive matches without wildcards.
        sort: Sort expression understood by ``parse_sort``.
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        Dict with ``total``, ``page``, ``page_size``, ``shape`` (page 1 only),
        ``comparison`` (page 1 of a multi-catalyst search only) and ``results``.

    Raises:
        InvalidRangeError: ``energy`` or ``barrier`` could not be parsed.
        NoResultsError: The filters match no rows.
    """
    from sqlalchemy import or_

    # A page below 1 would produce a negative OFFSET.
    page = max(page, 1)

    q = session.query(Reaction)

    # Catalyst filter (OR logic for comparison)
    catalysts = [c.strip() for c in catalyst.split(",") if c.strip()]
    if catalysts:
        q = q.filter(or_(*(Reaction.catalyst.ilike(f"%{c}%") for c in catalysts)))

    if facet:
        q = q.filter(Reaction.facet == facet)

    if reactants:
        q = q.filter(Reaction.reactants.ilike(f"%{reactants}%"))

    if products:
        q = q.filter(Reaction.products.ilike(f"%{products}%"))

    # Range filters
    for field_name, raw in (("energy", energy), ("barrier", barrier)):
        if not raw:
            continue
        try:
            parsed = parse_range(raw)
        except ValueError as exc:
            raise InvalidRangeError(field_name, raw) from exc
        # NOTE(review): assumes to_sql_clause() yields bind-parameter names
        # that do not clash between the two fields — confirm in chemdb.ranges.
        clause, params = parsed.to_sql_clause(field_name)
        q = q.filter(text(clause).bindparams(**params))

    if functional:
        q = q.filter(Reaction.functional.ilike(functional))

    if database:
        q = q.filter(Reaction.database.ilike(database))

    total = q.count()
    if total == 0:
        raise NoResultsError()

    # Comparison mode: multiple catalysts
    is_comparison = len(catalysts) > 1

    # Sort
    sort_fields = parse_sort(sort, SORTABLE_FIELDS) if sort else []
    if sort_fields:
        for sf in sort_fields:
            if sf.name == "relevance":
                continue
            col = getattr(Reaction, sf.name, None)
            if col is not None:
                q = q.order_by(col.desc() if sf.descending else col.asc())
    else:
        q = q.order_by(Reaction.energy.asc())
    # Primary-key tie-breaker: rows with equal sort keys keep a stable
    # position, so OFFSET/LIMIT pages neither repeat nor skip rows.
    q = q.order_by(Reaction.id.asc())

    offset = (page - 1) * PAGE_SIZE
    results = q.offset(offset).limit(PAGE_SIZE).all()

    shape = _compute_shape(session, q) if page == 1 else None

    # For comparison mode, also compute per-catalyst best
    comparison = None
    if is_comparison and page == 1:
        comparison = _compute_comparison(session, q, catalysts)

    return {
        "total": total,
        "page": page,
        "page_size": PAGE_SIZE,
        "shape": shape,
        "comparison": comparison,
        "results": [_rxn_to_row(r) for r in results],
    }
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def get_shape(session: Session) -> dict[str, Any]:
    """Summarise the whole reactions table (used for the no-argument call).

    Returns ``{"total": 0}`` for an empty table; otherwise the statistics from
    ``_compute_shape`` over an unfiltered query, plus a ``total`` row count.
    """
    row_count = session.query(Reaction).count()
    if not row_count:
        return {"total": 0}

    summary = _compute_shape(session, session.query(Reaction))
    summary.update(total=row_count)
    return summary
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _top_counts(base, column, *, skip_blank: bool):
    """Return up to 10 ``(value, row_count)`` pairs of *column*, most frequent first."""
    counted = base.with_entities(column, func.count(Reaction.id))
    if skip_blank:
        counted = counted.filter(column.isnot(None), column != "")
    return (
        counted.group_by(column)
        .order_by(func.count(Reaction.id).desc())
        .limit(10)
        .all()
    )


def _compute_shape(session: Session, q) -> dict[str, Any]:
    """Compute shape statistics from a query.

    The statistics are restricted to the rows selected by *q*, so a filtered
    search reports the shape of its own result set rather than of the whole
    table.

    Args:
        session: Kept for interface compatibility; the aggregates are issued
            through *q*, which is already bound to a session.
        q: Reaction query carrying the caller's filters. Any ORDER BY on it is
            dropped, because ordering by a plain column is not valid next to
            ungrouped aggregates on every backend.

    Returns:
        Dict with ``energy`` and ``barrier`` (``min``/``max``/``avg``) and the
        ten most frequent ``catalysts``, ``facets`` and ``functionals`` mapped
        to their row counts.
    """
    base = q.order_by(None)

    stats = base.with_entities(
        func.count(Reaction.id),
        func.min(Reaction.energy),
        func.max(Reaction.energy),
        func.avg(Reaction.energy),
        func.min(Reaction.barrier),
        func.max(Reaction.barrier),
        func.avg(Reaction.barrier),
    ).one()

    # Catalysts are counted as-is; blank facets/functionals are left out.
    cat_counts = _top_counts(base, Reaction.catalyst, skip_blank=False)
    facet_counts = _top_counts(base, Reaction.facet, skip_blank=True)
    func_counts = _top_counts(base, Reaction.functional, skip_blank=True)

    return {
        "energy": {"min": stats[1], "max": stats[2], "avg": stats[3]},
        "barrier": {"min": stats[4], "max": stats[5], "avg": stats[6]},
        "catalysts": {cat: count for cat, count in cat_counts},
        "facets": {f: count for f, count in facet_counts},
        "functionals": {f: count for f, count in func_counts},
    }
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _compute_comparison(
    session: Session, q, catalysts: list[str]
) -> list[dict[str, Any]]:
    """Best (lowest energy) row per catalyst for comparison table.

    Args:
        session: Kept for interface compatibility; *q* is already bound.
        q: Filtered reaction query. Any ordering already applied to it is
            discarded so that "best" always means lowest energy, whatever
            sort the caller asked for.
        catalysts: Catalyst fragments; each is matched case-insensitively as a
            substring of ``Reaction.catalyst``.

    Returns:
        One summary row per fragment that has at least one match, in the
        order of *catalysts*.
    """
    base = q.order_by(None)
    rows = []
    for cat in catalysts:
        best = (
            base.filter(Reaction.catalyst.ilike(f"%{cat}%"))
            # Rows without an energy sort last on every backend, so they are
            # picked only when the catalyst has no energy values at all.
            .order_by(Reaction.energy.is_(None), Reaction.energy.asc())
            .first()
        )
        if best is not None:
            rows.append(_rxn_to_row(best))
    return rows
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _rxn_to_row(r: Reaction) -> dict[str, Any]:
|
|
226
|
+
"""Summary row for a reaction."""
|
|
227
|
+
return {
|
|
228
|
+
"equation": r.equation,
|
|
229
|
+
"catalyst": r.catalyst,
|
|
230
|
+
"facet": r.facet,
|
|
231
|
+
"energy": r.energy,
|
|
232
|
+
"barrier": r.barrier,
|
|
233
|
+
"site": r.site,
|
|
234
|
+
"functional": r.functional,
|
|
235
|
+
"database": r.database,
|
|
236
|
+
"doi": r.doi,
|
|
237
|
+
"pub_id": r.pub_id,
|
|
238
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""SQLAlchemy models for local catalysis reaction data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
from sqlalchemy import (
|
|
8
|
+
Column,
|
|
9
|
+
DateTime,
|
|
10
|
+
Float,
|
|
11
|
+
Index,
|
|
12
|
+
Integer,
|
|
13
|
+
String,
|
|
14
|
+
Text,
|
|
15
|
+
)
|
|
16
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
17
|
+
|
|
18
|
+
# Database schema name applied to every table declared in this module.
SCHEMA = "catapult"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Base(DeclarativeBase):
    """Declarative base shared by the models in this module."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Reaction(Base):
    """A DFT-computed catalytic reaction.

    One row per reaction record. ``catalyst``, ``facet`` and ``pub_id`` carry
    column-level indexes.
    """

    __tablename__ = "reactions"
    __table_args__ = {"schema": SCHEMA}

    # Surrogate primary key, assigned by the database.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Reaction equation text; required.
    equation = Column(Text, nullable=False)
    # Catalyst composition; required and indexed.
    catalyst = Column(String(128), nullable=False, index=True)
    facet = Column(String(64), nullable=True, index=True)
    reactants = Column(Text, nullable=True)  # comma-separated
    products = Column(Text, nullable=True)  # comma-separated
    energy = Column(Float, nullable=True)  # reaction energy ΔE, eV
    barrier = Column(Float, nullable=True)  # activation barrier Ea, eV
    site = Column(Text, nullable=True)
    functional = Column(String(64), nullable=True)
    dft_code = Column(String(64), nullable=True)
    database = Column(String(32), nullable=False)  # cathub, mp
    doi = Column(Text, nullable=True)
    pub_id = Column(String(128), nullable=True, index=True)
    sys_id = Column(String(128), nullable=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SyncLog(Base):
    """Sync history: one row per recorded sync run."""

    __tablename__ = "sync_log"
    __table_args__ = {"schema": SCHEMA}

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Label of the upstream source that was synced.
    source = Column(String(64), nullable=False)
    # Naive UTC timestamp, filled in when the caller does not supply one.
    # NOTE(review): datetime.utcnow is deprecated in recent Python releases —
    # confirm whether a timezone-aware default is wanted here.
    synced_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Number of rows written by the run.
    row_count = Column(Integer, nullable=False, default=0)
    # Duration of the run in seconds.
    duration_s = Column(Float, nullable=True)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Indexes for common queries: the columns used for range filters (energy,
# barrier) and for the functional/database filters. catalyst, facet and
# pub_id are indexed on their column definitions instead.
Index("idx_reactions_energy", Reaction.energy)
Index("idx_reactions_barrier", Reaction.barrier)
Index("idx_reactions_functional", Reaction.functional)
Index("idx_reactions_database", Reaction.database)
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Sync upstream catalysis databases to local storage.
|
|
2
|
+
|
|
3
|
+
Fetches data from Catalysis-Hub GraphQL and (optionally) Materials Project,
|
|
4
|
+
then inserts the fetched rows into the local DB (Postgres schema or SQLite file).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import time
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
from chemdb.config import ChemdbConfig
|
|
14
|
+
from chemdb.db import ensure_schema, make_engine, make_session
|
|
15
|
+
from catapult.db.schema import SCHEMA, Base, Reaction, SyncLog
|
|
16
|
+
|
|
17
|
+
# Module-level logger for sync progress and failures.
log = logging.getLogger(__name__)

# Catalysis-Hub GraphQL endpoint that the sync posts its queries to.
CATHUB_GRAPHQL = "https://api.catalysis-hub.org/graphql"
# Number of reactions requested per GraphQL page.
CATHUB_PAGE_SIZE = 200
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CatapultSyncer:
    """Downloads catalysis data from upstream sources into local DB."""

    def __init__(self, config: ChemdbConfig):
        """Create the engine and session factory for the catapult schema."""
        self.config = config
        self.engine = make_engine(config, SCHEMA)
        self.Session = make_session(self.engine)

    def run(self, *, force: bool = False):
        """Run the full sync.

        Args:
            force: Drop all catapult tables before recreating them, so the
                sync starts from an empty database.
        """
        ensure_schema(self.engine, SCHEMA)

        if force:
            Base.metadata.drop_all(self.engine)

        Base.metadata.create_all(self.engine)

        # Monotonic clock: the duration is not affected by wall-clock changes.
        t0 = time.monotonic()

        count = self._sync_cathub()
        if self.config.mp_api_key:
            count += self._sync_mp()

        duration = time.monotonic() - t0
        self._log_sync("all", count, duration)
        log.info("Sync complete: %d rows in %.1fs", count, duration)

    def _sync_cathub(self) -> int:
        """Fetch reactions from Catalysis-Hub GraphQL API.

        Uses keyset pagination: id > last_id, ordered by id.

        Returns:
            Number of reactions added to the session. Rows fetched before a
            failed request are still committed.
        """
        import base64

        import httpx

        log.info("Syncing from Catalysis-Hub...")
        count = 0
        last_id = 0

        with self.Session() as session:
            while True:
                query = _cathub_query(last_id, CATHUB_PAGE_SIZE)
                try:
                    resp = httpx.post(
                        CATHUB_GRAPHQL,
                        json={"query": query},
                        timeout=60,
                    )
                    resp.raise_for_status()
                    # A non-JSON body is a failed fetch too.
                    data = resp.json()
                except Exception as exc:
                    log.error("Catalysis-Hub fetch failed: %s", exc)
                    break

                # The response may carry "data": null next to "errors".
                reactions = (data.get("data") or {}).get("reactions")
                if reactions is None:
                    errors = data.get("errors", [])
                    log.error("GraphQL error: %s", errors)
                    break

                edges = reactions.get("edges") or []
                if not edges:
                    break

                page_start_id = last_id
                for edge in edges:
                    node = edge.get("node") or {}
                    # Extract numeric id from base64 "Reaction:NNN"
                    raw_id = node.get("id", "")
                    try:
                        decoded = base64.b64decode(raw_id).decode()
                        last_id = int(decoded.split(":", 1)[1])
                    except (ValueError, IndexError, TypeError):
                        # Covers bad base64, non-UTF-8 bytes, a missing ":"
                        # and a non-numeric suffix; the cursor stays put.
                        log.warning("Unparseable Catalysis-Hub id: %r", raw_id)

                    rxn = self._cathub_to_model(node)
                    if rxn:
                        session.add(rxn)
                        count += 1

                session.flush()
                # Commit on the first page after each multiple of 5000 rows.
                if count % 5000 < CATHUB_PAGE_SIZE:
                    session.commit()
                    log.info("  %d reactions (last_id=%d)...", count, last_id)

                if len(edges) < CATHUB_PAGE_SIZE:
                    break

                if last_id <= page_start_id:
                    # The cursor did not advance, so the next request would
                    # return this same page again, forever.
                    log.error(
                        "Catalysis-Hub pagination stalled at id %d; stopping",
                        last_id,
                    )
                    break

            session.commit()

        return count

    def _sync_mp(self) -> int:
        """Fetch bulk properties from Materials Project."""
        log.info("MP sync: not yet implemented (needs mp-api client)")
        return 0

    def _cathub_to_model(self, node: dict) -> Reaction | None:
        """Convert a Catalysis-Hub GraphQL node to a Reaction model.

        Returns:
            A new ``Reaction``, or ``None`` when the node has no equation.
        """
        equation = node.get("Equation", "")
        if not equation:
            return None

        # Parse reactants/products from equation
        reactants = ""
        products = ""
        if " -> " in equation:
            parts = equation.split(" -> ", 1)
            reactants = parts[0].strip()
            products = parts[1].strip()

        # Prefer JSON reactants/products fields if available, else parse equation
        raw_reactants = node.get("reactants") or ""
        raw_products = node.get("products") or ""
        if raw_reactants and isinstance(raw_reactants, str) and raw_reactants != "{}":
            reactants = raw_reactants
        if raw_products and isinstance(raw_products, str) and raw_products != "{}":
            products = raw_products

        return Reaction(
            equation=equation,
            # The catalyst column is non-nullable: map an explicit null to "".
            catalyst=node.get("chemicalComposition") or "",
            facet=node.get("facet", ""),
            reactants=reactants,
            products=products,
            energy=_float(node.get("reactionEnergy")),
            barrier=_float(node.get("activationEnergy")),
            site=node.get("sites", ""),
            functional=node.get("dftFunctional", ""),
            dft_code=node.get("dftCode", ""),
            database="cathub",
            pub_id=node.get("pubId"),
        )

    def _log_sync(self, source: str, count: int, duration: float):
        """Record one sync run in the ``sync_log`` table."""
        with self.Session() as session:
            session.add(
                SyncLog(
                    source=source,
                    synced_at=datetime.utcnow(),
                    row_count=count,
                    duration_s=duration,
                )
            )
            session.commit()
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _cathub_query(last_id: int, page_size: int) -> str:
    """Build a Catalysis-Hub GraphQL query with keyset pagination.

    Args:
        last_id: Numeric id of the last reaction already fetched; 0 (or less)
            requests the first page, without an id filter.
        page_size: Value for the ``first:`` argument (rows per page).

    Returns:
        The GraphQL query text, selecting the node fields that
        ``CatapultSyncer`` reads.
    """
    # The id filter is only added once a previous page has set a cursor.
    id_filter = f', id: {last_id}, op: ">"' if last_id > 0 else ""
    # Doubled braces are literal braces inside the f-string.
    # NOTE(review): the query lines are flush-left because the reviewed view
    # did not preserve their original indentation.
    return f"""{{
reactions(first: {page_size}, order: "id"{id_filter}) {{
totalCount
edges {{
node {{
id
Equation
reactionEnergy
activationEnergy
chemicalComposition
surfaceComposition
facet
sites
reactants
products
pubId
dftCode
dftFunctional
}}
}}
}}
}}"""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _float(v) -> float | None:
|
|
197
|
+
if v is None:
|
|
198
|
+
return None
|
|
199
|
+
try:
|
|
200
|
+
return float(v)
|
|
201
|
+
except (ValueError, TypeError):
|
|
202
|
+
return None
|
|
File without changes
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""MCP server for CataPult — one tool: catapult_get.
|
|
2
|
+
|
|
3
|
+
Queries heterogeneous catalysis databases (Catalysis-Hub, Materials Project).
|
|
4
|
+
All data is local (Postgres or SQLite). Run ``chemdb sync catapult`` first.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from mcp.server.fastmcp import FastMCP
|
|
10
|
+
|
|
11
|
+
from catapult import tool
|
|
12
|
+
|
|
13
|
+
# FastMCP server instance; tools are registered on it with @mcp.tool().
mcp = FastMCP("catapult")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Registered as the server's only tool. This wrapper forwards every argument
# unchanged to catapult.tool.catapult_get and returns that call's string result.
# NOTE(review): the docstring advertises full-text search for `query`, but the
# search() helper in catapult.db.query does not derive any filter from it —
# confirm whether that is still to be implemented.
@mcp.tool()
def catapult_get(
    id: str = "",
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> str:
    """Query heterogeneous catalysis databases.

    id: identifier — "doi:10.1021/...", "pub:xyz", "sys:abc"
    query: FTS5 full-text search (always AND with filters)
    catalyst: composition, OR/comparison logic ("Pd", "Pd,Pt,Cu" = compare)
    facet: Miller index ("111", "100")
    reactants: reactant formula ("CO", "H2O")
    products: product formula
    energy: reaction energy range, eV ("-1.5..0", "<-0.5")
    barrier: activation barrier range, eV ("<1.0")
    functional: DFT method filter ("BEEF-vdW", "PBE")
    database: source filter ("cathub", "mp")
    sort: sort order ("energy", "!barrier", "energy,catalyst")
    page: page number (10 results/page)

    No args → shape of entire database (catalyst counts, energy ranges, top facets).
    id provided → all reactions from that publication.
    Any filter → search with results + shape on page 1.
    Multi-catalyst → comparison table (best per catalyst).
    """
    return tool.catapult_get(
        id=id,
        query=query,
        catalyst=catalyst,
        facet=facet,
        reactants=reactants,
        products=products,
        energy=energy,
        barrier=barrier,
        functional=functional,
        database=database,
        sort=sort,
        page=page,
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def main():
    """Run the MCP server (stdio transport)."""
    # Hands control to the module-level FastMCP instance.
    mcp.run()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""catapult_get implementation — formats query results as markdown strings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from chemdb.cite import format_citation
|
|
8
|
+
from chemdb.config import ChemdbConfig, load_config
|
|
9
|
+
from chemdb.db import make_engine, make_session
|
|
10
|
+
from chemdb.errors import ChemdbError
|
|
11
|
+
|
|
12
|
+
from catapult.db.query import PAGE_SIZE, get_by_id, get_shape, search
|
|
13
|
+
from catapult.db.schema import SCHEMA
|
|
14
|
+
|
|
15
|
+
# Lazily initialised module state, filled in by the first _get_session() call.
_config: ChemdbConfig | None = None
# Session factory built from the configured engine; None until first use.
_SessionFactory = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _get_session():
    """Return a new session, building the shared session factory on first use."""
    global _config, _SessionFactory
    if _SessionFactory is not None:
        return _SessionFactory()

    # First call: load the configuration and create the engine once.
    _config = load_config()
    _SessionFactory = make_session(make_engine(_config, SCHEMA))
    return _SessionFactory()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def catapult_get(
    id: str = "",
    query: str = "",
    catalyst: str = "",
    facet: str = "",
    reactants: str = "",
    products: str = "",
    energy: str = "",
    barrier: str = "",
    functional: str = "",
    database: str = "",
    sort: str = "",
    page: int = 1,
) -> str:
    """Query heterogeneous catalysis databases.

    Returns formatted markdown with results, shape, and hints.

    Dispatch:
        * ``id`` given → identifier lookup.
        * no filters and ``page == 1`` → shape of the entire database.
        * otherwise → filtered search.

    Errors are returned as text rather than raised: ``ChemdbError`` via its
    ``to_markdown()``, anything else as an "Internal error" line.
    """
    try:
        # The context manager closes the session on every exit path, so each
        # tool call releases its database connection.
        with _get_session() as session:
            # ID lookup mode
            # NOTE(review): `page` is not forwarded here, so the lookup always
            # returns its first page.
            if id:
                return _format_publication(get_by_id(session, id))

            # No args → shape of entire database
            has_filters = any(
                (
                    query,
                    catalyst,
                    facet,
                    reactants,
                    products,
                    energy,
                    barrier,
                    functional,
                    database,
                    sort,
                )
            )
            if not has_filters and page == 1:
                return _format_shape(get_shape(session))

            # Filter search
            data = search(
                session,
                query=query,
                catalyst=catalyst,
                facet=facet,
                reactants=reactants,
                products=products,
                energy=energy,
                barrier=barrier,
                functional=functional,
                database=database,
                sort=sort,
                page=page,
            )
            return _format_search(data, catalyst=catalyst)

    except ChemdbError as exc:
        return exc.to_markdown()
    except Exception as exc:
        # Top-level tool boundary: report the failure as text to the client.
        return f"⚠ Internal error: {exc}"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ── Formatters ──────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _format_publication(data: dict[str, Any]) -> str:
    """Render an identifier lookup as markdown.

    Emits a heading with the total count and the id, one numbered line per
    returned reaction, and a next-page hint when the total exceeds one page.
    """
    count = data["total"]
    rows = data["results"]

    out = [f"## {count} reactions for {data['id']}", ""]
    out.extend(_format_rxn_line(pos, row) for pos, row in enumerate(rows, 1))

    if count > PAGE_SIZE:
        out += [
            "",
            f'→ catapult_get(id="{data["id"]}", page=2) for next {PAGE_SIZE}',
        ]

    return "\n".join(out)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _format_shape(data: dict[str, Any]) -> str:
|
|
118
|
+
"""Format full database shape (no-arg call)."""
|
|
119
|
+
total = data.get("total", 0)
|
|
120
|
+
|
|
121
|
+
lines = [f"## CataPult — ~{total:,} reactions"]
|
|
122
|
+
lines.append("")
|
|
123
|
+
|
|
124
|
+
cats = data.get("catalysts", {})
|
|
125
|
+
if cats:
|
|
126
|
+
lines.append("### Top catalysts (by reaction count)")
|
|
127
|
+
parts = [f"{cat} {count:,}" for cat, count in list(cats.items())[:8]]
|
|
128
|
+
lines.append(" | ".join(parts))
|
|
129
|
+
|
|
130
|
+
facets = data.get("facets", {})
|
|
131
|
+
if facets:
|
|
132
|
+
lines.append("")
|
|
133
|
+
lines.append("### Facets")
|
|
134
|
+
parts = [f"{f} {count:,}" for f, count in list(facets.items())[:8]]
|
|
135
|
+
lines.append(" | ".join(parts))
|
|
136
|
+
|
|
137
|
+
e = data.get("energy", {})
|
|
138
|
+
b = data.get("barrier", {})
|
|
139
|
+
if e.get("min") is not None:
|
|
140
|
+
lines.append("")
|
|
141
|
+
lines.append("### Energy ranges")
|
|
142
|
+
lines.append("| Property | Min | Avg | Max | Unit |")
|
|
143
|
+
lines.append("|----------|-----|-----|-----|------|")
|
|
144
|
+
if e.get("min") is not None:
|
|
145
|
+
lines.append(
|
|
146
|
+
f"| ΔE (reaction) | {e['min']:.1f} | {e['avg']:.1f} | {e['max']:.1f} | eV |"
|
|
147
|
+
)
|
|
148
|
+
if b.get("min") is not None:
|
|
149
|
+
lines.append(
|
|
150
|
+
f"| Ea (barrier) | {b['min']:.1f} | {b['avg']:.1f} | {b['max']:.1f} | eV |"
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
funcs = data.get("functionals", {})
|
|
154
|
+
if funcs:
|
|
155
|
+
lines.append("")
|
|
156
|
+
lines.append("### Functionals")
|
|
157
|
+
parts = [f"{f} {count:,}" for f, count in list(funcs.items())[:6]]
|
|
158
|
+
lines.append(" | ".join(parts))
|
|
159
|
+
|
|
160
|
+
lines.append("")
|
|
161
|
+
lines.append("### Sortable fields")
|
|
162
|
+
lines.append("energy, barrier, catalyst, facet")
|
|
163
|
+
lines.append("")
|
|
164
|
+
lines.append('→ catapult_get(catalyst="Pd") to browse palladium reactions')
|
|
165
|
+
lines.append(
|
|
166
|
+
'→ catapult_get(reactants="CO", catalyst="Pd,Pt,Cu", facet="111") to compare metals'
|
|
167
|
+
)
|
|
168
|
+
lines.append('→ catapult_get(query="oxygen evolution") for free-text search')
|
|
169
|
+
|
|
170
|
+
return "\n".join(lines)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _format_search(data: dict[str, Any], catalyst: str = "") -> str:
    """Render filter-search results: header, optional shape summary, rows, hints.

    Args:
        data: Search payload. Reads ``total``, ``page`` and ``results``, plus
            the optional ``comparison`` and ``shape`` entries.
        catalyst: The catalyst filter as given by the caller; only forwarded
            to ``_format_comparison`` when the payload has comparison rows.

    Returns:
        Markdown text for the current page of results.
    """
    total = data["total"]
    page = data["page"]
    first = (page - 1) * PAGE_SIZE + 1
    last = min(first + PAGE_SIZE - 1, total)

    # A truthy "comparison" entry switches to the comparison table layout.
    if data.get("comparison"):
        return _format_comparison(data, catalyst)

    out = [f"## {total} reactions (showing {first}–{last})", ""]

    shape = data.get("shape")
    if shape:
        out.append("### Shape")

        energy_stats = shape.get("energy", {})
        barrier_stats = shape.get("barrier", {})
        if energy_stats.get("min") is not None:
            out.append(
                f" ΔE {energy_stats['min']:.1f} to {energy_stats['max']:.1f} eV (mean {energy_stats['avg']:.2f})"
            )
        if barrier_stats.get("min") is not None:
            out.append(
                f" Ea {barrier_stats['min']:.1f}–{barrier_stats['max']:.1f} eV (mean {barrier_stats['avg']:.2f})"
            )

        facet_counts = shape.get("facets", {})
        if facet_counts:
            top_facets = " | ".join(
                f"{name} ×{n}" for name, n in list(facet_counts.items())[:5]
            )
            out.append(f" Facets: {top_facets}")

        functional_counts = shape.get("functionals", {})
        if functional_counts:
            top_functionals = " | ".join(
                f"{name} {n}" for name, n in list(functional_counts.items())[:4]
            )
            out.append(f" Functionals: {top_functionals}")

        out.append("")

    # Numbering continues from the absolute position of this page's first row.
    out.extend(
        _format_rxn_line(position, rxn)
        for position, rxn in enumerate(data["results"], first)
    )

    # Follow-up hints: next page, then the first hit's publication.
    out.append("")
    if last < total:
        out.append(f"→ catapult_get(..., page={page + 1}) for next {PAGE_SIZE}")
    rows = data["results"]
    if rows and rows[0].get("doi"):
        first_doi = rows[0]["doi"]
        out.append(f'→ catapult_get(id="doi:{first_doi}") for full pub details')

    return "\n".join(out)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _format_comparison(data: dict[str, Any], catalyst: str) -> str:
|
|
227
|
+
"""Format multi-catalyst comparison table."""
|
|
228
|
+
comparison = data["comparison"]
|
|
229
|
+
total = data["total"]
|
|
230
|
+
|
|
231
|
+
lines = [f"## Comparison — {len(comparison)} catalysts ({total} total reactions)"]
|
|
232
|
+
lines.append("")
|
|
233
|
+
lines.append("| Surface | ΔE (eV) | Ea (eV) | Site | Functional | Source |")
|
|
234
|
+
lines.append("|---------|---------|---------|------|------------|--------|")
|
|
235
|
+
|
|
236
|
+
for r in comparison:
|
|
237
|
+
cat_facet = r.get("catalyst", "?")
|
|
238
|
+
if r.get("facet"):
|
|
239
|
+
cat_facet += f"({r['facet']})"
|
|
240
|
+
energy_s = f"{r['energy']:.2f}" if r.get("energy") is not None else "?"
|
|
241
|
+
barrier_s = f"{r['barrier']:.2f}" if r.get("barrier") is not None else "?"
|
|
242
|
+
cite = r.get("doi", "")
|
|
243
|
+
if cite:
|
|
244
|
+
cite = f"[@{cite}]"
|
|
245
|
+
lines.append(
|
|
246
|
+
f"| {cat_facet} | {energy_s} | {barrier_s} | {r.get('site', '')} | {r.get('functional', '')} | {cite} |"
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
lines.append("")
|
|
250
|
+
cats = catalyst.split(",")
|
|
251
|
+
if cats:
|
|
252
|
+
lines.append(
|
|
253
|
+
f'→ catapult_get(catalyst="{cats[0].strip()}") for all {cats[0].strip()} reactions'
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
return "\n".join(lines)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _format_rxn_line(i: int, r: dict[str, Any]) -> str:
|
|
260
|
+
"""Format a single reaction result line."""
|
|
261
|
+
cat_facet = r.get("catalyst", "?")
|
|
262
|
+
if r.get("facet"):
|
|
263
|
+
cat_facet += f"({r['facet']})"
|
|
264
|
+
|
|
265
|
+
parts = [f"{cat_facet} — {r.get('equation', '?')}"]
|
|
266
|
+
if r.get("energy") is not None:
|
|
267
|
+
parts.append(f"ΔE {r['energy']:.2f} eV")
|
|
268
|
+
if r.get("barrier") is not None:
|
|
269
|
+
parts.append(f"Ea {r['barrier']:.2f} eV")
|
|
270
|
+
if r.get("functional"):
|
|
271
|
+
parts.append(r["functional"])
|
|
272
|
+
|
|
273
|
+
line = f"{i:>2}. {parts[0]} {' | '.join(parts[1:])}"
|
|
274
|
+
|
|
275
|
+
if r.get("doi"):
|
|
276
|
+
cite = format_citation(r["doi"])
|
|
277
|
+
line += f"\n {cite}"
|
|
278
|
+
|
|
279
|
+
return line
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Shared fixtures for catapult tests."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from sqlalchemy import create_engine, event
|
|
7
|
+
from sqlalchemy.orm import sessionmaker
|
|
8
|
+
|
|
9
|
+
from catapult.db.schema import Base, Reaction
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture
def engine():
    """In-memory SQLite engine with the schema tables created.

    The ``catapult`` schema name is translated to ``None`` through
    ``schema_translate_map`` (presumably because the models are declared
    under a ``catapult`` schema that SQLite does not have; confirm against
    ``catapult.db.schema``).

    Yields:
        The engine, which is disposed once the requesting test has finished
        so its pooled connections do not outlive the test.
    """
    eng = create_engine(
        "sqlite:///:memory:",
        execution_options={"schema_translate_map": {"catapult": None}},
    )

    @event.listens_for(eng, "connect")
    def _set_pragma(dbapi_conn, connection_record):
        # Runs on every new DBAPI connection made by this engine.
        cursor = dbapi_conn.cursor()
        try:
            cursor.execute("PRAGMA journal_mode=WAL")
        finally:
            # Release the cursor even if the PRAGMA statement fails.
            cursor.close()

    Base.metadata.create_all(eng)
    yield eng
    eng.dispose()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
def session(engine):
    """Yield a session pre-loaded with five sample reactions.

    Three CO oxidation rows share one publication (two on Pd, one on Pt),
    one water dissociation row on Cu has its own publication, and one O2
    dissociation row on Pt has no DOI or publication id.
    """
    # Field groups shared by several rows.
    co_oxidation = {"equation": "CO* -> CO2", "reactants": "CO", "products": "CO2"}
    medford = {
        "functional": "BEEF-vdW",
        "doi": "10.1021/acscatal.7b02335",
        "pub_id": "MedfordEtAl2017",
    }

    rows = [
        {
            **co_oxidation,
            **medford,
            "catalyst": "Pd",
            "facet": "111",
            "energy": -0.72,
            "barrier": 0.89,
            "site": "fcc",
            "sys_id": "rxn_001",
        },
        {
            **co_oxidation,
            **medford,
            "catalyst": "Pd",
            "facet": "100",
            "energy": -0.58,
            "barrier": 1.02,
            "site": "bridge",
            "sys_id": "rxn_002",
        },
        {
            **co_oxidation,
            **medford,
            "catalyst": "Pt",
            "facet": "111",
            "energy": -0.91,
            "barrier": 0.78,
            "site": "atop",
            "sys_id": "rxn_003",
        },
        {
            "equation": "H2O* -> OH* + H*",
            "catalyst": "Cu",
            "facet": "111",
            "reactants": "H2O",
            "products": "OH,H",
            "energy": -0.33,
            "barrier": 1.35,
            "site": "fcc",
            "functional": "PBE",
            "doi": "10.1021/ja5088237",
            "pub_id": "NorskovEtAl2014",
            "sys_id": "rxn_004",
        },
        {
            "equation": "O2* -> 2O*",
            "catalyst": "Pt",
            "facet": "111",
            "reactants": "O2",
            "products": "O",
            "energy": -1.45,
            "barrier": 0.52,
            "site": "bridge",
            "functional": "RPBE",
            "sys_id": "rxn_005",
        },
    ]

    db = sessionmaker(bind=engine)()
    # Every sample row belongs to the same source database.
    db.add_all([Reaction(database="cathub", **fields) for fields in rows])
    db.commit()
    yield db
    db.close()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Tests for catapult.db.query."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from chemdb.errors import IdNotFoundError, NoResultsError
|
|
8
|
+
from catapult.db.query import get_by_id, get_shape, search
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestGetById:
    """``get_by_id`` lookups for each identifier form used by the tests."""

    def test_doi_prefix(self, session):
        """A ``doi:`` identifier returns a publication-type result."""
        found = get_by_id(session, "doi:10.1021/acscatal.7b02335")
        assert found["type"] == "publication"
        # The sample data holds three reactions under this DOI.
        assert found["total"] == 3

    def test_pub_prefix(self, session):
        """A ``pub:`` identifier finds the same three reactions."""
        found = get_by_id(session, "pub:MedfordEtAl2017")
        assert found["total"] == 3

    def test_sys_prefix(self, session):
        """A ``sys:`` identifier matches a single reaction."""
        found = get_by_id(session, "sys:rxn_001")
        assert found["total"] == 1

    def test_bare_string_as_pub(self, session):
        """An identifier without a prefix matches the publication id."""
        found = get_by_id(session, "MedfordEtAl2017")
        assert found["total"] == 3

    def test_not_found(self, session):
        """An unknown DOI raises ``IdNotFoundError``."""
        with pytest.raises(IdNotFoundError):
            get_by_id(session, "doi:10.9999/nonexistent")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TestSearch:
    """Filter behaviour of ``search`` against the five sample reactions."""

    def test_catalyst_single(self, session):
        """One catalyst name matches both Pd rows."""
        hits = search(session, catalyst="Pd")
        assert hits["total"] == 2

    def test_catalyst_multi_comparison(self, session):
        """A comma-separated catalyst list also yields comparison data."""
        hits = search(session, catalyst="Pd,Pt")
        # Two Pd rows plus two Pt rows.
        assert hits["total"] == 4
        assert hits["comparison"] is not None

    def test_facet_filter(self, session):
        """Four of the sample rows are on the 111 facet."""
        hits = search(session, facet="111")
        assert hits["total"] == 4

    def test_reactants_filter(self, session):
        """Three sample rows have CO as reactant."""
        hits = search(session, reactants="CO")
        assert hits["total"] == 3

    def test_energy_range(self, session):
        """The ``-1..0`` range leaves out only the -1.45 eV O2 dissociation."""
        hits = search(session, energy="-1..0")
        assert hits["total"] == 4

    def test_barrier_range(self, session):
        """``<1.0`` keeps the barriers 0.89, 0.78 and 0.52."""
        hits = search(session, barrier="<1.0")
        assert hits["total"] == 3

    def test_functional_filter(self, session):
        """Only the Cu row is counted for the PBE functional."""
        hits = search(session, functional="PBE")
        assert hits["total"] == 1

    def test_database_filter(self, session):
        """Every sample row comes from the cathub database."""
        hits = search(session, database="cathub")
        assert hits["total"] == 5

    def test_no_results(self, session):
        """A catalyst absent from the sample data raises ``NoResultsError``."""
        with pytest.raises(NoResultsError):
            search(session, catalyst="Au")

    def test_shape_on_page_1(self, session):
        """The default page carries a shape summary that includes energy."""
        hits = search(session, catalyst="Pd")
        shape = hits["shape"]
        assert shape is not None
        assert "energy" in shape
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class TestGetShape:
    """Whole-database summary returned by ``get_shape``."""

    def test_full_shape(self, session):
        """The summary counts all five rows and exposes the expected sections."""
        summary = get_shape(session)
        assert summary["total"] == 5
        for section in ("catalysts", "facets", "energy"):
            assert section in summary
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Tests for catapult.tool — formatted output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from catapult.db.query import get_by_id, get_shape, search
|
|
6
|
+
from catapult.tool import _format_publication, _format_search, _format_shape
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestFormatPublication:
    """Markdown produced by ``_format_publication``."""

    def test_pub_output(self, session):
        """A DOI lookup renders a reactions header and the CO rows."""
        rendered = _format_publication(
            get_by_id(session, "doi:10.1021/acscatal.7b02335")
        )
        assert "reactions" in rendered
        assert "CO" in rendered
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestFormatShape:
    """Markdown produced by ``_format_shape``."""

    def test_shape_output(self, session):
        """The overview contains the title, key sections and a usage hint."""
        rendered = _format_shape(get_shape(session))
        for expected in (
            "CataPult",
            "Top catalysts",
            "Sortable fields",
            "→ catapult_get",
        ):
            assert expected in rendered
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestFormatSearch:
    """Markdown produced by ``_format_search`` in both of its layouts."""

    def test_search_output(self, session):
        """A single-catalyst search renders the header and the shape block."""
        payload = search(session, catalyst="Pd")
        rendered = _format_search(payload, catalyst="Pd")
        assert "reactions" in rendered
        assert "Shape" in rendered

    def test_comparison_output(self, session):
        """A multi-catalyst search renders the comparison table."""
        payload = search(session, catalyst="Pd,Pt", facet="111")
        rendered = _format_search(payload, catalyst="Pd,Pt")
        assert "Comparison" in rendered
        assert "Surface" in rendered
|