rcsb-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rcsb_mcp-0.1.0/.dockerignore +14 -0
- rcsb_mcp-0.1.0/.github/workflows/_workflow-docker.yaml +20 -0
- rcsb_mcp-0.1.0/.github/workflows/publish.yaml +125 -0
- rcsb_mcp-0.1.0/.gitignore +10 -0
- rcsb_mcp-0.1.0/AGENTS.md +93 -0
- rcsb_mcp-0.1.0/Dockerfile +30 -0
- rcsb_mcp-0.1.0/LICENSE.md +21 -0
- rcsb_mcp-0.1.0/PKG-INFO +244 -0
- rcsb_mcp-0.1.0/README.md +233 -0
- rcsb_mcp-0.1.0/evals/README.md +81 -0
- rcsb_mcp-0.1.0/evals/rcsb_pdb_eval.xml +65 -0
- rcsb_mcp-0.1.0/k8s/helm/.helmignore +23 -0
- rcsb_mcp-0.1.0/k8s/helm/Chart.yaml +26 -0
- rcsb_mcp-0.1.0/k8s/helm/templates/_helpers.tpl +53 -0
- rcsb_mcp-0.1.0/k8s/helm/templates/deployment.yaml +87 -0
- rcsb_mcp-0.1.0/k8s/helm/templates/hpa.yaml +33 -0
- rcsb_mcp-0.1.0/k8s/helm/templates/ingress.yaml +61 -0
- rcsb_mcp-0.1.0/k8s/helm/templates/service.yaml +14 -0
- rcsb_mcp-0.1.0/k8s/helm/values/production.yaml +69 -0
- rcsb_mcp-0.1.0/k8s/helm/values.yaml +98 -0
- rcsb_mcp-0.1.0/prompts/pdb-assistant.md +98 -0
- rcsb_mcp-0.1.0/pyproject.toml +21 -0
- rcsb_mcp-0.1.0/scripts/generate_chemical_attributes.py +200 -0
- rcsb_mcp-0.1.0/server.json +24 -0
- rcsb_mcp-0.1.0/src/rcsb_mcp/chemical_search_attributes.py +629 -0
- rcsb_mcp-0.1.0/src/rcsb_mcp/graphql_queries.py +617 -0
- rcsb_mcp-0.1.0/src/rcsb_mcp/queries.py +1061 -0
- rcsb_mcp-0.1.0/src/rcsb_mcp/search_attributes.py +7861 -0
- rcsb_mcp-0.1.0/src/rcsb_mcp/server.py +2177 -0
- rcsb_mcp-0.1.0/tests/test_queries.py +481 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Docker build and push workflow
|
|
2
|
+
|
|
3
|
+
name: Run CI/CD Docker Workflow
|
|
4
|
+
|
|
5
|
+
# Reusable workflow — invoked by publish.yaml (and runnable manually). It does
|
|
6
|
+
# not trigger on push directly; the release pipeline calls it via `uses:`.
|
|
7
|
+
on:
|
|
8
|
+
workflow_call:
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
run-workflow:
|
|
13
|
+
name: "Run automated docker workflow"
|
|
14
|
+
if: github.event_name != 'pull_request'
|
|
15
|
+
uses: rcsb/devops-cicd-github-actions/.github/workflows/workflow-docker.yaml@master
|
|
16
|
+
with:
|
|
17
|
+
repo_project: "rcsb" # REQUIRED. The name of the project or organization in the remote Docker image repository.
|
|
18
|
+
docker_image_name: "rcsb-mcp" # REQUIRED. The name of the Docker image to create.
|
|
19
|
+
docker_build_context: "." # The path location of the docker build context, relative to the project root. Defaults to the project root.
|
|
20
|
+
mainline_branch: "master"
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
name: CI Pipeline
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [master]
|
|
8
|
+
release:
|
|
9
|
+
types: [published]
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
|
|
13
|
+
hatch-test:
|
|
14
|
+
name: Test on Python ${{ matrix.python-version }}
|
|
15
|
+
runs-on: ["self-hosted", "buildchain"]
|
|
16
|
+
timeout-minutes: 20
|
|
17
|
+
strategy:
|
|
18
|
+
matrix:
|
|
19
|
+
python-version: ["3.10"]
|
|
20
|
+
steps:
|
|
21
|
+
- name: Checkout code
|
|
22
|
+
uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
uses: actions/setup-python@v5  # v4 targets the retired Node 16 action runtime; v5 runs on Node 20
|
|
26
|
+
with:
|
|
27
|
+
python-version: ${{ matrix.python-version }}
|
|
28
|
+
|
|
29
|
+
- name: Install Hatch
|
|
30
|
+
run: pip install hatch
|
|
31
|
+
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: hatch test
|
|
34
|
+
|
|
35
|
+
hatch-build:
|
|
36
|
+
name: Build to PyPI
|
|
37
|
+
needs:
|
|
38
|
+
- hatch-test
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
if: github.event_name == 'release'
|
|
41
|
+
steps:
|
|
42
|
+
- name: Checkout code
|
|
43
|
+
uses: actions/checkout@v4
|
|
44
|
+
|
|
45
|
+
- name: Set up Python 3.10
|
|
46
|
+
uses: actions/setup-python@v5  # v4 targets the retired Node 16 action runtime; v5 runs on Node 20
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.10"
|
|
49
|
+
|
|
50
|
+
- name: Install Hatch
|
|
51
|
+
run: pip install hatch
|
|
52
|
+
|
|
53
|
+
- name: Build distribution
|
|
54
|
+
run: hatch build
|
|
55
|
+
|
|
56
|
+
- name: Store the distribution packages
|
|
57
|
+
uses: actions/upload-artifact@v4
|
|
58
|
+
with:
|
|
59
|
+
name: python-package-distributions
|
|
60
|
+
path: dist/
|
|
61
|
+
|
|
62
|
+
push-image:
|
|
63
|
+
needs:
|
|
64
|
+
- hatch-build
|
|
65
|
+
name: Push image to harbor
|
|
66
|
+
uses: ./.github/workflows/_workflow-docker.yaml
|
|
67
|
+
secrets: inherit
|
|
68
|
+
|
|
69
|
+
publish-to-pypi:
|
|
70
|
+
name: >-
|
|
71
|
+
Publish Python 🐍 distribution 📦 to PyPI
|
|
72
|
+
if: github.event_name == 'release'
|
|
73
|
+
needs:
|
|
74
|
+
- hatch-build
|
|
75
|
+
runs-on: ubuntu-latest
|
|
76
|
+
environment:
|
|
77
|
+
name: pypi
|
|
78
|
+
url: https://pypi.org/p/rcsb-mcp
|
|
79
|
+
permissions:
|
|
80
|
+
id-token: write
|
|
81
|
+
|
|
82
|
+
steps:
|
|
83
|
+
- name: Download all the dists
|
|
84
|
+
uses: actions/download-artifact@v4
|
|
85
|
+
with:
|
|
86
|
+
name: python-package-distributions
|
|
87
|
+
path: dist/
|
|
88
|
+
- name: Publish distribution 📦 to PyPI
|
|
89
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
90
|
+
|
|
91
|
+
publish-to-registry:
|
|
92
|
+
name: Publish to MCP Registry
|
|
93
|
+
if: github.event_name == 'release'
|
|
94
|
+
needs:
|
|
95
|
+
# The PyPI package must exist first — the registry validates that the
|
|
96
|
+
# version in server.json is published and carries the mcp-name marker.
|
|
97
|
+
- publish-to-pypi
|
|
98
|
+
runs-on: ubuntu-latest
|
|
99
|
+
permissions:
|
|
100
|
+
id-token: write # OIDC token for github-oidc auth to the registry
|
|
101
|
+
contents: read
|
|
102
|
+
|
|
103
|
+
steps:
|
|
104
|
+
- name: Checkout code
|
|
105
|
+
uses: actions/checkout@v4
|
|
106
|
+
|
|
107
|
+
- name: Verify server.json version matches the package
|
|
108
|
+
run: |
|
|
109
|
+
server_version=$(jq -r .version server.json)
|
|
110
|
+
pkg_version=$(grep -E '^version *=' pyproject.toml | head -1 | sed -E 's/.*"([^"]+)".*/\1/')
|
|
111
|
+
echo "server.json=$server_version pyproject.toml=$pkg_version"
|
|
112
|
+
if [ "$server_version" != "$pkg_version" ]; then
|
|
113
|
+
echo "::error::server.json version ($server_version) != pyproject.toml ($pkg_version). Bump both before releasing."
|
|
114
|
+
exit 1
|
|
115
|
+
fi
|
|
116
|
+
|
|
117
|
+
- name: Install mcp-publisher
|
|
118
|
+
run: |
|
|
119
|
+
curl -fsSL "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz mcp-publisher  # -f: fail on HTTP errors instead of piping an error page into tar
|
|
120
|
+
|
|
121
|
+
- name: Authenticate to MCP Registry (GitHub OIDC)
|
|
122
|
+
run: ./mcp-publisher login github-oidc
|
|
123
|
+
|
|
124
|
+
- name: Publish server to MCP Registry
|
|
125
|
+
run: ./mcp-publisher publish
|
rcsb_mcp-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# AGENTS.md — working on the rcsb-mcp repo
|
|
2
|
+
|
|
3
|
+
Guidance for AI coding agents (and humans) modifying this repository. This is an
|
|
4
|
+
MCP server that lets an LLM **interrogate Protein Data Bank structures** across
|
|
5
|
+
three RCSB APIs: Search (REST), Data (GraphQL), and Sequence Coordinates (GraphQL).
|
|
6
|
+
|
|
7
|
+
> The runtime *assistant* persona and output format are **not** here — they live
|
|
8
|
+
> in [`prompts/pdb-assistant.md`](prompts/pdb-assistant.md), which is pasted into a
|
|
9
|
+
> project as a system prompt. Keep that split (see "Two layers" below).
|
|
10
|
+
|
|
11
|
+
## Layout
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
src/rcsb_mcp/
|
|
15
|
+
server.py MCP server: @mcp.tool() tools, HTTP calls, schema introspection
|
|
16
|
+
queries.py PURE request-body builders (no network) + the DATA_OBJECTS registry
|
|
17
|
+
graphql_queries.py Large GraphQL field-selection constants (ENTRY_ANNOTATIONS, ...)
|
|
18
|
+
search_attributes.py SEARCH_ATTRIBUTES catalog (structure search schema)
|
|
19
|
+
chemical_search_attributes.py CHEMICAL_SEARCH_ATTRIBUTES catalog — auto-generated (see scripts/)
|
|
20
|
+
tests/
|
|
21
|
+
test_queries.py Network-free unit tests for the query builders
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Architecture & conventions
|
|
25
|
+
|
|
26
|
+
- **Pure builders vs. I/O.** `queries.py` builds request bodies and contains **no
|
|
27
|
+
network code**, so it stays unit-testable. `server.py` does the HTTP and exposes
|
|
28
|
+
the tools. Keep new query-construction logic in `queries.py`.
|
|
29
|
+
- **The `DATA_OBJECTS` registry** (`queries.py`) drives every Data API `rcsb_get_*` tool:
|
|
30
|
+
one entry per GraphQL root field (root field, id arg, batch/single, default field
|
|
31
|
+
selection). **Adding a Data API object is ideally a one-line registry entry.**
|
|
32
|
+
- **Compact defaults + escape hatches.** Each `rcsb_get_*`/`rcsb_seqcoord_*` tool returns a
|
|
33
|
+
curated compact field selection but accepts a `fields=` override; `rcsb_describe_data_object`
|
|
34
|
+
/ `rcsb_describe_seqcoord_object` introspect the live schema for discovery; `rcsb_data_graphql` /
|
|
35
|
+
`rcsb_seqcoord_graphql` are raw passthroughs. Don't try to make defaults exhaustive.
|
|
36
|
+
- **Server `instructions` = tool-usage guidance only** (routing, chaining, return
|
|
37
|
+
types). Do **not** put application/presentation policy there — it's always-on for
|
|
38
|
+
every client. That belongs in the project prompt.
|
|
39
|
+
|
|
40
|
+
## Dev workflow
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Unit tests (no network) — run after touching queries.py / graphql_queries.py
|
|
44
|
+
hatch test # or: python tests/test_queries.py
|
|
45
|
+
|
|
46
|
+
# Syntax check both core modules
|
|
47
|
+
python -m py_compile src/rcsb_mcp/server.py src/rcsb_mcp/queries.py
|
|
48
|
+
|
|
49
|
+
# Run the server over stdio (entry point: rcsb_mcp.server:main, console script `rcsb-mcp`)
|
|
50
|
+
python -m rcsb_mcp.server
|
|
51
|
+
|
|
52
|
+
# Inspect interactively
|
|
53
|
+
npx @modelcontextprotocol/inspector python -m rcsb_mcp.server
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
When the package is installed editable (`pip install -e .`), source edits take effect on the next process
|
|
57
|
+
start. The test file lives at `tests/test_queries.py`.
|
|
58
|
+
|
|
59
|
+
## The golden rule: validate against the live API before changing field selections
|
|
60
|
+
|
|
61
|
+
Before editing any default field selection or query body, **run the proposed
|
|
62
|
+
selection against the live endpoint** and confirm it returns data. This is how real
|
|
63
|
+
bugs were caught in this repo (a non-existent `auth_asym_id` field, id case
|
|
64
|
+
sensitivity, `[null]` rows for unknown ids). Pattern:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import asyncio; from rcsb_mcp import server, queries
|
|
68
|
+
body = queries.build_data_query("entries", "4HHB", "rcsb_id <your new fields>")
|
|
69
|
+
print(asyncio.run(server._graphql_field(body, "entries")))
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
After validating, add/adjust the default and re-run `test_queries.py`.
|
|
73
|
+
|
|
74
|
+
## Gotchas
|
|
75
|
+
|
|
76
|
+
- **GraphQL endpoints return HTTP 200 even on query errors** — the error is in the
|
|
77
|
+
`errors` array, not the status code. `_graphql_field` already raises on it.
|
|
78
|
+
- **ID case sensitivity.** Entry/entity/chem ids are upper-cased; group and
|
|
79
|
+
`group_provenance` ids are case-sensitive opaque tokens (the `upper=False` flag in
|
|
80
|
+
`DATA_OBJECTS`). Don't blanket-uppercase.
|
|
81
|
+
- **Unknown ids** are either dropped or returned as `null` depending on the field;
|
|
82
|
+
batch handling filters `None` and reports `not_found`.
|
|
83
|
+
- **Sequence Coordinates: PDB ids must be entity/instance-level** (`4HHB_1`, not
|
|
84
|
+
`4HHB`); only this API cross-references NCBI.
|
|
85
|
+
- **Claude Desktop caches MCP processes.** After code changes, fully quit & relaunch
|
|
86
|
+
(⌘Q) — it does not hot-reload, and stale/duplicate processes have caused confusion.
|
|
87
|
+
|
|
88
|
+
## Two layers (don't merge them)
|
|
89
|
+
|
|
90
|
+
- **Server `instructions`** (in `server.py`) → how to *drive the tools*; reusable
|
|
91
|
+
across every client/project.
|
|
92
|
+
- **`prompts/pdb-assistant.md`** → assistant persona + output format (HTML report,
|
|
93
|
+
columns, conventions); application-specific, pasted as project instructions.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
FROM python:3.11-slim
|
|
3
|
+
|
|
4
|
+
ENV PYTHONUNBUFFERED=1 \
|
|
5
|
+
PIP_NO_CACHE_DIR=1
|
|
6
|
+
|
|
7
|
+
WORKDIR /app
|
|
8
|
+
|
|
9
|
+
# Create an unprivileged user up front (uid/gid 1000 to match the Helm
|
|
10
|
+
# podSecurityContext: runAsUser / runAsGroup / fsGroup = 1000).
|
|
11
|
+
RUN groupadd --gid 1000 appuser \
|
|
12
|
+
&& useradd --create-home --uid 1000 --gid 1000 appuser
|
|
13
|
+
|
|
14
|
+
# Install the package and its dependencies. Copy build metadata + source only
|
|
15
|
+
# (keeps the layer cache friendly and the image lean).
|
|
16
|
+
COPY pyproject.toml README.md ./
|
|
17
|
+
COPY src ./src
|
|
18
|
+
RUN pip install .
|
|
19
|
+
|
|
20
|
+
USER appuser
|
|
21
|
+
|
|
22
|
+
EXPOSE 8080
|
|
23
|
+
|
|
24
|
+
# Liveness: confirm the server is accepting connections on the port.
|
|
25
|
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
|
26
|
+
CMD python -c "import socket; socket.create_connection(('127.0.0.1', 8080), timeout=2).close()" || exit 1
|
|
27
|
+
|
|
28
|
+
# Serve the MCP over the streamable-HTTP transport (endpoint: POST /mcp).
|
|
29
|
+
# create_app is the FastMCP streamable-HTTP ASGI app factory (--factory builds it on start).
|
|
30
|
+
CMD ["uvicorn", "rcsb_mcp.server:create_app", "--factory", "--host", "0.0.0.0", "--port", "8080"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 - now, RCSB PDB and contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
rcsb_mcp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rcsb-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An MCP server for interrogating PDB structures — search, inspect, and cross-reference across the RCSB Search, Data, and Sequence Coordinates APIs
|
|
5
|
+
License-File: LICENSE.md
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: mcp>=1.2.0
|
|
9
|
+
Requires-Dist: uvicorn>=0.30
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
<!-- mcp-name: io.github.rcsb/rcsb-mcp -->
|
|
13
|
+
|
|
14
|
+
# rcsb-mcp
|
|
15
|
+
|
|
16
|
+
An [MCP](https://modelcontextprotocol.io) server for **interrogating Protein Data
|
|
17
|
+
Bank structures** — discover, inspect, and cross-reference — from LLM clients
|
|
18
|
+
(Claude Desktop, MCP Inspector, Cursor, etc.). It spans three RCSB APIs:
|
|
19
|
+
|
|
20
|
+
- **Discover** — find structures with the [Search API](https://search.rcsb.org)
|
|
21
|
+
(keyword, attribute, sequence, chemistry, 3D shape, motif).
|
|
22
|
+
- **Inspect** — fetch entry / entity / assembly / ligand details and annotations
|
|
23
|
+
from the [Data API](https://data.rcsb.org/graphql).
|
|
24
|
+
- **Relate** — map sequences and positional features across PDB, UniProt, and NCBI
|
|
25
|
+
with the [Sequence Coordinates API](https://sequence-coordinates.rcsb.org/graphql).
|
|
26
|
+
|
|
27
|
+
## Tools
|
|
28
|
+
|
|
29
|
+
### Search (search.rcsb.org)
|
|
30
|
+
|
|
31
|
+
| Tool | What it does |
|
|
32
|
+
|------|--------------|
|
|
33
|
+
| `rcsb_list_pdb_search_attributes` | Discover searchable attribute paths, types, and operators. `schema="structure"` (default, ~677) or `schema="chemical"` (~57: `chem_comp.*`, `drugbank_info.*`, ...). |
|
|
34
|
+
| `rcsb_find_go_terms` | Resolve a free-text molecular function / biological process / cellular component to Gene Ontology ids (via EBI QuickGO), annotated with PDB entry counts — then search by `rcsb_polymer_entity_annotation.annotation_lineage.id`. |
|
|
35
|
+
| `rcsb_find_interpro_domains` | Resolve a free-text protein domain / family / fold to InterPro ids (via EBI InterPro API), annotated with PDB entry counts — then search by `rcsb_polymer_entity_annotation.annotation_id`. |
|
|
36
|
+
| `rcsb_find_enzyme_classes` | Resolve a free-text enzyme / reaction to Enzyme Commission (EC) numbers (via EBI Search/IntEnz), annotated with PDB entry counts — then search by `rcsb_polymer_entity.rcsb_ec_lineage.id` (hierarchical). |
|
|
37
|
+
| `rcsb_find_disease_terms` | Resolve a free-text disease / condition to MONDO ids (via EBI OLS), annotated with PDB entry counts — then search by `rcsb_uniprot_annotation.annotation_lineage.id` (hierarchical, UniProt-based). |
|
|
38
|
+
| `rcsb_find_organisms` | Resolve a free-text organism / common name / clade to NCBI Taxonomy ids (via UniProt taxonomy), annotated with PDB entry counts — then search by `rcsb_entity_source_organism.taxonomy_lineage.id` (hierarchical: a clade id matches every organism beneath it). |
|
|
39
|
+
| `rcsb_search_fulltext` | Free-text keyword search (e.g. `"CRISPR Cas9"`). |
|
|
40
|
+
| `rcsb_search_by_attribute` | Structured search on an indexed attribute (resolution, organism, release date, ...). Supports `exists`, `negation`, `case_sensitive`, and `chemical=True` (text_chem). |
|
|
41
|
+
| `rcsb_search_combined` | Combine free text + multiple attribute filters (AND/OR) in one query, with optional sort. |
|
|
42
|
+
| `rcsb_search_count` | Return only the **number** of matches — for "how many ..." questions. |
|
|
43
|
+
| `rcsb_search_facets` | Aggregate matches into buckets/statistics (terms, histogram, date_histogram, range, cardinality) — for "distribution / breakdown / per X" questions. |
|
|
44
|
+
| `rcsb_search_by_sequence` | MMseqs2 sequence-similarity search (BLAST-like). |
|
|
45
|
+
| `rcsb_search_by_chemical` | Chemical search by SMILES/InChI descriptor (whole-molecule or substructure) or molecular formula. |
|
|
46
|
+
| `rcsb_search_by_structure` | 3D shape-similarity search against a reference PDB assembly or chain. |
|
|
47
|
+
| `rcsb_search_by_seqmotif` | Short **sequence**-motif search (PROSITE pattern, regex, or simple wildcards). |
|
|
48
|
+
| `rcsb_search_strucmotif` | 3D **structural**-motif search: structures sharing a geometric arrangement of specific residues (e.g. a catalytic triad). |
|
|
49
|
+
| `rcsb_search_advanced` | Escape hatch: run a raw Search API query body (`return_all_hits`, grouped results, deeply nested boolean queries, ...). |
|
|
50
|
+
|
|
51
|
+
The three text tools (`rcsb_search_fulltext`, `rcsb_search_by_attribute`, `rcsb_search_combined`)
|
|
52
|
+
also take `group_by_identity` (100/95/90/70/50/30) to return one representative
|
|
53
|
+
per sequence-identity cluster — i.e. non-redundant results. To search
|
|
54
|
+
chemical-component attributes, find the path with
|
|
55
|
+
`rcsb_list_pdb_search_attributes(schema="chemical")`, then pass `chemical=True` to
|
|
56
|
+
`rcsb_search_by_attribute` / `rcsb_search_combined` (usually with `return_type="mol_definition"`).
|
|
57
|
+
The chemical catalog is generated from the live metadata schema by
|
|
58
|
+
[`scripts/generate_chemical_attributes.py`](scripts/generate_chemical_attributes.py).
|
|
59
|
+
|
|
60
|
+
**Paging.** Every search tool that returns hits accepts `limit` (1–100, default
|
|
61
|
+
10) and `offset` (default 0). Each response reports `total_count`, `has_more`,
|
|
62
|
+
and `next_offset`; to fetch the next page, call the tool again with the same
|
|
63
|
+
query and `offset` set to the returned `next_offset`.
|
|
64
|
+
|
|
65
|
+
### Data (data.rcsb.org/graphql)
|
|
66
|
+
|
|
67
|
+
There is one tool per Data API GraphQL root field. Each takes a **list of IDs**
|
|
68
|
+
(singular lookups = a one-element list) plus an optional `fields` argument to
|
|
69
|
+
override the curated default selection with your own GraphQL sub-selection.
|
|
70
|
+
Unknown IDs are reported under `not_found`.
|
|
71
|
+
|
|
72
|
+
| Tool | Object | Example ID |
|
|
73
|
+
|------|--------|----------------------------------|
|
|
74
|
+
| `rcsb_get_entries` | PDB entries | `"4HHB"` |
|
|
75
|
+
| `rcsb_get_entry_annotations` | Entry biological/functional annotations (GO, domains, disease, ...) | `"4HHB"` |
|
|
76
|
+
| `rcsb_get_entry_exp_info` | Entry experimental conditions / determination metadata | `"4HHB"` |
|
|
77
|
+
| `rcsb_get_polymer_entities` | Polymer entities (protein/NA) | `"4HHB_1"` |
|
|
78
|
+
| `rcsb_get_nonpolymer_entities` | Ligand/cofactor entities | `"4HHB_3"` |
|
|
79
|
+
| `rcsb_get_branched_entities` | Carbohydrate entities | `"5FMB_2"` |
|
|
80
|
+
| `rcsb_get_polymer_entity_instances` | Polymer chains | `"4HHB.A"` |
|
|
81
|
+
| `rcsb_get_nonpolymer_entity_instances` | Bound-ligand instances | `"4HHB.E"` |
|
|
82
|
+
| `rcsb_get_branched_entity_instances` | Glycan chains | `"5FMB.C"` |
|
|
83
|
+
| `rcsb_get_assemblies` | Biological assemblies | `"4HHB-1"` |
|
|
84
|
+
| `rcsb_get_interfaces` | Assembly interfaces | `"1BMV-1.1"` |
|
|
85
|
+
| `rcsb_get_chem_comps` | Chemical components / ligands | `"HEM"`, `"ATP"` |
|
|
86
|
+
| `rcsb_get_entry_groups` | Entry groups | `"G_1002266"` |
|
|
87
|
+
| `rcsb_get_polymer_entity_groups` | Polymer entity groups (seq. clusters) | `"85_70"` |
|
|
88
|
+
| `rcsb_get_nonpolymer_entity_groups` | Non-polymer entity groups | `"ATP"` |
|
|
89
|
+
| `rcsb_get_uniprot` | UniProt record (single) | `"P69905"` |
|
|
90
|
+
| `rcsb_get_pubmed` | PubMed record (single, integer) | `6726807` |
|
|
91
|
+
| `rcsb_get_group_provenance` | Grouping provenance (single) | `"provenance_sequence_identity"` |
|
|
92
|
+
| `rcsb_data_graphql` | Escape hatch: run any GraphQL query against the Data API. | — |
|
|
93
|
+
|
|
94
|
+
The Search API only returns identifiers, so the search tools optionally
|
|
95
|
+
**enrich** entry hits with metadata. Enrichment and all Data API tools query
|
|
96
|
+
the GraphQL endpoint, batching every requested ID into one request. All 18
|
|
97
|
+
typed tools are generated from a single registry in
|
|
98
|
+
[`queries.py`](src/rcsb_mcp/queries.py) (`DATA_OBJECTS`), so adding a field or
|
|
99
|
+
endpoint is a one-line change.
|
|
100
|
+
|
|
101
|
+
### Sequence Coordinates (sequence-coordinates.rcsb.org/graphql)
|
|
102
|
+
|
|
103
|
+
Maps alignments and positional annotations between sequence reference systems
|
|
104
|
+
(`UNIPROT`, `NCBI_PROTEIN`, `NCBI_GENOME`, `PDB_ENTITY`, `PDB_INSTANCE`). Each
|
|
105
|
+
tool takes an optional `fields` argument to override the default selection; use
|
|
106
|
+
`rcsb_describe_seqcoord_object` to discover what fields are available.
|
|
107
|
+
|
|
108
|
+
This is the **only** RCSB API that cross-references **NCBI** (RefSeq protein /
|
|
109
|
+
genome) — the Data API only knows UniProt. So "what NCBI proteins map to a PDB
|
|
110
|
+
structure?" is answered by `rcsb_seqcoord_alignments`, not the Data API. PDB query
|
|
111
|
+
ids must be **entity-level** (`4HHB_1`), not a bare entry (`4HHB`); for a whole
|
|
112
|
+
entry, query each polymer entity.
|
|
113
|
+
|
|
114
|
+
| Tool | What it does |
|
|
115
|
+
|------|--------------|
|
|
116
|
+
| `rcsb_seqcoord_alignments` | Cross-reference a sequence across PDB / UniProt / NCBI with aligned ranges (e.g. `4HHB_1` → NCBI proteins `NP_000508`, `NP_000549`). |
|
|
117
|
+
| `rcsb_seqcoord_annotations` | Positional features for one sequence, from one or more annotation `sources` (`UNIPROT`, `PDB_ENTITY`, `PDB_INSTANCE`, `PDB_INTERFACE`). |
|
|
118
|
+
| `rcsb_seqcoord_group_alignments` | Alignments among members of a sequence group (`MATCHING_UNIPROT_ACCESSION` / `SEQUENCE_IDENTITY`). |
|
|
119
|
+
| `rcsb_seqcoord_group_annotations` | Annotations across a group; `summary=True` returns a positional summary. |
|
|
120
|
+
| `rcsb_seqcoord_graphql` | Escape hatch: run any GraphQL query against the Sequence Coordinates API. |
|
|
121
|
+
| `rcsb_describe_seqcoord_object` | Introspect the live schema to discover fields available on a seqcoord object (for use with `fields=`). |
|
|
122
|
+
|
|
123
|
+
## Install
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# run the published package without installing (recommended for clients)
|
|
127
|
+
uvx rcsb-mcp
|
|
128
|
+
# or install it
|
|
129
|
+
pip install rcsb-mcp
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
`rcsb-mcp` is listed in the [Official MCP Registry](https://registry.modelcontextprotocol.io)
|
|
133
|
+
as `io.github.rcsb/rcsb-mcp`, so registry-aware clients can discover it directly.
|
|
134
|
+
|
|
135
|
+
For local development, install from the project root instead:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pip install -e .
|
|
139
|
+
# or with uv
|
|
140
|
+
uv pip install -e .
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Run / test
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# unit tests (no network)
|
|
147
|
+
hatch test # or: python tests/test_queries.py
|
|
148
|
+
|
|
149
|
+
# run the server over stdio
|
|
150
|
+
python -m rcsb_mcp.server
|
|
151
|
+
# or, after install:
|
|
152
|
+
rcsb-mcp
|
|
153
|
+
|
|
154
|
+
# inspect interactively
|
|
155
|
+
npx @modelcontextprotocol/inspector python -m rcsb_mcp.server
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
There is also an end-to-end **evaluation suite** ([`evals/`](evals/)) — 10
|
|
159
|
+
read-only, stable questions that measure how well an LLM can drive these tools to
|
|
160
|
+
answer real PDB questions. See [`evals/README.md`](evals/README.md) to run it.
|
|
161
|
+
|
|
162
|
+
## Connect to Claude Desktop
|
|
163
|
+
|
|
164
|
+
Edit `claude_desktop_config.json`:
|
|
165
|
+
- macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
166
|
+
- Windows: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
167
|
+
|
|
168
|
+
```json
|
|
169
|
+
{
|
|
170
|
+
"mcpServers": {
|
|
171
|
+
"rcsb-mcp": {
|
|
172
|
+
"command": "uvx",
|
|
173
|
+
"args": ["rcsb-mcp"]
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
For a local source checkout, point at the module instead:
|
|
180
|
+
|
|
181
|
+
```json
|
|
182
|
+
{
|
|
183
|
+
"mcpServers": {
|
|
184
|
+
"rcsb-mcp": {
|
|
185
|
+
"command": "python",
|
|
186
|
+
"args": ["-m", "rcsb_mcp.server"],
|
|
187
|
+
"cwd": "/absolute/path/to/rcsb-mcp/src"
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Restart Claude Desktop. The tools appear under the connectors (plug) icon.
|
|
194
|
+
|
|
195
|
+
## Example prompts
|
|
196
|
+
|
|
197
|
+
- "Find high-resolution human hemoglobin structures." → `rcsb_search_by_attribute` + `rcsb_search_fulltext`
|
|
198
|
+
- "Human hemoglobin structures better than 2 Å, best resolution first." → `rcsb_search_combined`
|
|
199
|
+
- "What PDB entries match this protein sequence: MTEY..." → `rcsb_search_by_sequence`
|
|
200
|
+
- "Find structures containing a ligand like this SMILES / with formula C8H9NO2." → `rcsb_search_by_chemical`
|
|
201
|
+
- "Which structures have a 3D fold similar to 4HHB?" → `rcsb_search_by_structure`
|
|
202
|
+
- "Find proteins with a zinc-finger motif." → `rcsb_search_by_seqmotif`
|
|
203
|
+
- "Structures of proteins with kinase activity / involved in DNA repair / in the mitochondrial membrane." → `rcsb_find_go_terms` → `rcsb_search_by_attribute` on `rcsb_polymer_entity_annotation.annotation_lineage.id`
|
|
204
|
+
- "Structures containing an SH2 domain / immunoglobulin fold." → `rcsb_find_interpro_domains` → `rcsb_search_by_attribute` on `rcsb_polymer_entity_annotation.annotation_id`
|
|
205
|
+
- "Alcohol dehydrogenase structures / any EC 3.4.21 serine protease." → `rcsb_find_enzyme_classes` → `rcsb_search_by_attribute` on `rcsb_polymer_entity.rcsb_ec_lineage.id`
|
|
206
|
+
- "Structures of proteins associated with cystic fibrosis / breast cancer." → `rcsb_find_disease_terms` → `rcsb_search_by_attribute` on `rcsb_uniprot_annotation.annotation_lineage.id`
|
|
207
|
+
- "Structures from mammals / from a particular organism or clade." → `rcsb_find_organisms` → `rcsb_search_by_attribute` on `rcsb_entity_source_organism.taxonomy_lineage.id`
|
|
208
|
+
- "Non-redundant human kinase structures (90% identity clusters)." → `rcsb_search_fulltext` / `rcsb_search_combined` with `group_by_identity=90`
|
|
209
|
+
- "How many human X-ray structures are there?" → `rcsb_search_count`
|
|
210
|
+
- "Break down ribosome structures by experimental method / by release year." → `rcsb_search_facets`
|
|
211
|
+
- "Find structures with the same catalytic-site geometry as residues 162/193/219 of 2MNR." → `rcsb_search_strucmotif`
|
|
212
|
+
- "Find chemical components under 150 Da." → `rcsb_list_pdb_search_attributes(schema="chemical")` + `rcsb_search_by_attribute` with `chemical=True`
|
|
213
|
+
- "Summarize PDB entries 4HHB, 1MBN and 6VXX." → `rcsb_get_entries`
|
|
214
|
+
- "What's the sequence and organism of entity 4HHB_1?" → `rcsb_get_polymer_entities`
|
|
215
|
+
- "Tell me about the ligand HEM." → `rcsb_get_chem_comps`
|
|
216
|
+
- "What's the composition of the 4HHB biological assembly?" → `rcsb_get_assemblies`
|
|
217
|
+
- "Which PDB entries does P69905 map to?" → `rcsb_get_uniprot`
|
|
218
|
+
- "Which PDB entities align to UniProt P69905, and over what ranges?" → `rcsb_seqcoord_alignments`
|
|
219
|
+
- "What NCBI proteins map to 4HHB?" → `rcsb_seqcoord_alignments` per entity (`4HHB_1`, `4HHB_2`), `to_ref=NCBI_PROTEIN`
|
|
220
|
+
- "Show UniProt features mapped onto PDB entity 4HHB_1." → `rcsb_seqcoord_annotations`
|
|
221
|
+
- "Pull a field the typed tools don't return by default / combine objects in one query." → `rcsb_data_graphql`
|
|
222
|
+
|
|
223
|
+
## Notes
|
|
224
|
+
|
|
225
|
+
- Search endpoint: `https://search.rcsb.org/rcsbsearch/v2/query` (POST, JSON body).
|
|
226
|
+
- Data endpoint: `https://data.rcsb.org/graphql` (POST, GraphQL). It returns
|
|
227
|
+
HTTP 200 even for query errors, reporting them in an `errors` array.
|
|
228
|
+
- Sequence Coordinates endpoint: `https://sequence-coordinates.rcsb.org/graphql`
|
|
229
|
+
(POST, GraphQL; same HTTP-200-with-`errors` behavior).
|
|
230
|
+
- The `rcsb_find_*` resolvers map free text to ontology ids via EBI services — the non-RCSB
|
|
231
|
+
dependencies: GO via QuickGO (`.../QuickGO/services/ontology/go/search`), InterPro
|
|
232
|
+
(`.../interpro/api/entry/interpro/`), EC via EBI Search/IntEnz (`.../ebisearch/ws/rest/intenz`),
|
|
233
|
+
disease via OLS/MONDO (`.../ols4/api/search?ontology=mondo`), and organisms via UniProt taxonomy. The resolved ids then drive
|
|
234
|
+
RCSB annotation searches (`rcsb_polymer_entity_annotation.*`, `rcsb_polymer_entity.rcsb_ec_lineage.id`,
|
|
235
|
+
`rcsb_uniprot_annotation.annotation_lineage.id`).
|
|
236
|
+
- No API key required; the APIs are public. Be considerate with request volume.
|
|
237
|
+
- A full list of searchable attributes for `rcsb_search_by_attribute` is in the
|
|
238
|
+
[Search API attribute reference](https://search.rcsb.org/structure-search-attributes.html);
|
|
239
|
+
the Data API schema is documented at
|
|
240
|
+
[data.rcsb.org/index.html#gql-api](https://data.rcsb.org/index.html#gql-api).
|
|
241
|
+
|
|
242
|
+
## Instructions prompt
|
|
243
|
+
|
|
244
|
+
Use [prompts/pdb-assistant.md](./prompts/pdb-assistant.md) as the instruction prompt for your project.
|