rcsb-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ .git
2
+ .gitignore
3
+ .idea
4
+ .claude
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.egg-info/
8
+ .venv/
9
+ venv/
10
+ .DS_Store
11
+ prompts/
12
+ scripts/
13
+ Dockerfile
14
+ .dockerignore
@@ -0,0 +1,20 @@
1
+ # Docker build and push workflow
2
+
3
+ name: Run CI/CD Docker Workflow
4
+
5
+ # Reusable workflow — invoked by publish.yaml (and runnable manually). It does
6
+ # not trigger on push directly; the release pipeline calls it via `uses:`.
7
+ on:
8
+ workflow_call:  # lets another workflow invoke this one through `uses:`
9
+ workflow_dispatch:  # allows a manual run
10
+
11
+ jobs:
12
+ run-workflow:
13
+ name: "Run automated docker workflow"
14
+ if: github.event_name != 'pull_request'  # skip the whole job when the triggering event is a pull request
15
+ uses: rcsb/devops-cicd-github-actions/.github/workflows/workflow-docker.yaml@master  # NOTE(review): follows the shared workflow's mutable master branch rather than a pinned tag/SHA
16
+ with:
17
+ repo_project: "rcsb" # REQUIRED. The name of the project or organization in the remote Docker image repository.
18
+ docker_image_name: "rcsb-mcp" # REQUIRED. The name of the Docker image to create.
19
+ docker_build_context: "." # The path location of the docker build context, relative to the project root. Defaults to the project root.
20
+ mainline_branch: "master"  # presumably the branch the shared workflow treats as mainline — confirm against its declared inputs
@@ -0,0 +1,125 @@
1
+ name: CI Pipeline
2
+
3
+ on:
4
+ push:  # pushes to master
5
+ branches: [master]
6
+ pull_request:  # pull requests targeting master
7
+ branches: [master]
8
+ release:  # the build and publish jobs are gated on github.event_name == 'release'
9
+ types: [published]
10
+
11
+ jobs:
12
+
13
+ hatch-test:  # runs the Hatch test suite once per Python version in the matrix
14
+ name: Test on Python ${{ matrix.python-version }}
15
+ runs-on: ["self-hosted", "buildchain"]  # NOTE(review): this job also runs for pull_request events; confirm untrusted fork PRs cannot execute on the self-hosted runner
16
+ timeout-minutes: 20
17
+ strategy:
18
+ matrix:
19
+ python-version: ["3.10"]  # quoted so YAML does not read 3.10 as the float 3.1
20
+ steps:
21
+ - name: Checkout code
22
+ uses: actions/checkout@v4
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5  # v4 targets the retired Node 16 action runtime
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+
29
+ - name: Install Hatch
30
+ run: pip install hatch
31
+
32
+ - name: Run tests
33
+ run: hatch test
34
+
35
+ hatch-build:  # builds the distribution with Hatch and stores dist/ as an artifact for the publish job
36
+ name: Build to PyPI
37
+ needs:
38
+ - hatch-test
39
+ runs-on: ubuntu-latest
40
+ if: github.event_name == 'release'  # only build distributions for release events
41
+ steps:
42
+ - name: Checkout code
43
+ uses: actions/checkout@v4
44
+
45
+ - name: Set up Python 3.10
46
+ uses: actions/setup-python@v5  # v4 targets the retired Node 16 action runtime
47
+ with:
48
+ python-version: "3.10"
49
+
50
+ - name: Install Hatch
51
+ run: pip install hatch
52
+
53
+ - name: Build distribution
54
+ run: hatch build
55
+
56
+ - name: Store the distribution packages
57
+ uses: actions/upload-artifact@v4
58
+ with:
59
+ name: python-package-distributions  # publish-to-pypi downloads the artifact by this name
60
+ path: dist/
61
+
62
+ push-image:  # calls the reusable Docker workflow; it has no `if:` of its own and depends on hatch-build, which is gated on release events
63
+ needs:
64
+ - hatch-build
65
+ name: Push image to harbor
66
+ uses: ./.github/workflows/_workflow-docker.yaml
67
+ secrets: inherit  # pass this workflow's secrets through to the called workflow
68
+
69
+ publish-to-pypi:  # downloads the dist/ artifact built by hatch-build and uploads it to PyPI
70
+ name: >-
71
+ Publish Python 🐍 distribution 📦 to PyPI
72
+ if: github.event_name == 'release'
73
+ needs:
74
+ - hatch-build
75
+ runs-on: ubuntu-latest
76
+ environment:
77
+ name: pypi
78
+ url: https://pypi.org/p/rcsb-mcp
79
+ permissions:
80
+ id-token: write  # no API token is passed to the publish step, so this presumably relies on PyPI trusted publishing (OIDC) — confirm
81
+
82
+ steps:
83
+ - name: Download all the dists
84
+ uses: actions/download-artifact@v4
85
+ with:
86
+ name: python-package-distributions  # same artifact name that hatch-build uploads
87
+ path: dist/
88
+ - name: Publish distribution 📦 to PyPI
89
+ uses: pypa/gh-action-pypi-publish@release/v1
90
+
91
+ publish-to-registry:  # checks the server.json version, installs mcp-publisher, then logs in and publishes to the MCP Registry
92
+ name: Publish to MCP Registry
93
+ if: github.event_name == 'release'
94
+ needs:
95
+ # The PyPI package must exist first — the registry validates that the
96
+ # version in server.json is published and carries the mcp-name marker.
97
+ - publish-to-pypi
98
+ runs-on: ubuntu-latest
99
+ permissions:
100
+ id-token: write # OIDC token for github-oidc auth to the registry
101
+ contents: read
102
+
103
+ steps:
104
+ - name: Checkout code
105
+ uses: actions/checkout@v4
106
+
107
+ - name: Verify server.json version matches the package  # NOTE(review): runs after publish-to-pypi, so a mismatch is only detected once the PyPI release is already out
108
+ run: |
109
+ server_version=$(jq -r .version server.json)
110
+ pkg_version=$(grep -E '^version *=' pyproject.toml | head -1 | sed -E 's/.*"([^"]+)".*/\1/')
111
+ echo "server.json=$server_version pyproject.toml=$pkg_version"
112
+ if [ "$server_version" != "$pkg_version" ]; then
113
+ echo "::error::server.json version ($server_version) != pyproject.toml ($pkg_version). Bump both before releasing."
114
+ exit 1
115
+ fi
116
+
117
+ - name: Install mcp-publisher  # fetches the latest release tarball for this runner's OS/arch; pipefail + --fail make a failed download fail the step at curl
118
+ run: |
119
+ set -o pipefail; curl -fsSL "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz mcp-publisher
120
+
121
+ - name: Authenticate to MCP Registry (GitHub OIDC)
122
+ run: ./mcp-publisher login github-oidc
123
+
124
+ - name: Publish server to MCP Registry
125
+ run: ./mcp-publisher publish
@@ -0,0 +1,10 @@
1
+ /.claude/
2
+ /.idea/
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.egg-info/
8
+ .pytest_cache/
9
+ build/
10
+ dist/
@@ -0,0 +1,93 @@
1
+ # AGENTS.md — working on the rcsb-mcp repo
2
+
3
+ Guidance for AI coding agents (and humans) modifying this repository. This is an
4
+ MCP server that lets an LLM **interrogate Protein Data Bank structures** across
5
+ three RCSB APIs: Search (REST), Data (GraphQL), and Sequence Coordinates (GraphQL).
6
+
7
+ > The runtime *assistant* persona and output format are **not** here — they live
8
+ > in [`prompts/pdb-assistant.md`](prompts/pdb-assistant.md), which is pasted into a
9
+ > project as a system prompt. Keep that split (see "Two layers" below).
10
+
11
+ ## Layout
12
+
13
+ ```
14
+ src/rcsb_mcp/
15
+ server.py MCP server: @mcp.tool() tools, HTTP calls, schema introspection
16
+ queries.py PURE request-body builders (no network) + the DATA_OBJECTS registry
17
+ graphql_queries.py Large GraphQL field-selection constants (ENTRY_ANNOTATIONS, ...)
18
+ search_attributes.py SEARCH_ATTRIBUTES catalog (structure search schema)
19
+ chemical_search_attributes.py CHEMICAL_SEARCH_ATTRIBUTES catalog — auto-generated (see scripts/)
20
+ tests/
21
+ test_queries.py Network-free unit tests for the query builders
22
+ ```
23
+
24
+ ## Architecture & conventions
25
+
26
+ - **Pure builders vs. I/O.** `queries.py` builds request bodies and contains **no
27
+ network code**, so it stays unit-testable. `server.py` does the HTTP and exposes
28
+ the tools. Keep new query-construction logic in `queries.py`.
29
+ - **The `DATA_OBJECTS` registry** (`queries.py`) drives every Data API `rcsb_get_*` tool:
30
+ one entry per GraphQL root field (root field, id arg, batch/single, default field
31
+ selection). **Adding a Data API object is ideally a one-line registry entry.**
32
+ - **Compact defaults + escape hatches.** Each `rcsb_get_*`/`rcsb_seqcoord_*` tool returns a
33
+ curated compact field selection but accepts a `fields=` override; `rcsb_describe_data_object`
34
+ / `rcsb_describe_seqcoord_object` introspect the live schema for discovery; `rcsb_data_graphql` /
35
+ `rcsb_seqcoord_graphql` are raw passthroughs. Don't try to make defaults exhaustive.
36
+ - **Server `instructions` = tool-usage guidance only** (routing, chaining, return
37
+ types). Do **not** put application/presentation policy there — it's always-on for
38
+ every client. That belongs in the project prompt.
39
+
40
+ ## Dev workflow
41
+
42
+ ```bash
43
+ # Unit tests (no network) — run after touching queries.py / graphql_queries.py
44
+ hatch test # or: python tests/test_queries.py
45
+
46
+ # Syntax check both core modules
47
+ python -m py_compile src/rcsb_mcp/server.py src/rcsb_mcp/queries.py
48
+
49
+ # Run the server over stdio (entry point: rcsb_mcp.server:main, console script `rcsb-mcp`)
50
+ python -m rcsb_mcp.server
51
+
52
+ # Inspect interactively
53
+ npx @modelcontextprotocol/inspector python -m rcsb_mcp.server
54
+ ```
55
+
56
+ When the package is installed editable (`pip install -e .`), source edits take effect on the next
57
+ process start. The test file lives at `tests/test_queries.py`.
58
+
59
+ ## The golden rule: validate against the live API before changing field selections
60
+
61
+ Before editing any default field selection or query body, **run the proposed
62
+ selection against the live endpoint** and confirm it returns data. This is how real
63
+ bugs were caught in this repo (a non-existent `auth_asym_id` field, id case
64
+ sensitivity, `[null]` rows for unknown ids). Pattern:
65
+
66
+ ```python
67
+ import asyncio; from rcsb_mcp import server, queries
68
+ body = queries.build_data_query("entries", "4HHB", "rcsb_id <your new fields>")
69
+ print(asyncio.run(server._graphql_field(body, "entries")))
70
+ ```
71
+
72
+ After validating, add/adjust the default and re-run `test_queries.py`.
73
+
74
+ ## Gotchas
75
+
76
+ - **GraphQL endpoints return HTTP 200 even on query errors** — the error is in the
77
+ `errors` array, not the status code. `_graphql_field` already raises on it.
78
+ - **ID case sensitivity.** Entry/entity/chem ids are upper-cased; group and
79
+ `group_provenance` ids are case-sensitive opaque tokens (the `upper=False` flag in
80
+ `DATA_OBJECTS`). Don't blanket-uppercase.
81
+ - **Unknown ids** are either dropped or returned as `null` depending on the field;
82
+ batch handling filters `None` and reports `not_found`.
83
+ - **Sequence Coordinates: PDB ids must be entity/instance-level** (`4HHB_1`, not
84
+ `4HHB`); only this API cross-references NCBI.
85
+ - **Claude Desktop caches MCP processes.** After code changes, fully quit & relaunch
86
+ (⌘Q) — it does not hot-reload, and stale/duplicate processes have caused confusion.
87
+
88
+ ## Two layers (don't merge them)
89
+
90
+ - **Server `instructions`** (in `server.py`) → how to *drive the tools*; reusable
91
+ across every client/project.
92
+ - **`prompts/pdb-assistant.md`** → assistant persona + output format (HTML report,
93
+ columns, conventions); application-specific, pasted as project instructions.
@@ -0,0 +1,30 @@
1
+ # syntax=docker/dockerfile:1
2
+ FROM python:3.11-slim
3
+ # PYTHONUNBUFFERED=1: unbuffered stdout/stderr; PIP_NO_CACHE_DIR=1: pip keeps no download cache in the image.
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PIP_NO_CACHE_DIR=1
6
+
7
+ WORKDIR /app
8
+
9
+ # Create an unprivileged user up front (uid/gid 1000 to match the Helm
10
+ # podSecurityContext: runAsUser / runAsGroup / fsGroup = 1000).
11
+ RUN groupadd --gid 1000 appuser \
12
+ && useradd --create-home --uid 1000 --gid 1000 appuser
13
+
14
+ # Install the package and its dependencies (as root, before the USER switch below).
15
+ # Only build metadata + source are copied, which keeps the layer cache friendly and the image lean.
16
+ COPY pyproject.toml README.md ./
17
+ COPY src ./src
18
+ RUN pip install .
19
+
20
+ USER appuser
21
+
22
+ EXPOSE 8080
23
+
24
+ # Liveness: a TCP connect to 127.0.0.1:8080 must succeed (2s socket timeout); this checks the port only, not the /mcp endpoint.
25
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
26
+ CMD python -c "import socket,sys; s=socket.socket(); s.settimeout(2); s.connect(('127.0.0.1',8080)); s.close()" || exit 1
27
+
28
+ # Serve the MCP over the streamable-HTTP transport (endpoint: POST /mcp).
29
+ # create_app is the FastMCP streamable-HTTP ASGI app factory (--factory builds it on start).
30
+ CMD ["uvicorn", "rcsb_mcp.server:create_app", "--factory", "--host", "0.0.0.0", "--port", "8080"]
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2026 - now, RCSB PDB and contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,244 @@
1
+ Metadata-Version: 2.4
2
+ Name: rcsb-mcp
3
+ Version: 0.1.0
4
+ Summary: An MCP server for interrogating PDB structures — search, inspect, and cross-reference across the RCSB Search, Data, and Sequence Coordinates APIs
5
+ License-File: LICENSE.md
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: httpx>=0.27
8
+ Requires-Dist: mcp>=1.2.0
9
+ Requires-Dist: uvicorn>=0.30
10
+ Description-Content-Type: text/markdown
11
+
12
+ <!-- mcp-name: io.github.rcsb/rcsb-mcp -->
13
+
14
+ # rcsb-mcp
15
+
16
+ An [MCP](https://modelcontextprotocol.io) server for **interrogating Protein Data
17
+ Bank structures** — discover, inspect, and cross-reference — from LLM clients
18
+ (Claude Desktop, MCP Inspector, Cursor, etc.). It spans three RCSB APIs:
19
+
20
+ - **Discover** — find structures with the [Search API](https://search.rcsb.org)
21
+ (keyword, attribute, sequence, chemistry, 3D shape, motif).
22
+ - **Inspect** — fetch entry / entity / assembly / ligand details and annotations
23
+ from the [Data API](https://data.rcsb.org/graphql).
24
+ - **Relate** — map sequences and positional features across PDB, UniProt, and NCBI
25
+ with the [Sequence Coordinates API](https://sequence-coordinates.rcsb.org/graphql).
26
+
27
+ ## Tools
28
+
29
+ ### Search (search.rcsb.org)
30
+
31
+ | Tool | What it does |
32
+ |------|--------------|
33
+ | `rcsb_list_pdb_search_attributes` | Discover searchable attribute paths, types, and operators. `schema="structure"` (default, ~677) or `schema="chemical"` (~57: `chem_comp.*`, `drugbank_info.*`, ...). |
34
+ | `rcsb_find_go_terms` | Resolve a free-text molecular function / biological process / cellular component to Gene Ontology ids (via EBI QuickGO), annotated with PDB entry counts — then search by `rcsb_polymer_entity_annotation.annotation_lineage.id`. |
35
+ | `rcsb_find_interpro_domains` | Resolve a free-text protein domain / family / fold to InterPro ids (via EBI InterPro API), annotated with PDB entry counts — then search by `rcsb_polymer_entity_annotation.annotation_id`. |
36
+ | `rcsb_find_enzyme_classes` | Resolve a free-text enzyme / reaction to Enzyme Commission (EC) numbers (via EBI Search/IntEnz), annotated with PDB entry counts — then search by `rcsb_polymer_entity.rcsb_ec_lineage.id` (hierarchical). |
37
+ | `rcsb_find_disease_terms` | Resolve a free-text disease / condition to MONDO ids (via EBI OLS), annotated with PDB entry counts — then search by `rcsb_uniprot_annotation.annotation_lineage.id` (hierarchical, UniProt-based). |
38
+ | `rcsb_find_organisms` | Resolve a free-text organism / common name / clade to NCBI Taxonomy ids (via UniProt taxonomy), annotated with PDB entry counts — then search by `rcsb_entity_source_organism.taxonomy_lineage.id` (hierarchical: a clade id matches every organism beneath it). |
39
+ | `rcsb_search_fulltext` | Free-text keyword search (e.g. `"CRISPR Cas9"`). |
40
+ | `rcsb_search_by_attribute` | Structured search on an indexed attribute (resolution, organism, release date, ...). Supports `exists`, `negation`, `case_sensitive`, and `chemical=True` (text_chem). |
41
+ | `rcsb_search_combined` | Combine free text + multiple attribute filters (AND/OR) in one query, with optional sort. |
42
+ | `rcsb_search_count` | Return only the **number** of matches — for "how many ..." questions. |
43
+ | `rcsb_search_facets` | Aggregate matches into buckets/statistics (terms, histogram, date_histogram, range, cardinality) — for "distribution / breakdown / per X" questions. |
44
+ | `rcsb_search_by_sequence` | MMseqs2 sequence-similarity search (BLAST-like). |
45
+ | `rcsb_search_by_chemical` | Chemical search by SMILES/InChI descriptor (whole-molecule or substructure) or molecular formula. |
46
+ | `rcsb_search_by_structure` | 3D shape-similarity search against a reference PDB assembly or chain. |
47
+ | `rcsb_search_by_seqmotif` | Short **sequence**-motif search (PROSITE pattern, regex, or simple wildcards). |
48
+ | `rcsb_search_strucmotif` | 3D **structural**-motif search: structures sharing a geometric arrangement of specific residues (e.g. a catalytic triad). |
49
+ | `rcsb_search_advanced` | Escape hatch: run a raw Search API query body (`return_all_hits`, grouped results, deeply nested boolean queries, ...). |
50
+
51
+ The three text tools (`rcsb_search_fulltext`, `rcsb_search_by_attribute`, `rcsb_search_combined`)
52
+ also take `group_by_identity` (100/95/90/70/50/30) to return one representative
53
+ per sequence-identity cluster — i.e. non-redundant results. To search
54
+ chemical-component attributes, find the path with
55
+ `rcsb_list_pdb_search_attributes(schema="chemical")`, then pass `chemical=True` to
56
+ `rcsb_search_by_attribute` / `rcsb_search_combined` (usually with `return_type="mol_definition"`).
57
+ The chemical catalog is generated from the live metadata schema by
58
+ [`scripts/generate_chemical_attributes.py`](scripts/generate_chemical_attributes.py).
59
+
60
+ **Paging.** Every search tool that returns hits accepts `limit` (1–100, default
61
+ 10) and `offset` (default 0). Each response reports `total_count`, `has_more`,
62
+ and `next_offset`; to fetch the next page, call the tool again with the same
63
+ query and `offset` set to the returned `next_offset`.
64
+
65
+ ### Data (data.rcsb.org/graphql)
66
+
67
+ There is one tool per Data API GraphQL root field. Each takes a **list of IDs**
68
+ (singular lookups = a one-element list) plus an optional `fields` argument to
69
+ override the curated default selection with your own GraphQL sub-selection.
70
+ Unknown IDs are reported under `not_found`.
71
+
72
+ | Tool | Object | Example ID |
73
+ |------|--------|----------------------------------|
74
+ | `rcsb_get_entries` | PDB entries | `"4HHB"` |
75
+ | `rcsb_get_entry_annotations` | Entry biological/functional annotations (GO, domains, disease, ...) | `"4HHB"` |
76
+ | `rcsb_get_entry_exp_info` | Entry experimental conditions / determination metadata | `"4HHB"` |
77
+ | `rcsb_get_polymer_entities` | Polymer entities (protein/NA) | `"4HHB_1"` |
78
+ | `rcsb_get_nonpolymer_entities` | Ligand/cofactor entities | `"4HHB_3"` |
79
+ | `rcsb_get_branched_entities` | Carbohydrate entities | `"5FMB_2"` |
80
+ | `rcsb_get_polymer_entity_instances` | Polymer chains | `"4HHB.A"` |
81
+ | `rcsb_get_nonpolymer_entity_instances` | Bound-ligand instances | `"4HHB.E"` |
82
+ | `rcsb_get_branched_entity_instances` | Glycan chains | `"5FMB.C"` |
83
+ | `rcsb_get_assemblies` | Biological assemblies | `"4HHB-1"` |
84
+ | `rcsb_get_interfaces` | Assembly interfaces | `"1BMV-1.1"` |
85
+ | `rcsb_get_chem_comps` | Chemical components / ligands | `"HEM"`, `"ATP"` |
86
+ | `rcsb_get_entry_groups` | Entry groups | `"G_1002266"` |
87
+ | `rcsb_get_polymer_entity_groups` | Polymer entity groups (seq. clusters) | `"85_70"` |
88
+ | `rcsb_get_nonpolymer_entity_groups` | Non-polymer entity groups | `"ATP"` |
89
+ | `rcsb_get_uniprot` | UniProt record (single) | `"P69905"` |
90
+ | `rcsb_get_pubmed` | PubMed record (single, integer) | `6726807` |
91
+ | `rcsb_get_group_provenance` | Grouping provenance (single) | `"provenance_sequence_identity"` |
92
+ | `rcsb_data_graphql` | Escape hatch: run any GraphQL query against the Data API. | — |
93
+
94
+ The Search API only returns identifiers, so the search tools optionally
95
+ **enrich** entry hits with metadata. Enrichment and all Data API tools query
96
+ the GraphQL endpoint, batching every requested ID into one request. All 18
97
+ typed tools are generated from a single registry in
98
+ [`queries.py`](src/rcsb_mcp/queries.py) (`DATA_OBJECTS`), so adding a field or
99
+ endpoint is a one-line change.
100
+
101
+ ### Sequence Coordinates (sequence-coordinates.rcsb.org/graphql)
102
+
103
+ Maps alignments and positional annotations between sequence reference systems
104
+ (`UNIPROT`, `NCBI_PROTEIN`, `NCBI_GENOME`, `PDB_ENTITY`, `PDB_INSTANCE`). Each
105
+ tool takes an optional `fields` argument to override the default selection; use
106
+ `rcsb_describe_seqcoord_object` to discover what fields are available.
107
+
108
+ This is the **only** RCSB API that cross-references **NCBI** (RefSeq protein /
109
+ genome) — the Data API only knows UniProt. So "what NCBI proteins map to a PDB
110
+ structure?" is answered by `rcsb_seqcoord_alignments`, not the Data API. PDB query
111
+ ids must be **entity-level** (`4HHB_1`), not a bare entry (`4HHB`); for a whole
112
+ entry, query each polymer entity.
113
+
114
+ | Tool | What it does |
115
+ |------|--------------|
116
+ | `rcsb_seqcoord_alignments` | Cross-reference a sequence across PDB / UniProt / NCBI with aligned ranges (e.g. `4HHB_1` → NCBI proteins `NP_000508`, `NP_000549`). |
117
+ | `rcsb_seqcoord_annotations` | Positional features for one sequence, from one or more annotation `sources` (`UNIPROT`, `PDB_ENTITY`, `PDB_INSTANCE`, `PDB_INTERFACE`). |
118
+ | `rcsb_seqcoord_group_alignments` | Alignments among members of a sequence group (`MATCHING_UNIPROT_ACCESSION` / `SEQUENCE_IDENTITY`). |
119
+ | `rcsb_seqcoord_group_annotations` | Annotations across a group; `summary=True` returns a positional summary. |
120
+ | `rcsb_seqcoord_graphql` | Escape hatch: run any GraphQL query against the Sequence Coordinates API. |
121
+ | `rcsb_describe_seqcoord_object` | Introspect the live schema to discover fields available on a seqcoord object (for use with `fields=`). |
122
+
123
+ ## Install
124
+
125
+ ```bash
126
+ # run the published package without installing (recommended for clients)
127
+ uvx rcsb-mcp
128
+ # or install it
129
+ pip install rcsb-mcp
130
+ ```
131
+
132
+ `rcsb-mcp` is listed in the [Official MCP Registry](https://registry.modelcontextprotocol.io)
133
+ as `io.github.rcsb/rcsb-mcp`, so registry-aware clients can discover it directly.
134
+
135
+ For local development, install from the project root instead:
136
+
137
+ ```bash
138
+ pip install -e .
139
+ # or with uv
140
+ uv pip install -e .
141
+ ```
142
+
143
+ ## Run / test
144
+
145
+ ```bash
146
+ # unit tests (no network)
147
+ hatch test # or: python tests/test_queries.py
148
+
149
+ # run the server over stdio
150
+ python -m rcsb_mcp.server
151
+ # or, after install:
152
+ rcsb-mcp
153
+
154
+ # inspect interactively
155
+ npx @modelcontextprotocol/inspector python -m rcsb_mcp.server
156
+ ```
157
+
158
+ There is also an end-to-end **evaluation suite** ([`evals/`](evals/)) — 10
159
+ read-only, stable questions that measure how well an LLM can drive these tools to
160
+ answer real PDB questions. See [`evals/README.md`](evals/README.md) to run it.
161
+
162
+ ## Connect to Claude Desktop
163
+
164
+ Edit `claude_desktop_config.json`:
165
+ - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
166
+ - Windows: `%APPDATA%\Claude\claude_desktop_config.json`
167
+
168
+ ```json
169
+ {
170
+ "mcpServers": {
171
+ "rcsb-mcp": {
172
+ "command": "uvx",
173
+ "args": ["rcsb-mcp"]
174
+ }
175
+ }
176
+ }
177
+ ```
178
+
179
+ For a local source checkout, point at the module instead:
180
+
181
+ ```json
182
+ {
183
+ "mcpServers": {
184
+ "rcsb-mcp": {
185
+ "command": "python",
186
+ "args": ["-m", "rcsb_mcp.server"],
187
+ "cwd": "/absolute/path/to/rcsb-mcp/src"
188
+ }
189
+ }
190
+ }
191
+ ```
192
+
193
+ Restart Claude Desktop. The tools appear under the connectors (plug) icon.
194
+
195
+ ## Example prompts
196
+
197
+ - "Find high-resolution human hemoglobin structures." → `rcsb_search_by_attribute` + `rcsb_search_fulltext`
198
+ - "Human hemoglobin structures better than 2 Å, best resolution first." → `rcsb_search_combined`
199
+ - "What PDB entries match this protein sequence: MTEY..." → `rcsb_search_by_sequence`
200
+ - "Find structures containing a ligand like this SMILES / with formula C8H9NO2." → `rcsb_search_by_chemical`
201
+ - "Which structures have a 3D fold similar to 4HHB?" → `rcsb_search_by_structure`
202
+ - "Find proteins with a zinc-finger motif." → `rcsb_search_by_seqmotif`
203
+ - "Structures of proteins with kinase activity / involved in DNA repair / in the mitochondrial membrane." → `rcsb_find_go_terms` → `rcsb_search_by_attribute` on `rcsb_polymer_entity_annotation.annotation_lineage.id`
204
+ - "Structures containing an SH2 domain / immunoglobulin fold." → `rcsb_find_interpro_domains` → `rcsb_search_by_attribute` on `rcsb_polymer_entity_annotation.annotation_id`
205
+ - "Alcohol dehydrogenase structures / any EC 3.4.21 serine protease." → `rcsb_find_enzyme_classes` → `rcsb_search_by_attribute` on `rcsb_polymer_entity.rcsb_ec_lineage.id`
206
+ - "Structures of proteins associated with cystic fibrosis / breast cancer." → `rcsb_find_disease_terms` → `rcsb_search_by_attribute` on `rcsb_uniprot_annotation.annotation_lineage.id`
207
+ - "Structures from mammals / from a particular organism or clade." → `rcsb_find_organisms` → `rcsb_search_by_attribute` on `rcsb_entity_source_organism.taxonomy_lineage.id`
208
+ - "Non-redundant human kinase structures (90% identity clusters)." → `rcsb_search_fulltext` / `rcsb_search_combined` with `group_by_identity=90`
209
+ - "How many human X-ray structures are there?" → `rcsb_search_count`
210
+ - "Break down ribosome structures by experimental method / by release year." → `rcsb_search_facets`
211
+ - "Find structures with the same catalytic-site geometry as residues 162/193/219 of 2MNR." → `rcsb_search_strucmotif`
212
+ - "Find chemical components under 150 Da." → `rcsb_list_pdb_search_attributes(schema="chemical")` + `rcsb_search_by_attribute` with `chemical=True`
213
+ - "Summarize PDB entries 4HHB, 1MBN and 6VXX." → `rcsb_get_entries`
214
+ - "What's the sequence and organism of entity 4HHB_1?" → `rcsb_get_polymer_entities`
215
+ - "Tell me about the ligand HEM." → `rcsb_get_chem_comps`
216
+ - "What's the composition of the 4HHB biological assembly?" → `rcsb_get_assemblies`
217
+ - "Which PDB entries does P69905 map to?" → `rcsb_get_uniprot`
218
+ - "Which PDB entities align to UniProt P69905, and over what ranges?" → `rcsb_seqcoord_alignments`
219
+ - "What NCBI proteins map to 4HHB?" → `rcsb_seqcoord_alignments` per entity (`4HHB_1`, `4HHB_2`), `to_ref=NCBI_PROTEIN`
220
+ - "Show UniProt features mapped onto PDB entity 4HHB_1." → `rcsb_seqcoord_annotations`
221
+ - "Pull a field the typed tools don't select by default / combine objects in one query." → `rcsb_data_graphql`
222
+
223
+ ## Notes
224
+
225
+ - Search endpoint: `https://search.rcsb.org/rcsbsearch/v2/query` (POST, JSON body).
226
+ - Data endpoint: `https://data.rcsb.org/graphql` (POST, GraphQL). It returns
227
+ HTTP 200 even for query errors, reporting them in an `errors` array.
228
+ - Sequence Coordinates endpoint: `https://sequence-coordinates.rcsb.org/graphql`
229
+ (POST, GraphQL; same HTTP-200-with-`errors` behavior).
230
+ - The `rcsb_find_*` resolvers map free text to ontology / taxonomy ids via external services — the non-RCSB
231
+ dependencies: GO via QuickGO (`.../QuickGO/services/ontology/go/search`), InterPro
232
+ (`.../interpro/api/entry/interpro/`), EC via EBI Search/IntEnz (`.../ebisearch/ws/rest/intenz`),
233
+ disease via OLS/MONDO (`.../ols4/api/search?ontology=mondo`), and organisms via the UniProt taxonomy service. The resolved ids then drive
234
+ RCSB annotation searches (`rcsb_polymer_entity_annotation.*`, `rcsb_polymer_entity.rcsb_ec_lineage.id`,
235
+ `rcsb_uniprot_annotation.annotation_lineage.id`, `rcsb_entity_source_organism.taxonomy_lineage.id`).
236
+ - No API key required; the APIs are public. Be considerate with request volume.
237
+ - A full list of searchable attributes for `rcsb_search_by_attribute` is in the
238
+ [Search API attribute reference](https://search.rcsb.org/structure-search-attributes.html);
239
+ the Data API schema is documented at
240
+ [data.rcsb.org/index.html#gql-api](https://data.rcsb.org/index.html#gql-api).
241
+
242
+ ## Instructions prompt
243
+
244
+ Use [prompts/pdb-assistant.md](./prompts/pdb-assistant.md) as the instruction prompt for your project.