ncbi-datasets-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ncbi_datasets_mcp-0.1.0/.github/workflows/ci.yml +90 -0
- ncbi_datasets_mcp-0.1.0/CHANGELOG.md +23 -0
- ncbi_datasets_mcp-0.1.0/LICENSE +1 -0
- ncbi_datasets_mcp-0.1.0/PKG-INFO +145 -0
- ncbi_datasets_mcp-0.1.0/README.md +114 -0
- ncbi_datasets_mcp-0.1.0/build_mcpb.py +81 -0
- ncbi_datasets_mcp-0.1.0/ci.yml +90 -0
- ncbi_datasets_mcp-0.1.0/installer.py +110 -0
- ncbi_datasets_mcp-0.1.0/manifest.json +53 -0
- ncbi_datasets_mcp-0.1.0/pyproject.toml +60 -0
- ncbi_datasets_mcp-0.1.0/scripts/build_mcpb.py +81 -0
- ncbi_datasets_mcp-0.1.0/scripts/gen_enums.py +111 -0
- ncbi_datasets_mcp-0.1.0/server.py +316 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/__init__.py +3 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/installer.py +110 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/locator.py +64 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/runner.py +125 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/config.py +40 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/_generated_enums.py +33 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/common.py +36 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/genome.py +127 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/taxonomy.py +24 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/models/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/models/responses.py +28 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/rest/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/rest/client.py +90 -0
- ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/server.py +315 -0
- ncbi_datasets_mcp-0.1.0/tests/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/tests/conftest.py +32 -0
- ncbi_datasets_mcp-0.1.0/tests/integration/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/tests/integration/test_integration.py +47 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/__init__.py +1 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_installer.py +51 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_locator.py +86 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_runner.py +98 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_domains_genome.py +152 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_domains_taxonomy.py +56 -0
- ncbi_datasets_mcp-0.1.0/tests/unit/test_rest_client.py +116 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags: ["v*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main]
|
|
9
|
+
schedule:
|
|
10
|
+
# Nightly at 06:00 UTC — runs integration tests against live NCBI API
|
|
11
|
+
- cron: "0 6 * * *"
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
inputs:
|
|
14
|
+
run_integration:
|
|
15
|
+
description: "Run integration tests against live NCBI API"
|
|
16
|
+
type: boolean
|
|
17
|
+
default: false
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
lint:
|
|
21
|
+
name: Lint
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
- run: pip install ruff
|
|
29
|
+
- run: ruff check src/ tests/
|
|
30
|
+
|
|
31
|
+
unit-tests:
|
|
32
|
+
name: Unit tests (Python ${{ matrix.python-version }})
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
needs: lint
|
|
35
|
+
strategy:
|
|
36
|
+
fail-fast: false
|
|
37
|
+
matrix:
|
|
38
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: ${{ matrix.python-version }}
|
|
44
|
+
- name: Install
|
|
45
|
+
run: pip install -e ".[dev]"
|
|
46
|
+
- name: Run unit tests
|
|
47
|
+
run: pytest tests/unit/ -v --tb=short
|
|
48
|
+
|
|
49
|
+
integration-tests:
|
|
50
|
+
name: Integration tests (live NCBI API)
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
# Only run on schedule or manual trigger — not on every push/PR
|
|
53
|
+
if: |
|
|
54
|
+
github.event_name == 'schedule' ||
|
|
55
|
+
github.event.inputs.run_integration == 'true'
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
- uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.12"
|
|
61
|
+
- name: Install
|
|
62
|
+
run: pip install -e ".[dev]"
|
|
63
|
+
- name: Run integration tests
|
|
64
|
+
run: pytest -m integration -v --tb=short
|
|
65
|
+
env:
|
|
66
|
+
# Optional: add NCBI_API_KEY as a repo secret for higher rate limits
|
|
67
|
+
NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
|
|
68
|
+
|
|
69
|
+
publish:
|
|
70
|
+
name: Publish to PyPI
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
needs: unit-tests
|
|
73
|
+
# Only publish when a version tag (v*) is pushed; the branch the tag points at is not checked
|
|
74
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
75
|
+
environment:
|
|
76
|
+
name: pypi
|
|
77
|
+
url: https://pypi.org/project/ncbi-datasets-mcp/
|
|
78
|
+
permissions:
|
|
79
|
+
id-token: write # Required for trusted publishing (no token needed)
|
|
80
|
+
steps:
|
|
81
|
+
- uses: actions/checkout@v4
|
|
82
|
+
- uses: actions/setup-python@v5
|
|
83
|
+
with:
|
|
84
|
+
python-version: "3.12"
|
|
85
|
+
- name: Build
|
|
86
|
+
run: |
|
|
87
|
+
pip install build
|
|
88
|
+
python -m build
|
|
89
|
+
- name: Publish
|
|
90
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented here.
|
|
4
|
+
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
5
|
+
|
|
6
|
+
## [Unreleased]
|
|
7
|
+
|
|
8
|
+
## [0.1.0] — Initial release
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- `ensure_cli` tool: detect and auto-install `datasets`/`dataformat` binaries from NCBI FTP
|
|
12
|
+
- `genome_summary_by_taxon`: search genome assemblies by organism name or tax ID (REST)
|
|
13
|
+
- `genome_summary_by_accession`: fetch metadata for known accessions (REST)
|
|
14
|
+
- `genome_download_by_taxon`: download genome packages by taxon (CLI)
|
|
15
|
+
- `genome_download_by_accession`: download genome packages by accession (CLI)
|
|
16
|
+
- `rehydrate_genome_package`: fetch sequence files for dehydrated packages (CLI)
|
|
17
|
+
- `dataformat_genome_tsv`: convert genome JSONL data reports to TSV (CLI)
|
|
18
|
+
- `taxonomy_summary`: get lineage, rank, and names for a taxon (REST)
|
|
19
|
+
- `taxonomy_download`: download taxonomy packages (CLI)
|
|
20
|
+
- Desktop Extension manifest (`.mcpb`) with `NCBI_AUTO_INSTALL=true` default
|
|
21
|
+
- `scripts/gen_enums.py` to regenerate enums from the live NCBI OpenAPI spec
|
|
22
|
+
- `scripts/build_mcpb.py` to produce a distributable `.mcpb` bundle
|
|
23
|
+
- GitHub Actions CI: lint, unit test matrix (Python 3.10–3.12), nightly integration tests, PyPI trusted publish on tag
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MIT License
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ncbi-datasets-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for NCBI Datasets — search metadata and download genomic data
|
|
5
|
+
Author-email: John Torcivia <jtorcivia@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: bioinformatics,datasets,genomics,mcp,ncbi
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: httpx>=0.27.0
|
|
18
|
+
Requires-Dist: mcp[cli]>=1.9.0
|
|
19
|
+
Requires-Dist: platformdirs>=4.0.0
|
|
20
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
21
|
+
Requires-Dist: pydantic>=2.0.0
|
|
22
|
+
Requires-Dist: tenacity>=8.0.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: pyyaml>=6.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: respx>=0.21.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# ncbi-datasets-mcp
|
|
33
|
+
NOTE: This project is not affiliated with NCBI or NCBI Datasets; it is a user-provided tool.
|
|
34
|
+
|
|
35
|
+
An MCP server that gives Claude access to [NCBI Datasets v2](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/) — search genome assembly metadata, retrieve taxonomy records, and download data packages without leaving your conversation.
|
|
36
|
+
|
|
37
|
+
## Tools
|
|
38
|
+
|
|
39
|
+
| Tool | Transport | Description |
|
|
40
|
+
|------|-----------|-------------|
|
|
41
|
+
| `ensure_cli` | — | Install the NCBI CLI tools (run once, or set `NCBI_AUTO_INSTALL=true`) |
|
|
42
|
+
| `genome_summary_by_taxon` | REST | Search genome assemblies by organism name or tax ID |
|
|
43
|
+
| `genome_summary_by_accession` | REST | Fetch assembly metadata for known accessions |
|
|
44
|
+
| `genome_download_by_taxon` | CLI | Download a genome package by taxon |
|
|
45
|
+
| `genome_download_by_accession` | CLI | Download a genome package by accession |
|
|
46
|
+
| `rehydrate_genome_package` | CLI | Fetch sequence files for a dehydrated package |
|
|
47
|
+
| `dataformat_genome_tsv` | CLI | Convert a genome JSONL data report to TSV |
|
|
48
|
+
| `taxonomy_summary` | REST | Get lineage, rank, and names for a taxon |
|
|
49
|
+
| `taxonomy_download` | CLI | Download a taxonomy package |
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
### Option 1 — Desktop Extension (recommended for Claude Desktop users)
|
|
54
|
+
|
|
55
|
+
1. Download `ncbi-datasets.mcpb` from the [Releases](../../releases) page.
|
|
56
|
+
2. Double-click the file and click **Install** in Claude Desktop.
|
|
57
|
+
3. Optionally enter your [NCBI API key](https://www.ncbi.nlm.nih.gov/account/) and download directory.
|
|
58
|
+
|
|
59
|
+
The NCBI CLI tools are downloaded automatically on first use (`NCBI_AUTO_INSTALL=true` is set by default in the extension).
|
|
60
|
+
|
|
61
|
+
### Option 2 — JSON config (Claude Desktop / Claude Code)
|
|
62
|
+
|
|
63
|
+
Add to `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"mcpServers": {
|
|
68
|
+
"ncbi-datasets": {
|
|
69
|
+
"command": "uvx",
|
|
70
|
+
"args": ["ncbi-datasets-mcp"],
|
|
71
|
+
"env": {
|
|
72
|
+
"NCBI_API_KEY": "your_key_here",
|
|
73
|
+
"NCBI_DOWNLOAD_DIR": "/path/to/downloads",
|
|
74
|
+
"NCBI_AUTO_INSTALL": "true"
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Requires [uv](https://docs.astral.sh/uv/getting-started/installation/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`).
|
|
82
|
+
|
|
83
|
+
## Configuration
|
|
84
|
+
|
|
85
|
+
| Variable | Default | Description |
|
|
86
|
+
|----------|---------|-------------|
|
|
87
|
+
| `NCBI_API_KEY` | *(none)* | NCBI API key — raises rate limit to 10 req/s |
|
|
88
|
+
| `NCBI_DOWNLOAD_DIR` | `~/Downloads/ncbi_datasets` | Default download location |
|
|
89
|
+
| `NCBI_AUTO_INSTALL` | `false` | Auto-install CLI tools on startup |
|
|
90
|
+
| `NCBI_MAX_RESULTS` | `20` | Cap for summary tool result counts |
|
|
91
|
+
| `NCBI_REQUEST_TIMEOUT` | `300` | Seconds before a download times out |
|
|
92
|
+
| `NCBI_CLI_PATH` | *(auto)* | Override path to `datasets` binary |
|
|
93
|
+
| `NCBI_DATAFORMAT_PATH` | *(auto)* | Override path to `dataformat` binary |
|
|
94
|
+
|
|
95
|
+
## Development
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Install with dev extras
|
|
99
|
+
pip install -e ".[dev]"
|
|
100
|
+
|
|
101
|
+
# Run unit tests
|
|
102
|
+
pytest
|
|
103
|
+
|
|
104
|
+
# Run only the integration tests (live network calls to the NCBI API)
|
|
105
|
+
pytest -m integration
|
|
106
|
+
|
|
107
|
+
# Regenerate enums from the current NCBI OpenAPI spec
|
|
108
|
+
python scripts/gen_enums.py
|
|
109
|
+
|
|
110
|
+
# Run the server locally (stdio transport)
|
|
111
|
+
ncbi-datasets-mcp
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Architecture
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
src/ncbi_datasets_mcp/
|
|
118
|
+
server.py FastMCP app — tool registrations only
|
|
119
|
+
config.py Pydantic-settings env config
|
|
120
|
+
cli/
|
|
121
|
+
locator.py Find datasets/dataformat (config → PATH → cache)
|
|
122
|
+
installer.py Download binaries from NCBI FTP
|
|
123
|
+
runner.py Async subprocess wrapper
|
|
124
|
+
rest/
|
|
125
|
+
client.py httpx client for metadata/summary endpoints
|
|
126
|
+
domains/
|
|
127
|
+
_generated_enums.py Vendored enums from OpenAPI spec
|
|
128
|
+
common.py Shared utilities (output dir, filename sanitising)
|
|
129
|
+
genome.py Genome CLI arg builders + response shaping
|
|
130
|
+
taxonomy.py Taxonomy CLI arg builders
|
|
131
|
+
models/
|
|
132
|
+
responses.py Shared DownloadResult dataclass
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Summary tools (no file I/O) → REST API.
|
|
136
|
+
Download and format-conversion tools → NCBI CLI binaries.
|
|
137
|
+
|
|
138
|
+
## Cite
|
|
139
|
+
|
|
140
|
+
If you use NCBI Datasets in your research, please cite:
|
|
141
|
+
> NCBI Datasets. National Center for Biotechnology Information. https://www.ncbi.nlm.nih.gov/datasets/
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# ncbi-datasets-mcp
|
|
2
|
+
NOTE: This project is not affiliated with NCBI or NCBI Datasets; it is a user-provided tool.
|
|
3
|
+
|
|
4
|
+
An MCP server that gives Claude access to [NCBI Datasets v2](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/) — search genome assembly metadata, retrieve taxonomy records, and download data packages without leaving your conversation.
|
|
5
|
+
|
|
6
|
+
## Tools
|
|
7
|
+
|
|
8
|
+
| Tool | Transport | Description |
|
|
9
|
+
|------|-----------|-------------|
|
|
10
|
+
| `ensure_cli` | — | Install the NCBI CLI tools (run once, or set `NCBI_AUTO_INSTALL=true`) |
|
|
11
|
+
| `genome_summary_by_taxon` | REST | Search genome assemblies by organism name or tax ID |
|
|
12
|
+
| `genome_summary_by_accession` | REST | Fetch assembly metadata for known accessions |
|
|
13
|
+
| `genome_download_by_taxon` | CLI | Download a genome package by taxon |
|
|
14
|
+
| `genome_download_by_accession` | CLI | Download a genome package by accession |
|
|
15
|
+
| `rehydrate_genome_package` | CLI | Fetch sequence files for a dehydrated package |
|
|
16
|
+
| `dataformat_genome_tsv` | CLI | Convert a genome JSONL data report to TSV |
|
|
17
|
+
| `taxonomy_summary` | REST | Get lineage, rank, and names for a taxon |
|
|
18
|
+
| `taxonomy_download` | CLI | Download a taxonomy package |
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
### Option 1 — Desktop Extension (recommended for Claude Desktop users)
|
|
23
|
+
|
|
24
|
+
1. Download `ncbi-datasets.mcpb` from the [Releases](../../releases) page.
|
|
25
|
+
2. Double-click the file and click **Install** in Claude Desktop.
|
|
26
|
+
3. Optionally enter your [NCBI API key](https://www.ncbi.nlm.nih.gov/account/) and download directory.
|
|
27
|
+
|
|
28
|
+
The NCBI CLI tools are downloaded automatically on first use (`NCBI_AUTO_INSTALL=true` is set by default in the extension).
|
|
29
|
+
|
|
30
|
+
### Option 2 — JSON config (Claude Desktop / Claude Code)
|
|
31
|
+
|
|
32
|
+
Add to `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"mcpServers": {
|
|
37
|
+
"ncbi-datasets": {
|
|
38
|
+
"command": "uvx",
|
|
39
|
+
"args": ["ncbi-datasets-mcp"],
|
|
40
|
+
"env": {
|
|
41
|
+
"NCBI_API_KEY": "your_key_here",
|
|
42
|
+
"NCBI_DOWNLOAD_DIR": "/path/to/downloads",
|
|
43
|
+
"NCBI_AUTO_INSTALL": "true"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Requires [uv](https://docs.astral.sh/uv/getting-started/installation/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`).
|
|
51
|
+
|
|
52
|
+
## Configuration
|
|
53
|
+
|
|
54
|
+
| Variable | Default | Description |
|
|
55
|
+
|----------|---------|-------------|
|
|
56
|
+
| `NCBI_API_KEY` | *(none)* | NCBI API key — raises rate limit to 10 req/s |
|
|
57
|
+
| `NCBI_DOWNLOAD_DIR` | `~/Downloads/ncbi_datasets` | Default download location |
|
|
58
|
+
| `NCBI_AUTO_INSTALL` | `false` | Auto-install CLI tools on startup |
|
|
59
|
+
| `NCBI_MAX_RESULTS` | `20` | Cap for summary tool result counts |
|
|
60
|
+
| `NCBI_REQUEST_TIMEOUT` | `300` | Seconds before a download times out |
|
|
61
|
+
| `NCBI_CLI_PATH` | *(auto)* | Override path to `datasets` binary |
|
|
62
|
+
| `NCBI_DATAFORMAT_PATH` | *(auto)* | Override path to `dataformat` binary |
|
|
63
|
+
|
|
64
|
+
## Development
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Install with dev extras
|
|
68
|
+
pip install -e ".[dev]"
|
|
69
|
+
|
|
70
|
+
# Run unit tests
|
|
71
|
+
pytest
|
|
72
|
+
|
|
73
|
+
# Run only the integration tests (live network calls to the NCBI API)
|
|
74
|
+
pytest -m integration
|
|
75
|
+
|
|
76
|
+
# Regenerate enums from the current NCBI OpenAPI spec
|
|
77
|
+
python scripts/gen_enums.py
|
|
78
|
+
|
|
79
|
+
# Run the server locally (stdio transport)
|
|
80
|
+
ncbi-datasets-mcp
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Architecture
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
src/ncbi_datasets_mcp/
|
|
87
|
+
server.py FastMCP app — tool registrations only
|
|
88
|
+
config.py Pydantic-settings env config
|
|
89
|
+
cli/
|
|
90
|
+
locator.py Find datasets/dataformat (config → PATH → cache)
|
|
91
|
+
installer.py Download binaries from NCBI FTP
|
|
92
|
+
runner.py Async subprocess wrapper
|
|
93
|
+
rest/
|
|
94
|
+
client.py httpx client for metadata/summary endpoints
|
|
95
|
+
domains/
|
|
96
|
+
_generated_enums.py Vendored enums from OpenAPI spec
|
|
97
|
+
common.py Shared utilities (output dir, filename sanitising)
|
|
98
|
+
genome.py Genome CLI arg builders + response shaping
|
|
99
|
+
taxonomy.py Taxonomy CLI arg builders
|
|
100
|
+
models/
|
|
101
|
+
responses.py Shared DownloadResult dataclass
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Summary tools (no file I/O) → REST API.
|
|
105
|
+
Download and format-conversion tools → NCBI CLI binaries.
|
|
106
|
+
|
|
107
|
+
## Cite
|
|
108
|
+
|
|
109
|
+
If you use NCBI Datasets in your research, please cite:
|
|
110
|
+
> NCBI Datasets. National Center for Biotechnology Information. https://www.ncbi.nlm.nih.gov/datasets/
|
|
111
|
+
|
|
112
|
+
## License
|
|
113
|
+
|
|
114
|
+
MIT
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Build ncbi-datasets.mcpb — a Claude Desktop Extension bundle.
|
|
3
|
+
|
|
4
|
+
The .mcpb is a ZIP archive containing manifest.json plus any bundled assets.
|
|
5
|
+
For this Python server, the manifest points to `uvx ncbi-datasets-mcp`, so
|
|
6
|
+
the archive only needs the manifest (and optional icon). The Python package
|
|
7
|
+
itself is fetched from PyPI by uvx at install time.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python scripts/build_mcpb.py [--version VERSION] [--out DIR]
|
|
11
|
+
|
|
12
|
+
Optional: an icon at assets/icon.png (recommended for the registry)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import shutil
|
|
18
|
+
import sys
|
|
19
|
+
import zipfile
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
ROOT = Path(__file__).parent.parent
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def load_manifest() -> dict:
    """Read and parse ``manifest.json`` from the project root (``ROOT``).

    Returns:
        The parsed manifest as a dictionary.

    Raises:
        FileNotFoundError: if ``ROOT / "manifest.json"`` does not exist.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    manifest_path = ROOT / "manifest.json"
    # JSON is UTF-8 by specification. Without an explicit encoding, open()
    # falls back to the locale encoding (e.g. cp1252 on Windows), which can
    # mis-decode or reject non-ASCII characters in the manifest.
    with manifest_path.open(encoding="utf-8") as fh:
        return json.load(fh)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def sync_version(manifest: dict, version: str) -> dict:
    """Return a shallow copy of *manifest* with ``"version"`` set to *version*.

    The mapping passed in is not modified; callers get a new dictionary that
    shares the original's values except for the replaced version string.
    """
    return {**manifest, "version": version}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build(version: str | None, out_dir: Path) -> Path:
    """Create the ``.mcpb`` ZIP bundle inside *out_dir* and return its path.

    Args:
        version: Version to stamp into the bundled manifest. When falsy, the
            manifest's own ``"version"`` is used (``"0.0.0"`` if it has none).
        out_dir: Directory that receives the bundle; created if missing.

    Returns:
        Path of the written ``ncbi-datasets-<version>.mcpb`` file.
    """
    manifest = load_manifest()
    if not version:
        # No override given: name the bundle after the manifest's version.
        version = manifest.get("version", "0.0.0")
    else:
        manifest = sync_version(manifest, version)

    out_dir.mkdir(parents=True, exist_ok=True)
    bundle_path = out_dir / f"ncbi-datasets-{version}.mcpb"
    icon_path = ROOT / "assets" / "icon.png"

    with zipfile.ZipFile(bundle_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        # manifest.json is the only required member of the archive.
        archive.writestr("manifest.json", json.dumps(manifest, indent=2))

        # The icon is optional; its absence is only reported on stderr.
        if not icon_path.exists():
            print(
                " (no assets/icon.png found — bundle will work but won't show an icon)",
                file=sys.stderr,
            )
        else:
            archive.write(icon_path, "icon.png")
            print(" + icon.png")

    # Size is reported in whole kibibytes (floor division).
    size_kb = bundle_path.stat().st_size // 1024
    print(f"Built {bundle_path} ({size_kb} KB)")
    return bundle_path
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def main() -> None:
    """Parse command-line options, build the bundle, and print install hints.

    Options:
        --version  Version to stamp into the manifest (defaults to the value
                   already in manifest.json).
        --out      Output directory for the bundle (defaults to ``dist``).
    """
    cli = argparse.ArgumentParser(description="Build ncbi-datasets.mcpb")
    cli.add_argument("--version", help="Override version (default: from manifest.json)")
    cli.add_argument("--out", default="dist", help="Output directory (default: dist/)")
    options = cli.parse_args()

    bundle_path = build(version=options.version, out_dir=Path(options.out))

    # Two hint lines, emitted with a single print call.
    print(
        f"\nTo install: double-click {bundle_path.name} in Finder/Explorer",
        "Or: open Claude Desktop → Settings → Extensions → Install from file",
        sep="\n",
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":
|
|
81
|
+
main()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags: ["v*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main]
|
|
9
|
+
schedule:
|
|
10
|
+
# Nightly at 06:00 UTC — runs integration tests against live NCBI API
|
|
11
|
+
- cron: "0 6 * * *"
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
inputs:
|
|
14
|
+
run_integration:
|
|
15
|
+
description: "Run integration tests against live NCBI API"
|
|
16
|
+
type: boolean
|
|
17
|
+
default: false
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
lint:
|
|
21
|
+
name: Lint
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.12"
|
|
28
|
+
- run: pip install ruff
|
|
29
|
+
- run: ruff check src/ tests/
|
|
30
|
+
|
|
31
|
+
unit-tests:
|
|
32
|
+
name: Unit tests (Python ${{ matrix.python-version }})
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
needs: lint
|
|
35
|
+
strategy:
|
|
36
|
+
fail-fast: false
|
|
37
|
+
matrix:
|
|
38
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: ${{ matrix.python-version }}
|
|
44
|
+
- name: Install
|
|
45
|
+
run: pip install -e ".[dev]"
|
|
46
|
+
- name: Run unit tests
|
|
47
|
+
run: pytest tests/unit/ -v --tb=short
|
|
48
|
+
|
|
49
|
+
integration-tests:
|
|
50
|
+
name: Integration tests (live NCBI API)
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
# Only run on schedule or manual trigger — not on every push/PR
|
|
53
|
+
if: |
|
|
54
|
+
github.event_name == 'schedule' ||
|
|
55
|
+
github.event.inputs.run_integration == 'true'
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
- uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.12"
|
|
61
|
+
- name: Install
|
|
62
|
+
run: pip install -e ".[dev]"
|
|
63
|
+
- name: Run integration tests
|
|
64
|
+
run: pytest -m integration -v --tb=short
|
|
65
|
+
env:
|
|
66
|
+
# Optional: add NCBI_API_KEY as a repo secret for higher rate limits
|
|
67
|
+
NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
|
|
68
|
+
|
|
69
|
+
publish:
|
|
70
|
+
name: Publish to PyPI
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
needs: unit-tests
|
|
73
|
+
# Only publish when a version tag (v*) is pushed; the branch the tag points at is not checked
|
|
74
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
75
|
+
environment:
|
|
76
|
+
name: pypi
|
|
77
|
+
url: https://pypi.org/project/ncbi-datasets-mcp/
|
|
78
|
+
permissions:
|
|
79
|
+
id-token: write # Required for trusted publishing (no token needed)
|
|
80
|
+
steps:
|
|
81
|
+
- uses: actions/checkout@v4
|
|
82
|
+
- uses: actions/setup-python@v5
|
|
83
|
+
with:
|
|
84
|
+
python-version: "3.12"
|
|
85
|
+
- name: Build
|
|
86
|
+
run: |
|
|
87
|
+
pip install build
|
|
88
|
+
python -m build
|
|
89
|
+
- name: Publish
|
|
90
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Download the NCBI datasets/dataformat CLI binaries for the current platform.
|
|
2
|
+
|
|
3
|
+
Binaries are single static files published by NCBI at predictable FTP URLs:
|
|
4
|
+
https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/{platform}/{binary}
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import platform
|
|
9
|
+
import stat
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from ncbi_datasets_mcp.cli.locator import CLIBinaries, _exe, get_install_dir
|
|
16
|
+
|
|
17
|
+
FTP_BASE = "https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2"
|
|
18
|
+
|
|
19
|
+
# Maps (system, machine) → NCBI FTP platform path
|
|
20
|
+
_PLATFORM_MAP: dict[tuple[str, str], str] = {
|
|
21
|
+
("Linux", "x86_64"): "linux-amd64",
|
|
22
|
+
("Linux", "aarch64"): "linux-arm64",
|
|
23
|
+
("Linux", "armv7l"): "linux-arm",
|
|
24
|
+
("Darwin", "x86_64"): "mac",
|
|
25
|
+
("Darwin", "arm64"): "mac", # Universal binary covers Apple Silicon
|
|
26
|
+
("Windows", "AMD64"): "win64",
|
|
27
|
+
("Windows", "x86_64"): "win64",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
|
|
32
|
+
class InstallResult:
|
|
33
|
+
success: bool
|
|
34
|
+
datasets_path: Path | None = None
|
|
35
|
+
dataformat_path: Path | None = None
|
|
36
|
+
version: str | None = None
|
|
37
|
+
message: str = ""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def detect_platform() -> str:
    """Map the running OS/architecture to its NCBI download sub-path.

    Returns:
        The ``_PLATFORM_MAP`` entry for ``(platform.system(), platform.machine())``.

    Raises:
        RuntimeError: if that combination has no entry in ``_PLATFORM_MAP``.
    """
    system, machine = platform.system(), platform.machine()
    ftp_subpath = _PLATFORM_MAP.get((system, machine))
    if ftp_subpath is None:
        raise RuntimeError(
            f"Unsupported platform: {system}/{machine}. "
            "Install manually: "
            "https://www.ncbi.nlm.nih.gov/datasets/docs/v2/command-line-tools/download-and-install/"
        )
    return ftp_subpath
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def _download_binary(url: str, dest: Path, client: httpx.AsyncClient) -> None:
    """Stream a binary from *url* to *dest*, then mark it executable.

    The payload is written to a sibling ``<name>.part`` file first and only
    moved onto *dest* after the whole body has been received. A failed or
    interrupted transfer therefore never leaves a truncated executable at
    *dest*, and an existing file at *dest* is swapped rather than rewritten
    in place.

    Args:
        url: Location of the binary to fetch.
        dest: Final path of the downloaded file; parent dirs are created.
        client: HTTP client used to issue the streaming GET request.

    Raises:
        httpx.HTTPError: on an error status or a transport failure.
        OSError: if the file cannot be written, chmod-ed, or moved into place.
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        async with client.stream("GET", url) as response:
            response.raise_for_status()
            dest.parent.mkdir(parents=True, exist_ok=True)
            with partial.open("wb") as fh:
                async for chunk in response.aiter_bytes(chunk_size=65_536):
                    fh.write(chunk)

        # Set the execute bits before publishing so *dest* is never visible
        # in a non-executable state. Skipped on Windows, as in the original.
        if platform.system() != "Windows":
            partial.chmod(partial.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)

        partial.replace(dest)
    except BaseException:
        # BaseException so task cancellation also cleans up the partial file.
        partial.unlink(missing_ok=True)
        raise
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def install_cli() -> InstallResult:
    """Download both binaries into the managed install dir and smoke-test one.

    Steps:
        1. Resolve the platform sub-path via ``detect_platform()``.
        2. Download ``datasets`` and ``dataformat`` into ``get_install_dir()``.
        3. Run ``datasets --version`` and require a zero exit code.

    Returns:
        An ``InstallResult``. Failures in any step are reported through
        ``success=False`` and ``message`` rather than raised.
    """
    try:
        platform_path = detect_platform()
    except RuntimeError as exc:
        return InstallResult(success=False, message=str(exc))

    install_dir = get_install_dir()
    datasets_dest = install_dir / _exe("datasets")
    dataformat_dest = install_dir / _exe("dataformat")

    datasets_url = f"{FTP_BASE}/{platform_path}/{_exe('datasets')}"
    dataformat_url = f"{FTP_BASE}/{platform_path}/{_exe('dataformat')}"

    async with httpx.AsyncClient(follow_redirects=True, timeout=120.0) as client:
        try:
            await _download_binary(datasets_url, datasets_dest, client)
            await _download_binary(dataformat_url, dataformat_dest, client)
        except (httpx.HTTPError, OSError) as exc:
            # OSError covers local failures (unwritable directory, full disk)
            # so they are reported like network errors instead of escaping.
            return InstallResult(success=False, message=f"Download failed: {exc}")

    # Smoke-test: run --version on the freshly installed binary.
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            str(datasets_dest),
            "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=15.0)
    except Exception as exc:
        # Deliberately broad: any failure to launch or talk to the binary is
        # turned into a failed result. On timeout the child is still running,
        # so terminate and reap it rather than leaking the process.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # Exited between the check and the kill.
            await proc.wait()
        # A timeout has an empty str(); fall back to the exception type name.
        detail = str(exc) or type(exc).__name__
        return InstallResult(
            success=False,
            message=f"Binary installed but verification failed: {detail}",
        )

    if proc.returncode != 0:
        # A binary that starts but exits with an error is not a usable install.
        detail = stderr.decode(errors="replace").strip() or f"exit code {proc.returncode}"
        return InstallResult(
            success=False,
            message=f"Binary installed but verification failed: {detail}",
        )

    version = stdout.decode(errors="replace").strip()

    return InstallResult(
        success=True,
        datasets_path=datasets_dest,
        dataformat_path=dataformat_dest,
        version=version,
        message=f"Installed {version} → {install_dir}",
    )
|