ncbi-datasets-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. ncbi_datasets_mcp-0.1.0/.github/workflows/ci.yml +90 -0
  2. ncbi_datasets_mcp-0.1.0/CHANGELOG.md +23 -0
  3. ncbi_datasets_mcp-0.1.0/LICENSE +1 -0
  4. ncbi_datasets_mcp-0.1.0/PKG-INFO +145 -0
  5. ncbi_datasets_mcp-0.1.0/README.md +114 -0
  6. ncbi_datasets_mcp-0.1.0/build_mcpb.py +81 -0
  7. ncbi_datasets_mcp-0.1.0/ci.yml +90 -0
  8. ncbi_datasets_mcp-0.1.0/installer.py +110 -0
  9. ncbi_datasets_mcp-0.1.0/manifest.json +53 -0
  10. ncbi_datasets_mcp-0.1.0/pyproject.toml +60 -0
  11. ncbi_datasets_mcp-0.1.0/scripts/build_mcpb.py +81 -0
  12. ncbi_datasets_mcp-0.1.0/scripts/gen_enums.py +111 -0
  13. ncbi_datasets_mcp-0.1.0/server.py +316 -0
  14. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/__init__.py +3 -0
  15. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/__init__.py +1 -0
  16. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/installer.py +110 -0
  17. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/locator.py +64 -0
  18. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/cli/runner.py +125 -0
  19. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/config.py +40 -0
  20. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/__init__.py +1 -0
  21. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/_generated_enums.py +33 -0
  22. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/common.py +36 -0
  23. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/genome.py +127 -0
  24. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/domains/taxonomy.py +24 -0
  25. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/models/__init__.py +1 -0
  26. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/models/responses.py +28 -0
  27. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/rest/__init__.py +1 -0
  28. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/rest/client.py +90 -0
  29. ncbi_datasets_mcp-0.1.0/src/ncbi_datasets_mcp/server.py +315 -0
  30. ncbi_datasets_mcp-0.1.0/tests/__init__.py +1 -0
  31. ncbi_datasets_mcp-0.1.0/tests/conftest.py +32 -0
  32. ncbi_datasets_mcp-0.1.0/tests/integration/__init__.py +1 -0
  33. ncbi_datasets_mcp-0.1.0/tests/integration/test_integration.py +47 -0
  34. ncbi_datasets_mcp-0.1.0/tests/unit/__init__.py +1 -0
  35. ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_installer.py +51 -0
  36. ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_locator.py +86 -0
  37. ncbi_datasets_mcp-0.1.0/tests/unit/test_cli_runner.py +98 -0
  38. ncbi_datasets_mcp-0.1.0/tests/unit/test_domains_genome.py +152 -0
  39. ncbi_datasets_mcp-0.1.0/tests/unit/test_domains_taxonomy.py +56 -0
  40. ncbi_datasets_mcp-0.1.0/tests/unit/test_rest_client.py +116 -0
@@ -0,0 +1,90 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*"]
7
+ pull_request:
8
+ branches: [main]
9
+ schedule:
10
+ # Nightly at 06:00 UTC — runs integration tests against live NCBI API
11
+ - cron: "0 6 * * *"
12
+ workflow_dispatch:
13
+ inputs:
14
+ run_integration:
15
+ description: "Run integration tests against live NCBI API"
16
+ type: boolean
17
+ default: false
18
+
19
+ jobs:
20
+ lint:
21
+ name: Lint
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ - uses: actions/setup-python@v5
26
+ with:
27
+ python-version: "3.12"
28
+ - run: pip install ruff
29
+ - run: ruff check src/ tests/
30
+
31
+ unit-tests:
32
+ name: Unit tests (Python ${{ matrix.python-version }})
33
+ runs-on: ubuntu-latest
34
+ needs: lint
35
+ strategy:
36
+ fail-fast: false
37
+ matrix:
38
+ python-version: ["3.10", "3.11", "3.12"]
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: ${{ matrix.python-version }}
44
+ - name: Install
45
+ run: pip install -e ".[dev]"
46
+ - name: Run unit tests
47
+ run: pytest tests/unit/ -v --tb=short
48
+
49
+ integration-tests:
50
+ name: Integration tests (live NCBI API)
51
+ runs-on: ubuntu-latest
52
+ # Only run on schedule or manual trigger — not on every push/PR
53
+ if: |
54
+ github.event_name == 'schedule' ||
55
+ github.event.inputs.run_integration == 'true'
56
+ steps:
57
+ - uses: actions/checkout@v4
58
+ - uses: actions/setup-python@v5
59
+ with:
60
+ python-version: "3.12"
61
+ - name: Install
62
+ run: pip install -e ".[dev]"
63
+ - name: Run integration tests
64
+ run: pytest -m integration -v --tb=short
65
+ env:
66
+ # Optional: add NCBI_API_KEY as a repo secret for higher rate limits
67
+ NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
68
+
69
+ publish:
70
+ name: Publish to PyPI
71
+ runs-on: ubuntu-latest
72
+ needs: unit-tests
73
+ # Only publish on version tags pushed to main
74
+ if: startsWith(github.ref, 'refs/tags/v')
75
+ environment:
76
+ name: pypi
77
+ url: https://pypi.org/project/ncbi-datasets-mcp/
78
+ permissions:
79
+ id-token: write # Required for trusted publishing (no token needed)
80
+ steps:
81
+ - uses: actions/checkout@v4
82
+ - uses: actions/setup-python@v5
83
+ with:
84
+ python-version: "3.12"
85
+ - name: Build
86
+ run: |
87
+ pip install build
88
+ python -m build
89
+ - name: Publish
90
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,23 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented here.
4
+ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
5
+
6
+ ## [Unreleased]
7
+
8
+ ## [0.1.0] — Initial release
9
+
10
+ ### Added
11
+ - `ensure_cli` tool: detect and auto-install `datasets`/`dataformat` binaries from NCBI FTP
12
+ - `genome_summary_by_taxon`: search genome assemblies by organism name or tax ID (REST)
13
+ - `genome_summary_by_accession`: fetch metadata for known accessions (REST)
14
+ - `genome_download_by_taxon`: download genome packages by taxon (CLI)
15
+ - `genome_download_by_accession`: download genome packages by accession (CLI)
16
+ - `rehydrate_genome_package`: fetch sequence files for dehydrated packages (CLI)
17
+ - `dataformat_genome_tsv`: convert genome JSONL data reports to TSV (CLI)
18
+ - `taxonomy_summary`: get lineage, rank, and names for a taxon (REST)
19
+ - `taxonomy_download`: download taxonomy packages (CLI)
20
+ - Desktop Extension manifest (`.mcpb`) with `NCBI_AUTO_INSTALL=true` default
21
+ - `scripts/gen_enums.py` to regenerate enums from the live NCBI OpenAPI spec
22
+ - `scripts/build_mcpb.py` to produce a distributable `.mcpb` bundle
23
+ - GitHub Actions CI: lint, unit test matrix (Python 3.10–3.12), nightly integration tests, PyPI trusted publish on tag
@@ -0,0 +1 @@
1
+ MIT License
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: ncbi-datasets-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server for NCBI Datasets — search metadata and download genomic data
5
+ Author-email: John Torcivia <jtorcivia@gmail.com>
6
+ License: MIT License
7
+ License-File: LICENSE
8
+ Keywords: bioinformatics,datasets,genomics,mcp,ncbi
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: httpx>=0.27.0
18
+ Requires-Dist: mcp[cli]>=1.9.0
19
+ Requires-Dist: platformdirs>=4.0.0
20
+ Requires-Dist: pydantic-settings>=2.0.0
21
+ Requires-Dist: pydantic>=2.0.0
22
+ Requires-Dist: tenacity>=8.0.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
25
+ Requires-Dist: pytest-mock>=3.12.0; extra == 'dev'
26
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
27
+ Requires-Dist: pyyaml>=6.0; extra == 'dev'
28
+ Requires-Dist: respx>=0.21.0; extra == 'dev'
29
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # ncbi-datasets-mcp
33
+ NOTE: This project is not affiliated with NCBI or NCBI Datasets; it is a user-provided tool.
34
+
35
+ An MCP server that gives Claude access to [NCBI Datasets v2](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/) — search genome assembly metadata, retrieve taxonomy records, and download data packages without leaving your conversation.
36
+
37
+ ## Tools
38
+
39
+ | Tool | Transport | Description |
40
+ |------|-----------|-------------|
41
+ | `ensure_cli` | — | Install the NCBI CLI tools (run once, or set `NCBI_AUTO_INSTALL=true`) |
42
+ | `genome_summary_by_taxon` | REST | Search genome assemblies by organism name or tax ID |
43
+ | `genome_summary_by_accession` | REST | Fetch assembly metadata for known accessions |
44
+ | `genome_download_by_taxon` | CLI | Download a genome package by taxon |
45
+ | `genome_download_by_accession` | CLI | Download a genome package by accession |
46
+ | `rehydrate_genome_package` | CLI | Fetch sequence files for a dehydrated package |
47
+ | `dataformat_genome_tsv` | CLI | Convert a genome JSONL data report to TSV |
48
+ | `taxonomy_summary` | REST | Get lineage, rank, and names for a taxon |
49
+ | `taxonomy_download` | CLI | Download a taxonomy package |
50
+
51
+ ## Installation
52
+
53
+ ### Option 1 — Desktop Extension (recommended for Claude Desktop users)
54
+
55
+ 1. Download `ncbi-datasets.mcpb` from the [Releases](../../releases) page.
56
+ 2. Double-click the file and click **Install** in Claude Desktop.
57
+ 3. Optionally enter your [NCBI API key](https://www.ncbi.nlm.nih.gov/account/) and download directory.
58
+
59
+ The NCBI CLI tools are downloaded automatically on first use (`NCBI_AUTO_INSTALL=true` is set by default in the extension).
60
+
61
+ ### Option 2 — JSON config (Claude Desktop / Claude Code)
62
+
63
+ Add to `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
64
+
65
+ ```json
66
+ {
67
+ "mcpServers": {
68
+ "ncbi-datasets": {
69
+ "command": "uvx",
70
+ "args": ["ncbi-datasets-mcp"],
71
+ "env": {
72
+ "NCBI_API_KEY": "your_key_here",
73
+ "NCBI_DOWNLOAD_DIR": "/path/to/downloads",
74
+ "NCBI_AUTO_INSTALL": "true"
75
+ }
76
+ }
77
+ }
78
+ }
79
+ ```
80
+
81
+ Requires [uv](https://docs.astral.sh/uv/getting-started/installation/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`).
82
+
83
+ ## Configuration
84
+
85
+ | Variable | Default | Description |
86
+ |----------|---------|-------------|
87
+ | `NCBI_API_KEY` | *(none)* | NCBI API key — raises rate limit to 10 req/s |
88
+ | `NCBI_DOWNLOAD_DIR` | `~/Downloads/ncbi_datasets` | Default download location |
89
+ | `NCBI_AUTO_INSTALL` | `false` | Auto-install CLI tools on startup |
90
+ | `NCBI_MAX_RESULTS` | `20` | Cap for summary tool result counts |
91
+ | `NCBI_REQUEST_TIMEOUT` | `300` | Seconds before a download times out |
92
+ | `NCBI_CLI_PATH` | *(auto)* | Override path to `datasets` binary |
93
+ | `NCBI_DATAFORMAT_PATH` | *(auto)* | Override path to `dataformat` binary |
94
+
95
+ ## Development
96
+
97
+ ```bash
98
+ # Install with dev extras
99
+ pip install -e ".[dev]"
100
+
101
+ # Run unit tests
102
+ pytest
103
+
104
+ # Run only the tests marked `integration` (these make live network calls)
105
+ pytest -m integration
106
+
107
+ # Regenerate enums from the current NCBI OpenAPI spec
108
+ python scripts/gen_enums.py
109
+
110
+ # Run the server locally (stdio transport)
111
+ ncbi-datasets-mcp
112
+ ```
113
+
114
+ ## Architecture
115
+
116
+ ```
117
+ src/ncbi_datasets_mcp/
118
+ server.py FastMCP app — tool registrations only
119
+ config.py Pydantic-settings env config
120
+ cli/
121
+ locator.py Find datasets/dataformat (config → PATH → cache)
122
+ installer.py Download binaries from NCBI FTP
123
+ runner.py Async subprocess wrapper
124
+ rest/
125
+ client.py httpx client for metadata/summary endpoints
126
+ domains/
127
+ _generated_enums.py Vendored enums from OpenAPI spec
128
+ common.py Shared utilities (output dir, filename sanitising)
129
+ genome.py Genome CLI arg builders + response shaping
130
+ taxonomy.py Taxonomy CLI arg builders
131
+ models/
132
+ responses.py Shared DownloadResult dataclass
133
+ ```
134
+
135
+ Summary tools (no file I/O) → REST API.
136
+ Download and format-conversion tools → NCBI CLI binaries.
137
+
138
+ ## Cite
139
+
140
+ If you use NCBI Datasets in your research, please cite:
141
+ > NCBI Datasets. National Center for Biotechnology Information. https://www.ncbi.nlm.nih.gov/datasets/
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,114 @@
1
+ # ncbi-datasets-mcp
2
+ NOTE: This project is not affiliated with NCBI or NCBI Datasets; it is a user-provided tool.
3
+
4
+ An MCP server that gives Claude access to [NCBI Datasets v2](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/) — search genome assembly metadata, retrieve taxonomy records, and download data packages without leaving your conversation.
5
+
6
+ ## Tools
7
+
8
+ | Tool | Transport | Description |
9
+ |------|-----------|-------------|
10
+ | `ensure_cli` | — | Install the NCBI CLI tools (run once, or set `NCBI_AUTO_INSTALL=true`) |
11
+ | `genome_summary_by_taxon` | REST | Search genome assemblies by organism name or tax ID |
12
+ | `genome_summary_by_accession` | REST | Fetch assembly metadata for known accessions |
13
+ | `genome_download_by_taxon` | CLI | Download a genome package by taxon |
14
+ | `genome_download_by_accession` | CLI | Download a genome package by accession |
15
+ | `rehydrate_genome_package` | CLI | Fetch sequence files for a dehydrated package |
16
+ | `dataformat_genome_tsv` | CLI | Convert a genome JSONL data report to TSV |
17
+ | `taxonomy_summary` | REST | Get lineage, rank, and names for a taxon |
18
+ | `taxonomy_download` | CLI | Download a taxonomy package |
19
+
20
+ ## Installation
21
+
22
+ ### Option 1 — Desktop Extension (recommended for Claude Desktop users)
23
+
24
+ 1. Download `ncbi-datasets.mcpb` from the [Releases](../../releases) page.
25
+ 2. Double-click the file and click **Install** in Claude Desktop.
26
+ 3. Optionally enter your [NCBI API key](https://www.ncbi.nlm.nih.gov/account/) and download directory.
27
+
28
+ The NCBI CLI tools are downloaded automatically on first use (`NCBI_AUTO_INSTALL=true` is set by default in the extension).
29
+
30
+ ### Option 2 — JSON config (Claude Desktop / Claude Code)
31
+
32
+ Add to `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
33
+
34
+ ```json
35
+ {
36
+ "mcpServers": {
37
+ "ncbi-datasets": {
38
+ "command": "uvx",
39
+ "args": ["ncbi-datasets-mcp"],
40
+ "env": {
41
+ "NCBI_API_KEY": "your_key_here",
42
+ "NCBI_DOWNLOAD_DIR": "/path/to/downloads",
43
+ "NCBI_AUTO_INSTALL": "true"
44
+ }
45
+ }
46
+ }
47
+ }
48
+ ```
49
+
50
+ Requires [uv](https://docs.astral.sh/uv/getting-started/installation/) (`curl -LsSf https://astral.sh/uv/install.sh | sh`).
51
+
52
+ ## Configuration
53
+
54
+ | Variable | Default | Description |
55
+ |----------|---------|-------------|
56
+ | `NCBI_API_KEY` | *(none)* | NCBI API key — raises rate limit to 10 req/s |
57
+ | `NCBI_DOWNLOAD_DIR` | `~/Downloads/ncbi_datasets` | Default download location |
58
+ | `NCBI_AUTO_INSTALL` | `false` | Auto-install CLI tools on startup |
59
+ | `NCBI_MAX_RESULTS` | `20` | Cap for summary tool result counts |
60
+ | `NCBI_REQUEST_TIMEOUT` | `300` | Seconds before a download times out |
61
+ | `NCBI_CLI_PATH` | *(auto)* | Override path to `datasets` binary |
62
+ | `NCBI_DATAFORMAT_PATH` | *(auto)* | Override path to `dataformat` binary |
63
+
64
+ ## Development
65
+
66
+ ```bash
67
+ # Install with dev extras
68
+ pip install -e ".[dev]"
69
+
70
+ # Run unit tests
71
+ pytest
72
+
73
+ # Run only the tests marked `integration` (these make live network calls)
74
+ pytest -m integration
75
+
76
+ # Regenerate enums from the current NCBI OpenAPI spec
77
+ python scripts/gen_enums.py
78
+
79
+ # Run the server locally (stdio transport)
80
+ ncbi-datasets-mcp
81
+ ```
82
+
83
+ ## Architecture
84
+
85
+ ```
86
+ src/ncbi_datasets_mcp/
87
+ server.py FastMCP app — tool registrations only
88
+ config.py Pydantic-settings env config
89
+ cli/
90
+ locator.py Find datasets/dataformat (config → PATH → cache)
91
+ installer.py Download binaries from NCBI FTP
92
+ runner.py Async subprocess wrapper
93
+ rest/
94
+ client.py httpx client for metadata/summary endpoints
95
+ domains/
96
+ _generated_enums.py Vendored enums from OpenAPI spec
97
+ common.py Shared utilities (output dir, filename sanitising)
98
+ genome.py Genome CLI arg builders + response shaping
99
+ taxonomy.py Taxonomy CLI arg builders
100
+ models/
101
+ responses.py Shared DownloadResult dataclass
102
+ ```
103
+
104
+ Summary tools (no file I/O) → REST API.
105
+ Download and format-conversion tools → NCBI CLI binaries.
106
+
107
+ ## Cite
108
+
109
+ If you use NCBI Datasets in your research, please cite:
110
+ > NCBI Datasets. National Center for Biotechnology Information. https://www.ncbi.nlm.nih.gov/datasets/
111
+
112
+ ## License
113
+
114
+ MIT
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env python3
2
+ """Build ncbi-datasets.mcpb — a Claude Desktop Extension bundle.
3
+
4
+ The .mcpb is a ZIP archive containing manifest.json plus any bundled assets.
5
+ For this Python server, the manifest points to `uvx ncbi-datasets-mcp`, so
6
+ the archive only needs the manifest (and optional icon). The Python package
7
+ itself is fetched from PyPI by uvx at install time.
8
+
9
+ Usage:
10
+ python scripts/build_mcpb.py [--version VERSION] [--out DIR]
11
+
12
+ Optional: an icon at assets/icon.png (recommended for the registry)
13
+ """
14
+
15
+ import argparse
16
+ import json
17
+ import shutil
18
+ import sys
19
+ import zipfile
20
+ from pathlib import Path
21
+
22
# Directory that holds manifest.json (and assets/).
# This script is distributed both at the project root and under scripts/, so
# prefer the script's own directory when manifest.json sits next to it and
# otherwise fall back to the parent directory (the scripts/ layout).
_SCRIPT_DIR = Path(__file__).parent
ROOT = (
    _SCRIPT_DIR
    if (_SCRIPT_DIR / "manifest.json").is_file()
    else _SCRIPT_DIR.parent
)
23
+
24
+
25
def load_manifest() -> dict:
    """Read and parse ``manifest.json`` from ``ROOT``.

    Returns:
        The manifest contents as produced by ``json.load``.

    Raises:
        FileNotFoundError: if ``ROOT / "manifest.json"`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    manifest_path = ROOT / "manifest.json"
    # JSON text is UTF-8; do not depend on the platform's locale encoding
    # (e.g. cp1252 on Windows), which mis-decodes non-ASCII characters.
    with manifest_path.open(encoding="utf-8") as fh:
        return json.load(fh)
29
+
30
+
31
def sync_version(manifest: dict, version: str) -> dict:
    """Return a shallow copy of *manifest* with ``"version"`` set to *version*.

    The mapping passed in is not modified; nested values are shared with the
    copy rather than duplicated.
    """
    return {**manifest, "version": version}
36
+
37
+
38
def build(version: str | None, out_dir: Path) -> Path:
    """Write the ``.mcpb`` bundle (a ZIP archive) and return its path.

    Args:
        version: Version to stamp into the bundled manifest. When falsy, the
            manifest's own ``"version"`` is used (``"0.0.0"`` if it has none).
        out_dir: Directory that receives the bundle; created if missing.

    Returns:
        Path of the written ``ncbi-datasets-{version}.mcpb`` file.
    """
    manifest = load_manifest()

    if version:
        manifest = sync_version(manifest, version)
    else:
        version = manifest.get("version", "0.0.0")

    out_dir.mkdir(parents=True, exist_ok=True)
    bundle_path = out_dir / f"ncbi-datasets-{version}.mcpb"

    with zipfile.ZipFile(bundle_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        # manifest.json is the only required file
        zf.writestr("manifest.json", json.dumps(manifest, indent=2))

        # Optional icon
        icon_path = ROOT / "assets" / "icon.png"
        if icon_path.exists():
            zf.write(icon_path, "icon.png")
            print(" + icon.png")
        else:
            print(
                " (no assets/icon.png found — bundle will work but won't show an icon)",
                file=sys.stderr,
            )

    # Ceiling division: a manifest-only bundle is typically under 1 KiB and
    # would otherwise be reported as "0 KB".
    size_kb = -(-bundle_path.stat().st_size // 1024)
    print(f"Built {bundle_path} ({size_kb} KB)")
    return bundle_path
67
+
68
+
69
def main() -> None:
    """Parse command-line options, build the bundle and print install hints."""
    cli = argparse.ArgumentParser(description="Build ncbi-datasets.mcpb")
    cli.add_argument("--version", help="Override version (default: from manifest.json)")
    cli.add_argument("--out", default="dist", help="Output directory (default: dist/)")
    options = cli.parse_args()

    bundle = build(version=options.version, out_dir=Path(options.out))
    print(f"\nTo install: double-click {bundle.name} in Finder/Explorer")
    print("Or: open Claude Desktop → Settings → Extensions → Install from file")


if __name__ == "__main__":
    main()
@@ -0,0 +1,90 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*"]
7
+ pull_request:
8
+ branches: [main]
9
+ schedule:
10
+ # Nightly at 06:00 UTC — runs integration tests against live NCBI API
11
+ - cron: "0 6 * * *"
12
+ workflow_dispatch:
13
+ inputs:
14
+ run_integration:
15
+ description: "Run integration tests against live NCBI API"
16
+ type: boolean
17
+ default: false
18
+
19
+ jobs:
20
+ lint:
21
+ name: Lint
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ - uses: actions/setup-python@v5
26
+ with:
27
+ python-version: "3.12"
28
+ - run: pip install ruff
29
+ - run: ruff check src/ tests/
30
+
31
+ unit-tests:
32
+ name: Unit tests (Python ${{ matrix.python-version }})
33
+ runs-on: ubuntu-latest
34
+ needs: lint
35
+ strategy:
36
+ fail-fast: false
37
+ matrix:
38
+ python-version: ["3.10", "3.11", "3.12"]
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: ${{ matrix.python-version }}
44
+ - name: Install
45
+ run: pip install -e ".[dev]"
46
+ - name: Run unit tests
47
+ run: pytest tests/unit/ -v --tb=short
48
+
49
+ integration-tests:
50
+ name: Integration tests (live NCBI API)
51
+ runs-on: ubuntu-latest
52
+ # Only run on schedule or manual trigger — not on every push/PR
53
+ if: |
54
+ github.event_name == 'schedule' ||
55
+ github.event.inputs.run_integration == 'true'
56
+ steps:
57
+ - uses: actions/checkout@v4
58
+ - uses: actions/setup-python@v5
59
+ with:
60
+ python-version: "3.12"
61
+ - name: Install
62
+ run: pip install -e ".[dev]"
63
+ - name: Run integration tests
64
+ run: pytest -m integration -v --tb=short
65
+ env:
66
+ # Optional: add NCBI_API_KEY as a repo secret for higher rate limits
67
+ NCBI_API_KEY: ${{ secrets.NCBI_API_KEY }}
68
+
69
+ publish:
70
+ name: Publish to PyPI
71
+ runs-on: ubuntu-latest
72
+ needs: unit-tests
73
+ # Only publish on version tags pushed to main
74
+ if: startsWith(github.ref, 'refs/tags/v')
75
+ environment:
76
+ name: pypi
77
+ url: https://pypi.org/project/ncbi-datasets-mcp/
78
+ permissions:
79
+ id-token: write # Required for trusted publishing (no token needed)
80
+ steps:
81
+ - uses: actions/checkout@v4
82
+ - uses: actions/setup-python@v5
83
+ with:
84
+ python-version: "3.12"
85
+ - name: Build
86
+ run: |
87
+ pip install build
88
+ python -m build
89
+ - name: Publish
90
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,110 @@
1
+ """Download the NCBI datasets/dataformat CLI binaries for the current platform.
2
+
3
+ Binaries are single static files published by NCBI at predictable FTP URLs:
4
+ https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/{platform}/{binary}
5
+ """
6
+
7
+ import asyncio
8
+ import platform
9
+ import stat
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+
13
+ import httpx
14
+
15
+ from ncbi_datasets_mcp.cli.locator import CLIBinaries, _exe, get_install_dir
16
+
17
# Base URL; binaries live at {FTP_BASE}/{platform}/{binary}.
FTP_BASE = "https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2"

# (platform.system(), platform.machine()) -> platform directory under FTP_BASE.
_PLATFORM_MAP: dict[tuple[str, str], str] = {
    # Linux
    ("Linux", "x86_64"): "linux-amd64",
    ("Linux", "aarch64"): "linux-arm64",
    ("Linux", "armv7l"): "linux-arm",
    # macOS: both architectures share one directory
    # (universal binary covers Apple Silicon).
    ("Darwin", "x86_64"): "mac",
    ("Darwin", "arm64"): "mac",
    # Windows: both spellings of the 64-bit x86 machine name are accepted.
    ("Windows", "AMD64"): "win64",
    ("Windows", "x86_64"): "win64",
}
29
+
30
+
31
+ @dataclass
32
+ class InstallResult:
33
+ success: bool
34
+ datasets_path: Path | None = None
35
+ dataformat_path: Path | None = None
36
+ version: str | None = None
37
+ message: str = ""
38
+
39
+
40
def detect_platform() -> str:
    """Look up the NCBI FTP platform directory for this machine.

    Returns:
        The ``_PLATFORM_MAP`` entry for
        ``(platform.system(), platform.machine())``.

    Raises:
        RuntimeError: if that pair has no entry in ``_PLATFORM_MAP``.
    """
    os_name, arch = platform.system(), platform.machine()
    ftp_subdir = _PLATFORM_MAP.get((os_name, arch))
    if ftp_subdir is None:
        raise RuntimeError(
            f"Unsupported platform: {os_name}/{arch}. "
            "Install manually: "
            "https://www.ncbi.nlm.nih.gov/datasets/docs/v2/command-line-tools/download-and-install/"
        )
    return ftp_subdir
52
+
53
+
54
async def _download_binary(url: str, dest: Path, client: httpx.AsyncClient) -> None:
    """Stream a binary from *url* to *dest*, then mark it executable.

    The body is written to a sibling ``<name>.part`` file and renamed onto
    *dest* only once the download has finished, so an interrupted or failed
    transfer never leaves a truncated executable at *dest*.

    Args:
        url: Address to fetch with a streaming GET.
        dest: Final location of the binary; parent directories are created.
        client: Open ``httpx.AsyncClient`` used for the request.

    Raises:
        httpx.HTTPError: on transport errors or a non-success status
            (via ``raise_for_status``).
        OSError: if the file cannot be written, chmod-ed or renamed.
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        async with client.stream("GET", url) as response:
            response.raise_for_status()
            dest.parent.mkdir(parents=True, exist_ok=True)
            with partial.open("wb") as fh:
                async for chunk in response.aiter_bytes(chunk_size=65_536):
                    fh.write(chunk)

        if platform.system() != "Windows":
            exec_bits = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
            partial.chmod(partial.stat().st_mode | exec_bits)

        # Same-directory rename: replaces any previous binary in one step.
        partial.replace(dest)
    finally:
        # No-op after a successful rename; removes the leftover otherwise.
        partial.unlink(missing_ok=True)
65
+
66
+
67
async def install_cli() -> InstallResult:
    """Download both binaries into the managed cache dir and verify them.

    Steps: resolve the platform directory, download ``datasets`` and
    ``dataformat`` into ``get_install_dir()``, then run ``datasets --version``
    as a smoke test.

    Returns:
        ``InstallResult`` with ``success=True``, both paths and the reported
        version when every step succeeds; otherwise ``success=False`` with
        ``message`` describing the step that failed. Expected failures are
        reported through the result rather than raised.
    """
    try:
        platform_path = detect_platform()
    except RuntimeError as exc:
        return InstallResult(success=False, message=str(exc))

    install_dir = get_install_dir()
    datasets_dest = install_dir / _exe("datasets")
    dataformat_dest = install_dir / _exe("dataformat")

    datasets_url = f"{FTP_BASE}/{platform_path}/{_exe('datasets')}"
    dataformat_url = f"{FTP_BASE}/{platform_path}/{_exe('dataformat')}"

    async with httpx.AsyncClient(follow_redirects=True, timeout=120.0) as client:
        try:
            await _download_binary(datasets_url, datasets_dest, client)
            await _download_binary(dataformat_url, dataformat_dest, client)
        except (httpx.HTTPError, OSError) as exc:
            # OSError covers local failures (permissions, disk full) so they
            # are reported like network failures instead of propagating.
            return InstallResult(success=False, message=f"Download failed: {exc}")

    # Smoke-test: run --version on the freshly installed binary
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            str(datasets_dest),
            "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=15.0)
    except Exception as exc:
        # Kept broad on purpose: spawning can fail in several ways and any of
        # them means the binary is unusable.
        if proc is not None and proc.returncode is None:
            # Timed out: do not leave the child process running.
            try:
                proc.kill()
            except ProcessLookupError:
                pass
            else:
                await proc.wait()
        # A timeout carries no message text, so fall back to the type name.
        detail = str(exc) or type(exc).__name__
        return InstallResult(
            success=False,
            message=f"Binary installed but verification failed: {detail}",
        )

    if proc.returncode != 0:
        # A non-zero exit means the binary did not run correctly.
        detail = stderr.decode(errors="replace").strip() or f"exit code {proc.returncode}"
        return InstallResult(
            success=False,
            message=f"Binary installed but verification failed: {detail}",
        )

    version = stdout.decode(errors="replace").strip()
    return InstallResult(
        success=True,
        datasets_path=datasets_dest,
        dataformat_path=dataformat_dest,
        version=version,
        message=f"Installed {version} → {install_dir}",
    )