gi-mcp 0.1.0a9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. gi_mcp-0.1.0a9/.dockerignore +15 -0
  2. gi_mcp-0.1.0a9/.gitignore +8 -0
  3. gi_mcp-0.1.0a9/CHANGELOG.md +75 -0
  4. gi_mcp-0.1.0a9/CLAUDE.md +71 -0
  5. gi_mcp-0.1.0a9/CONTRIBUTING.md +161 -0
  6. gi_mcp-0.1.0a9/Dockerfile +30 -0
  7. gi_mcp-0.1.0a9/LICENSE +21 -0
  8. gi_mcp-0.1.0a9/PKG-INFO +235 -0
  9. gi_mcp-0.1.0a9/README.md +205 -0
  10. gi_mcp-0.1.0a9/pyproject.toml +93 -0
  11. gi_mcp-0.1.0a9/scripts/mcp_header_probe.py +124 -0
  12. gi_mcp-0.1.0a9/src/gi_mcp/__init__.py +1 -0
  13. gi_mcp-0.1.0a9/src/gi_mcp/_client.py +271 -0
  14. gi_mcp-0.1.0a9/src/gi_mcp/_demos.py +72 -0
  15. gi_mcp-0.1.0a9/src/gi_mcp/_ensembl.py +256 -0
  16. gi_mcp-0.1.0a9/src/gi_mcp/_fasta.py +71 -0
  17. gi_mcp-0.1.0a9/src/gi_mcp/_state.py +217 -0
  18. gi_mcp-0.1.0a9/src/gi_mcp/_store.py +102 -0
  19. gi_mcp-0.1.0a9/src/gi_mcp/config.py +163 -0
  20. gi_mcp-0.1.0a9/src/gi_mcp/data/__init__.py +0 -0
  21. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/__init__.py +0 -0
  22. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/annotation_hbb_chr11.fa +155 -0
  23. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/chromatin_active_promoter_chr19.fa +502 -0
  24. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/enhancer_eve.fa +168 -0
  25. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/expression_hbb_k562.fa +116 -0
  26. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/promoter_tp53.fa +321 -0
  27. gi_mcp-0.1.0a9/src/gi_mcp/data/sequences/splice_hbb.fa +67 -0
  28. gi_mcp-0.1.0a9/src/gi_mcp/inventory.py +40 -0
  29. gi_mcp-0.1.0a9/src/gi_mcp/prompts/__init__.py +11 -0
  30. gi_mcp-0.1.0a9/src/gi_mcp/prompts/library.py +40 -0
  31. gi_mcp-0.1.0a9/src/gi_mcp/resources/__init__.py +11 -0
  32. gi_mcp-0.1.0a9/src/gi_mcp/resources/account.py +46 -0
  33. gi_mcp-0.1.0a9/src/gi_mcp/resources/catalog.py +63 -0
  34. gi_mcp-0.1.0a9/src/gi_mcp/resources/contract.py +51 -0
  35. gi_mcp-0.1.0a9/src/gi_mcp/resources/jobs.py +34 -0
  36. gi_mcp-0.1.0a9/src/gi_mcp/resources/sequences.py +41 -0
  37. gi_mcp-0.1.0a9/src/gi_mcp/server.py +131 -0
  38. gi_mcp-0.1.0a9/src/gi_mcp/tools/__init__.py +11 -0
  39. gi_mcp-0.1.0a9/src/gi_mcp/tools/_common.py +138 -0
  40. gi_mcp-0.1.0a9/src/gi_mcp/tools/acquisition.py +202 -0
  41. gi_mcp-0.1.0a9/src/gi_mcp/tools/jobs.py +49 -0
  42. gi_mcp-0.1.0a9/src/gi_mcp/tools/models.py +34 -0
  43. gi_mcp-0.1.0a9/src/gi_mcp/tools/predict.py +185 -0
  44. gi_mcp-0.1.0a9/src/gi_mcp/tools/workflows.py +85 -0
  45. gi_mcp-0.1.0a9/tests/__init__.py +0 -0
  46. gi_mcp-0.1.0a9/tests/_helpers.py +27 -0
  47. gi_mcp-0.1.0a9/tests/conftest.py +102 -0
  48. gi_mcp-0.1.0a9/tests/test_acquisition.py +210 -0
  49. gi_mcp-0.1.0a9/tests/test_client.py +76 -0
  50. gi_mcp-0.1.0a9/tests/test_config.py +31 -0
  51. gi_mcp-0.1.0a9/tests/test_ensembl.py +118 -0
  52. gi_mcp-0.1.0a9/tests/test_fasta.py +43 -0
  53. gi_mcp-0.1.0a9/tests/test_hosted.py +190 -0
  54. gi_mcp-0.1.0a9/tests/test_integration.py +53 -0
  55. gi_mcp-0.1.0a9/tests/test_limits_parity.py +61 -0
  56. gi_mcp-0.1.0a9/tests/test_predict.py +138 -0
  57. gi_mcp-0.1.0a9/tests/test_prompts.py +18 -0
  58. gi_mcp-0.1.0a9/tests/test_resources.py +62 -0
  59. gi_mcp-0.1.0a9/tests/test_robustness.py +124 -0
  60. gi_mcp-0.1.0a9/tests/test_store.py +48 -0
  61. gi_mcp-0.1.0a9/tests/test_version_consistency.py +27 -0
  62. gi_mcp-0.1.0a9/tests/test_workflows.py +100 -0
  63. gi_mcp-0.1.0a9/tests/test_workflows_jobs.py +36 -0
@@ -0,0 +1,15 @@
1
+ .venv/
2
+ dist/
3
+ build/
4
+ *.egg-info/
5
+ **/__pycache__/
6
+ .pytest_cache/
7
+ tests/
8
+ scripts/
9
+ _http_smoke.py
10
+ _e2e_*.py
11
+ *.log
12
+ *.md
13
+ !README.md
14
+ Dockerfile
15
+ .dockerignore
@@ -0,0 +1,8 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .pytest_cache/
4
+ .venv/
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ uv.lock
@@ -0,0 +1,75 @@
1
+ # Changelog
2
+
3
+ All notable changes to `gi-mcp` are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project will adopt
5
+ [Semantic Versioning](https://semver.org/spec/v2.0.0.html) once it leaves the
6
+ alpha series. While in the `0.1.0aN` alpha, the tool surface and outputs may
7
+ change between releases without a major-version bump.
8
+
9
+ ## [Unreleased]
10
+
11
+ _Nothing yet._
12
+
13
+ ## [0.1.0a9]
14
+
15
+ ### Added
16
+
17
+ - Bundled `annotation_hbb_chr11` demo sequence — a plus-strand HBB locus
18
+ (`chr11:5,222,472-5,231,670`) that `find_genes` recovers as a single
19
+ high-confidence transcript centred on the TSS. Completes the "one positive
20
+ control per task" set, so the no-key `load_demo_sequence` flow can now
21
+ smoke-test annotation too.
22
+ - `ruff` (lint + format) and `mypy` (typing) gates, wired into CI across
23
+ Python 3.10–3.12.
24
+
25
+ ### Changed
26
+
27
+ - Releases now publish to PyPI via a tag-gated OIDC trusted-publish workflow
28
+ (push a `gi-mcp-v*` tag → build → checks → publish, no long-lived token). See
29
+ `CONTRIBUTING.md` → Releasing.
30
+
31
+ ## [0.1.0a8]
32
+
33
+ ### Fixed
34
+
35
+ - `gi://models` resource returned empty per-task model lists. The live
36
+ `/v1/tasks/{task}/models` response is unwrapped (`{task, default_model,
37
+ models}`), but the resource dug for a non-existent `data.models` envelope and
38
+ fell through to `[]`. Now reads the list directly (tolerating a `{data}`
39
+ wrapper too), so the catalog resource matches the populated `list_models`
40
+ tool. The test fake was wrapping in `data`, masking the bug; corrected to the
41
+ real shape with a non-empty assertion guarding regression.
42
+
43
+ ## [0.1.0a7]
44
+
45
+ ### Added
46
+
47
+ - `fetch_region` acquisition tool: fetch a genomic region by coordinates
48
+ (`chr8:127,680,000-127,800,000`) and get a handle for `find_genes` / `predict_*`.
49
+ Accepts the same lenient formats as the Web UI's gene search (commas, en/em
50
+ dashes, `..`, optional `chr`) and defaults to the plus strand, which the
51
+ gene-finder expects. This is what makes the documented "find the genes in
52
+ chr8:…" flow executable over MCP — previously only gene *symbols* could be
53
+ acquired.
54
+
55
+ ## [0.1.0a6]
56
+
57
+ Initial alpha — an MCP server exposing the Genomic Intelligence `/v1`
58
+ DNA-analysis API to LLM hosts over stdio, plus a hosted Streamable-HTTP demo.
59
+
60
+ - **Six inference tasks** — promoter, splice, enhancer, chromatin, expression,
61
+ and annotation (`find_genes`) — plus the composite
62
+ `find_genes_and_predict_expression` workflow.
63
+ - **Sequence acquisition** (Ensembl, local FASTA, inline, and bundled demo
64
+ sequences) via the handle pattern, so large sequences never round-trip through
65
+ the LLM.
66
+ - **Reference resources** (`gi://models`, `gi://docs/tasks`, `gi://openapi.json`,
67
+ `gi://sequences`, `gi://cache/{ref}`, `gi://jobs/recent`, `gi://account`) and
68
+ two slash-command prompts (`gi-promoter-screen`, `gi-expression-screen`).
69
+ - **One execution model** — every tool call blocks until the result is ready and
70
+ returns the `{data, meta}` envelope; slow tasks stream progress to the host
71
+ while they run. An explicit detached mode (`wait=False`) instead returns a
72
+ pollable `job_id`.
73
+ - **Secure-by-default HTTP transport** and a hosted multi-tenant mode
74
+ (per-request key resolution, per-principal sequence store, `load_local_fasta`
75
+ disabled remotely).
@@ -0,0 +1,71 @@
1
+ # MCP Server — Claude Context
2
+
3
+ `gi-mcp`: an MCP (Model Context Protocol) server that exposes the Genomic
4
+ Intelligence `/v1` API to LLM hosts (Claude Desktop/Code, Cursor) over stdio.
5
+ A thin protocol translator over the `/v1` contract.
6
+
7
+ ## What it wraps
8
+
9
+ The GI GPU service `/v1` contract — same endpoints the Web UI and partners use.
10
+ This server is a **protocol translator**: it owns no inference, it forwards to
11
+ `api.genomicintelligence.ai` (override with `GI_BASE_URL`).
12
+
13
+ ## Stack
14
+
15
+ - Python 3.10+, `mcp` SDK (FastMCP high-level API), `httpx`
16
+ - `pytest` + `pytest-asyncio` (auto mode) + `pytest-httpx`; build via `hatchling`
17
+
18
+ ## Layout & conventions
19
+
20
+ - `server.py` builds one `FastMCP` and calls `tools/resources/prompts.register_all`.
21
+ - Each primitive module exposes `register(mcp)`; no global mcp in submodules.
22
+ - Network only through `_client.Client` (GI) and `_ensembl` (Ensembl REST).
23
+ - Shared state is `_state.AppState` (client + `SequenceStore`), a lazy singleton;
24
+ tests inject a `FakeClient` via `_state.set_client`.
25
+ - Task bounds live in `config.TASKS` — mirror of `gpu_service/core/limits.py`.
26
+ Keep them in sync if the API's limits change.
27
+
28
+ ## The handle pattern (important)
29
+
30
+ Large sequences must not round-trip through the LLM. Acquisition tools store the
31
+ sequence in `SequenceStore` and return a short `seq_…` handle; predict tools take
32
+ `sequence` **xor** `sequence_ref`. See `tools/_common.py:resolve_sequence`.
33
+
34
+ ## Primitive map
35
+
36
+ | Primitive | Where | Notes |
37
+ |---|---|---|
38
+ | Tools | `tools/` | 16 total; 6 acquisition + 6 predict + workflow + jobs + models |
39
+ | Resources | `resources/` | `gi://models`, `gi://docs/tasks`, `gi://openapi.json`, `gi://sequences`, `gi://cache/{ref}`, `gi://jobs/recent`, `gi://account` |
40
+ | Prompts | `prompts/library.py` | 2 slash-command workflows |
41
+ | Roots | `tools/acquisition.py` | `load_local_fasta` gates on `ctx.session.list_roots()` |
42
+
43
+ ## Commands
44
+
45
+ ```bash
46
+ cd mcp-server
47
+ python3 -m venv .venv && .venv/bin/pip install -e ".[dev]"
48
+ .venv/bin/pytest # full suite, no network
49
+ GI_RUN_INTEGRATION=1 GI_API_KEY=gi_... .venv/bin/pytest -m integration # live
50
+ GI_API_KEY=gi_... .venv/bin/gi-mcp # run the server (clients normally launch it)
51
+ ```
52
+
53
+ ## Gotchas
54
+
55
+ - Submodules import `config` as a module (`from gi_mcp import config`) so call
56
+ sites like `config.allow_any_fasta_path()` stay monkeypatchable in tests.
57
+ - Demo FASTAs in `data/sequences/` ship in the wheel via `[tool.hatch.build]
58
+ artifacts`. `_demos.py` indexes them; `gi://sequences` lists the catalog and
59
+ `load_demo_sequence(name)` loads one into a handle.
60
+ - `load_local_fasta` needs a request context (`mcp.get_context()`) to read
61
+ MCP roots; tests that exercise it monkeypatch the context/session.
62
+
63
+ ## Docs
64
+
65
+ - `README.md` — front door: install, per-client config, handle pattern, full
66
+ tool/resource/prompt tables, "Try it" examples, environment
67
+ - `CONTRIBUTING.md` — dev setup, conventions, architecture walkthrough, release
68
+ - `CHANGELOG.md` — release history (Keep a Changelog)
69
+ - `../docs/platform/MCP_SERVER.md` — canonical design doc: architecture, primitives,
70
+ execution model (call→progress→result), and the compressed hosting section
71
+ (demo key + BYOK, transport/ALB, why hosting is a multi-tenancy refactor)
@@ -0,0 +1,161 @@
1
+ # Contributing to gi-mcp
2
+
3
+ `gi-mcp` is the MCP server that exposes the Genomic Intelligence `/v1`
4
+ DNA-analysis API to LLM hosts. It's a thin protocol translator: it owns no
5
+ inference and forwards to `api.genomicintelligence.ai`. Contributions that keep
6
+ the surface tight, honest, and well-tested are very welcome.
7
+
8
+ ## Development setup
9
+
10
+ ```bash
11
+ cd mcp-server
12
+ python3 -m venv .venv && .venv/bin/pip install -e ".[dev]"
13
+ ```
14
+
15
+ Run the server locally (your MCP client normally launches it for you):
16
+
17
+ ```bash
18
+ GI_API_KEY=gi_... .venv/bin/gi-mcp
19
+ ```
20
+
21
+ ## Tests
22
+
23
+ ```bash
24
+ .venv/bin/pytest # full suite — mocked, no network
25
+ .venv/bin/pytest -q tests/test_predict.py
26
+
27
+ # Live tests against the real API (consume quota):
28
+ GI_RUN_INTEGRATION=1 GI_API_KEY=gi_... .venv/bin/pytest -m integration
29
+ ```
30
+
31
+ The suite is network-free by default: tools run through the real FastMCP
32
+ dispatch with a `FakeClient` injected via `_state.set_client` (see
33
+ `tests/conftest.py`), and HTTP behaviour is exercised with `pytest-httpx`.
34
+ CI runs this on Python 3.10–3.12; please keep it green and add a test with any
35
+ behaviour change.
36
+
37
+ ## Conventions
38
+
39
+ - **One source of truth for the version:** `src/gi_mcp/__init__.py:__version__`.
40
+ `pyproject.toml` reads it dynamically (Hatch); the doc status banners are
41
+ pinned to it by `tests/test_version_consistency.py`. Don't hand-edit the
42
+ version anywhere else.
43
+ - **Task bounds** live in `config.TASKS`, mirroring `gpu_service/core/limits.py`.
44
+ `tests/test_limits_parity.py` fails if they drift (in a monorepo checkout).
45
+ - Each primitive module exposes `register(mcp)`; there is no global `mcp` in
46
+ submodules. All network access goes through `_client.Client` (GI `/v1`) and
47
+ `_ensembl` (Ensembl REST) — never call `httpx` directly elsewhere.
48
+ - Tools return the GI `{data, meta}` envelope on success and
49
+ `{error: {code, message, ...}}` on failure — the same shape the API uses.
50
+ Map new failure modes into that envelope; never let a raw traceback reach the
51
+ host.
52
+ - Tool/argument docstrings are the LLM-facing contract. Keep them accurate and
53
+ specific — they steer tool selection.
54
+
55
+ ## Architecture
56
+
57
+ `server.py` builds one `FastMCP` instance and calls
58
+ `tools/resources/prompts.register_all(mcp)`. Each primitive module exposes
59
+ `register(mcp)`; there is no global `mcp` in submodules.
60
+
61
+ ```
62
+ MCP client (Claude Desktop / Code / Cursor)
63
+ │ stdio (JSON-RPC)
64
+
65
+ server.py — one FastMCP; registers tools · resources · prompts
66
+
67
+ ┌────┴───────────────┬────────────────────┐
68
+ ▼ ▼ ▼
69
+ _state.py _store.py _client.py
70
+ AppState → SequenceStore Client → /v1 (httpx, envelopes)
71
+ (client+store) handle ↔ sequence │ HTTPS + Bearer
72
+ │ _ensembl.py (REST) _fasta.py (parse+roots) _demos.py (bundled FASTAs)
73
+ ▼ api.genomicintelligence.ai
74
+ config.py — task registry, limits, env
75
+ ```
76
+
77
+ Everything that touches the network lives behind two clients: `_client.Client`
78
+ (GI `/v1`) and `_ensembl` (Ensembl REST).
79
+
80
+ ### Source layout
81
+
82
+ ```
83
+ src/gi_mcp/
84
+ ├── server.py assemble FastMCP, register primitives, stdio entry
85
+ ├── config.py TaskSpec registry, length bounds, env resolution
86
+ ├── _client.py GI /v1 client: predict, async submit/poll, workflow, jobs
87
+ ├── _ensembl.py symbol → locus → sequence; TSS-centred window
88
+ ├── _fasta.py FASTA parse + roots path-containment
89
+ ├── _store.py SequenceStore: handles, dedupe, TTL, size cap
90
+ ├── _state.py AppState (stdio singleton / hosted per-request); test seams
91
+ ├── _demos.py bundled demo FASTA index (catalog + name → sequence)
92
+ ├── tools/ acquisition · predict (×6) · workflow · jobs · models
93
+ ├── resources/ catalog · contract · sequences · jobs · account
94
+ ├── prompts/library.py the two prompt workflows
95
+ └── data/sequences/ bundled demo FASTAs
96
+ ```
97
+
98
+ ### The four MCP primitives
99
+
100
+ - **Tools** (`@mcp.tool()`) — the verbs. Return the GI `{data, meta}` envelope
101
+ on success, `{error: {code, message, ...}}` on failure. Core design choice:
102
+ **acquisition vs prediction separation** — acquisition tools return a `seq_…`
103
+ handle via the `SequenceStore`; predict tools accept `sequence` (inline) **xor**
104
+ `sequence_ref` (handle), resolved by `tools/_common.py:resolve_sequence`.
105
+ Length is validated locally (`config.TaskSpec.validate_length`) before any
106
+ request leaves the machine. Slow tasks (`find_genes`,
107
+ `find_genes_and_predict_expression`, which always submits async) block and
108
+ stream progress via `tools/_common.py:block_and_poll`, returning the result in
109
+ one call; the response shape never flips on a timer. Exceeding the
110
+ `GI_ASYNC_TIMEOUT` ceiling returns a clean `{error: timeout}`, and the explicit
111
+ detached mode (`wait=False`) is the only path that returns a `job_id` to poll
112
+ with `get_job`. See [`docs/platform/MCP_SERVER.md`](../docs/platform/MCP_SERVER.md) §5.
113
+ - **Resources** (`@mcp.resource(uri)`) — the nouns: an index the host reads on
114
+ demand, static (`gi://models`) or templated (`gi://cache/{ref}`). Nothing loads
115
+ until asked, so even a 500 kb cached sequence is safe to expose.
116
+ - **Roots** — `load_local_fasta` calls `ctx.session.list_roots()` and refuses any
117
+ path outside the granted roots (`realpath` defeats `..`). Not registered when
118
+ `GI_MCP_HOSTED=1` (host file-read is unsafe on a shared server).
119
+ - **Prompts** (`@mcp.prompt()`) — instruction text naming this server's tools, so
120
+ one slash-command drives a multi-step recipe.
121
+
122
+ ### State & lifecycle
123
+
124
+ `AppState` is lazily built in `_state.py`, so importing the package needs no
125
+ `GI_API_KEY` (tests rely on this). The store is in-memory, TTL- and size-bounded,
126
+ keyed by a content hash so identical fetches dedupe. In hosted mode the client is
127
+ resolved per request (BYOK header, else the demo key) and the store is keyed per
128
+ principal. Restarting clears everything — no persistence, by design.
129
+
130
+ For the full server design — architecture, execution model, and the hosted HTTP
131
+ deployment (transport, ALB, multi-tenancy) — see
132
+ [`docs/platform/MCP_SERVER.md`](../docs/platform/MCP_SERVER.md).
133
+
134
+ ## Changelog
135
+
136
+ Add a bullet under `## [Unreleased]` in `CHANGELOG.md`
137
+ ([Keep a Changelog](https://keepachangelog.com/) style: Added / Changed /
138
+ Fixed / Removed / Security). While in the `0.1.0aN` alpha series the surface may
139
+ change between releases without a major bump.
140
+
141
+ ## Releasing (maintainers)
142
+
143
+ The package is published on PyPI. Release is automated by the
144
+ `.github/workflows/mcp-publish.yml` workflow, which publishes via PyPI **OIDC
145
+ trusted publishing** — no API token is stored. One-time setup: register this
146
+ repo + workflow as a Trusted Publisher for the `gi-mcp` project on PyPI and
147
+ create a GitHub Environment named `pypi`.
148
+
149
+ To cut a release:
150
+
151
+ 1. Bump `__version__` in `src/gi_mcp/__init__.py`; move the `[Unreleased]`
152
+ CHANGELOG section under the new version. Commit and merge to `main`.
153
+ 2. Locally, sanity-check the artifact: `pip install build twine && python -m
154
+ build && twine check dist/*` (CI gates this too).
155
+ 3. Optionally publish to **TestPyPI** first and `pip install` into a fresh venv
156
+ to smoke-test `gi-mcp --help` and a stdio handshake.
157
+ 4. Tag the commit `gi-mcp-v<version>` (e.g. `gi-mcp-v0.1.0a9`) and push the tag.
158
+ The workflow asserts the tag matches `__version__`, builds, runs
159
+ `twine check`, and publishes to **PyPI** — after which `uvx gi-mcp` works
160
+ without a checkout. The tag is namespaced (`gi-mcp-v*`) because this is a
161
+ monorepo with multiple deployables.
@@ -0,0 +1,30 @@
1
+ # gi-mcp hosted (Streamable HTTP) image.
2
+ #
3
+ # Build context is mcp-server/ ONLY — no other monorepo code (gpu_service,
4
+ # web-ui, deploy, AWS configs, .env*) is present in the context, so it cannot
5
+ # leak into the image. The runtime stage installs only the gi-mcp wheel
6
+ # (gi_mcp package + mcp + httpx + bundled demo FASTAs). The demo key is
7
+ # injected at runtime via GI_MCP_DEMO_KEY — never baked into a layer.
8
+
9
+ FROM python:3.12-slim AS build
10
+ WORKDIR /build
11
+ RUN pip install --no-cache-dir build
12
+ COPY pyproject.toml README.md ./
13
+ COPY src ./src
14
+ RUN python -m build --wheel
15
+
16
+ FROM python:3.12-slim AS runtime
17
+ RUN useradd --create-home --uid 10001 appuser
18
+ COPY --from=build /build/dist/*.whl /tmp/
19
+ RUN pip install --no-cache-dir /tmp/*.whl && rm -f /tmp/*.whl
20
+ USER appuser
21
+
22
+ # Hosted HTTP defaults; GI_MCP_DEMO_KEY is supplied at runtime (Secrets Manager).
23
+ ENV MCP_TRANSPORT=http \
24
+ GI_MCP_HOSTED=1 \
25
+ MCP_HTTP_HOST=0.0.0.0 \
26
+ MCP_HTTP_PORT=8000
27
+ EXPOSE 8000
28
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
29
+ CMD python -c "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:8000/healthz',timeout=2).read()==b'ok' else 1)"
30
+ CMD ["gi-mcp"]
gi_mcp-0.1.0a9/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Genomic Intelligence
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: gi-mcp
3
+ Version: 0.1.0a9
4
+ Summary: MCP server for the Genomic Intelligence DNA-analysis API: promoter, splice, enhancer, chromatin, expression, annotation + a composite annotation→expression workflow, over MCP stdio.
5
+ Project-URL: Homepage, https://genomicintelligence.ai
6
+ Project-URL: Documentation, https://docs.genomicintelligence.ai
7
+ Project-URL: Get an API key, https://genomicintelligence.ai/contact
8
+ Author-email: Genomic Intelligence <hello@genomicintelligence.ai>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: bioinformatics,claude,dna,ensembl,genomics,llm,mcp,model-context-protocol
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: httpx<1,>=0.27
22
+ Requires-Dist: mcp<2,>=1.27
23
+ Provides-Extra: dev
24
+ Requires-Dist: mypy>=1.11; extra == 'dev'
25
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
26
+ Requires-Dist: pytest-httpx>=0.30; extra == 'dev'
27
+ Requires-Dist: pytest>=8.0; extra == 'dev'
28
+ Requires-Dist: ruff>=0.6; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # gi-mcp
32
+
33
+ > **Status: ALPHA (`0.1.0a9`).** Provided for development, testing, and
34
+ > internal use. The task set, outputs, and tool surface may change without
35
+ > notice, and availability and results are not guaranteed. Not for production
36
+ > systems or clinical/diagnostic decisions.
37
+
38
+ An MCP server for the [Genomic Intelligence](https://genomicintelligence.ai)
39
+ DNA-analysis API. It exposes the six inference tasks (promoter, splice,
40
+ enhancer, chromatin, expression, annotation), a composite
41
+ annotation→expression workflow, sequence acquisition (Ensembl, local FASTA, inline, bundled demos),
42
+ reference resources, and ready-made prompt workflows — all over MCP stdio.
43
+
44
+ It runs locally as a thin protocol translator: it owns no inference and stores
45
+ no key, forwarding each request to your Genomic Intelligence backend under
46
+ **your** API key.
47
+
48
+ ## Install
49
+
50
+ `gi-mcp` runs with [`uv`](https://docs.astral.sh/uv/) (`uvx` fetches and runs it
51
+ with no manual install or virtualenv):
52
+
53
+ ```bash
54
+ uvx gi-mcp # recommended
55
+ pip install gi-mcp # alternative
56
+ ```
57
+
58
+ Install `uv` with `curl -LsSf https://astral.sh/uv/install.sh | sh` (macOS/Linux)
59
+ or see the [uv docs](https://docs.astral.sh/uv/). For local development from this
60
+ monorepo:
61
+
62
+ ```bash
63
+ cd mcp-server
64
+ python3 -m venv .venv && .venv/bin/pip install -e ".[dev]"
65
+ ```
66
+
67
+ ## Configure your MCP client
68
+
69
+ Request a partner key from <https://genomicintelligence.ai/contact>, then add a
70
+ server block. In every client the pattern is the same: run `uvx gi-mcp` with
71
+ `GI_API_KEY` in the environment.
72
+
73
+ **Claude Desktop** — edit `claude_desktop_config.json` (macOS:
74
+ `~/Library/Application Support/Claude/`, Windows: `%APPDATA%\Claude\`), then
75
+ fully quit and reopen:
76
+
77
+ ```json
78
+ {
79
+ "mcpServers": {
80
+ "genomic-intelligence": {
81
+ "command": "uvx",
82
+ "args": ["gi-mcp"],
83
+ "env": { "GI_API_KEY": "gi_..." }
84
+ }
85
+ }
86
+ }
87
+ ```
88
+
89
+ **Claude Code (CLI):**
90
+
91
+ ```bash
92
+ claude mcp add genomic-intelligence --env GI_API_KEY=gi_... -- uvx gi-mcp
93
+ ```
94
+
95
+ **Cursor / Windsurf / Zed** use the same JSON shape as Claude Desktop in their
96
+ own MCP config files. **Your own agent:** point any MCP-capable client at the
97
+ stdio command `uvx gi-mcp` with `GI_API_KEY` set — it speaks standard MCP.
98
+
99
+ From a **local checkout**, point `command` at the venv entry point instead of
100
+ `uvx`:
101
+
102
+ ```json
103
+ { "command": "/abs/path/to/mcp-server/.venv/bin/gi-mcp", "env": { "GI_API_KEY": "gi_..." } }
104
+ ```
105
+
106
+ Notes:
107
+
108
+ - **Verify:** your client should list the **genomic-intelligence** server with
109
+ 16 tools. Ask *"List the available promoter models"* → runs `list_models`.
110
+ - **Your key, your quota:** calls count against your partner key's rate /
111
+ concurrency caps. The server stores no key and owns no inference.
112
+ - **Updating / pinning:** `uvx --refresh gi-mcp` picks up new releases; pin with
113
+ `"args": ["gi-mcp@0.1.0a9"]`.
114
+
115
+ ## The handle pattern (why large sequences don't bloat your context)
116
+
117
+ Genomic sequences are big — the expression model alone wants 9,198 bp, and
118
+ promoter accepts up to 500,000. Round-tripping those through the LLM twice
119
+ (once as a tool result, once as the next tool's argument) is wasteful and
120
+ blows the context window.
121
+
122
+ So acquisition and prediction are split:
123
+
124
+ 1. An **acquisition** tool fetches/loads a sequence, stores it server-side, and
125
+ returns a short **handle** (`seq_ab12cd34`) plus light metadata — never the
126
+ bases.
127
+ 2. A **prediction** tool takes that `sequence_ref`. The server resolves the
128
+ bases internally.
129
+
130
+ ```
131
+ fetch_ensembl_sequence(gene="TP53")
132
+ → { ref: "seq_ab12cd34", name: "TP53", length: 19148, preview: "CACC…GGTG" }
133
+
134
+ predict_promoter(sequence_ref="seq_ab12cd34")
135
+ → { data: { regions: [...] }, meta: {...} } # 19 kb never hit the LLM
136
+ ```
137
+
138
+ Small sequences can still be passed inline via `sequence`. Every predict tool
139
+ accepts `sequence` **xor** `sequence_ref`. The handle lives server-side, so a
140
+ remote client (e.g. ChatGPT) that opens a fresh session per tool call still
141
+ resolves the fetched sequence across the fetch→predict steps.
142
+
143
+ ## Try it
144
+
145
+ Copy-paste these natural-language prompts into a freshly-connected client to
146
+ smoke-test the live connection end-to-end — one per inference task plus the
147
+ composite workflow, ordered from simplest (no inference) to full fetch→predict
148
+ chains.
149
+
150
+ | # | Task | Prompt | Expected behavior |
151
+ |---|------|--------|-------------------|
152
+ | 0 | warm-up (no inference) | "What Genomic Intelligence models are available for the expression task?" | Calls `list_models`; returns the model registry. No inference. |
153
+ | 1 | promoter | "Fetch human TP53 from Ensembl and scan it for promoter regions — show me the strongest hits." | Fetch → `predict_promoter`; ranked promoter windows. |
154
+ | 2 | expression | "Fetch HBB prepared for expression and predict its expression in K562 cells." | `fetch_gene_for_expression` → `predict_expression`. Cell-type-specific — omit the cell type and it returns `invalid_input` asking for it. |
155
+ | 3 | splice | "Fetch the human HBB gene sequence and predict its splice sites." | Fetch → `predict_splice`; donor/acceptor sites. |
156
+ | 4 | enhancer | "Fetch human GATA1 and predict enhancer regions." | Fetch → `predict_enhancer`. |
157
+ | 5 | chromatin | "Fetch human SOX2 and predict chromatin accessibility." | Fetch → `predict_chromatin`. |
158
+ | 6 | annotation | "Find the genes in chr8:127,680,000–127,800,000." | `fetch_region` → `find_genes`; transcript intervals (plus-strand). |
159
+ | 7 | composite | "Find the genes in chr8:127,680,000–127,800,000 and predict each one's expression in K562." | `find_genes_and_predict_expression` — annotation→expression in one call. |
160
+
161
+ No API key handy? *"List the bundled demo sequences, then load the K562
162
+ expression one and predict its expression"* runs `load_demo_sequence` → no
163
+ Ensembl fetch, no personal quota. The server ships one curated positive control
164
+ per task (including `annotation_hbb_chr11`, a plus-strand HBB locus the
165
+ gene-finder recovers centred on the TSS), so every task above is smoke-testable
166
+ this way.
167
+
168
+ **Going further** — the server also works as an analytical assistant that chains
169
+ tools and reasons over results:
170
+
171
+ - *"Compare the regulatory landscape of HBB versus HBA1 — which has stronger promoter signal and higher predicted expression in K562?"*
172
+ - *"Run promoter prediction on TP53 with every available model and give me a diff table of where they disagree."* (`list_models` + multi-model fan-out)
173
+ - *"Annotate chr8:127,680,000–127,800,000 — if it's slow, hand me a job ID and I'll check back."* (async `find_genes` → `get_job`)
174
+ - *"Characterize GATA1: run every applicable task and assemble a one-page report for a wet-lab audience."*
175
+
176
+ ## What's exposed
177
+
178
+ ### Tools
179
+
180
+ | Group | Tools |
181
+ |---|---|
182
+ | Acquisition | `fetch_ensembl_sequence`, `fetch_region`, `fetch_gene_for_expression`, `load_local_fasta`, `store_inline_sequence`, `load_demo_sequence` |
183
+ | Prediction (sync) | `predict_promoter`, `predict_splice`, `predict_enhancer`, `predict_chromatin`, `predict_expression` |
184
+ | Prediction (async) | `find_genes` |
185
+ | Workflow | `find_genes_and_predict_expression` |
186
+ | Jobs | `get_job`, `list_jobs` |
187
+ | Catalog | `list_models` |
188
+
189
+ ### Resources
190
+
191
+ | URI | Contents |
192
+ |---|---|
193
+ | `gi://models` | Full model catalog across all six tasks |
194
+ | `gi://models/{model_id}` | Bio spec for one model |
195
+ | `gi://docs/tasks` | What each task does, sync/async, length bounds |
196
+ | `gi://openapi.json` | Live `/v1` OpenAPI contract |
197
+ | `gi://sequences` | Bundled demo references — one positive control per task; load one with `load_demo_sequence` |
198
+ | `gi://cache/{ref}` | Inspect a stored sequence handle |
199
+ | `gi://jobs/recent` | The caller's recent async jobs |
200
+ | `gi://account` | Configured backend + health + exposed tasks |
201
+
202
+ ### Prompts (slash-command workflows)
203
+
204
+ | Prompt | What it does |
205
+ |---|---|
206
+ | `gi-promoter-screen <gene>` | Fetch a gene → scan for promoters → report strong hits |
207
+ | `gi-expression-screen <gene>` | TSS window → predict expression → summarise across tissues |
208
+
209
+ ### Roots
210
+
211
+ `load_local_fasta` only reads files inside directories the user granted via MCP
212
+ roots. Set `GI_FASTA_ALLOW_ANY=1` to bypass on hosts that don't implement roots.
213
+
214
+ ## Environment
215
+
216
+ | Var | Default | Purpose |
217
+ |---|---|---|
218
+ | `GI_API_KEY` | — (required) | Partner bearer key (`gi_…`) |
219
+ | `GI_BASE_URL` | `https://api.genomicintelligence.ai` | Override for staging / local backend |
220
+ | `GI_ENSEMBL_URL` | `https://rest.ensembl.org` | Ensembl REST base |
221
+ | `GI_FASTA_ALLOW_ANY` | unset | `1` to skip roots gating for local FASTA |
222
+ | `GI_ASYNC_TIMEOUT` | `240` | Ceiling (s) a `wait=True` slow task blocks while streaming progress before returning a `timeout` error (never a job_id) |
223
+ | `GI_ASYNC_POLL_INTERVAL` | `2` | Poll cadence (s) for the block-and-stream loop |
224
+ | `GI_HTTP_TIMEOUT` | `300` | Per-request read timeout (s) for `/v1` calls |
225
+
226
+ ## Development & contributing
227
+
228
+ Setup, conventions, the architecture walkthrough, and the release process live
229
+ in [`CONTRIBUTING.md`](CONTRIBUTING.md). Quick start:
230
+
231
+ ```bash
232
+ .venv/bin/pytest # full suite (mocked; no network)
233
+ ```
234
+
235
+ Release history is in [`CHANGELOG.md`](CHANGELOG.md).