copernicus-mcp 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copernicus_mcp-0.1.2/.gitignore +49 -0
- copernicus_mcp-0.1.2/LICENSE +28 -0
- copernicus_mcp-0.1.2/PKG-INFO +173 -0
- copernicus_mcp-0.1.2/README.md +116 -0
- copernicus_mcp-0.1.2/pyproject.toml +74 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/__init__.py +3 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/__main__.py +12 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/auth/__init__.py +17 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/auth/adapter.py +33 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/auth/cmems.py +88 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/auth/resolver.py +240 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/__init__.py +4 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/abstract.py +129 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/cmems/__init__.py +23 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/cmems/backend.py +1371 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/cmems/tools.py +268 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/protocol.py +49 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/backends/registry.py +72 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/bootstrap.py +140 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/cache/__init__.py +3 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/cache/manager.py +193 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/cli.py +383 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/common/__init__.py +0 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/common/time.py +37 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/config/__init__.py +21 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/config/defaults.yaml +36 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/config/loader.py +76 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/config/schema.py +65 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/__init__.py +0 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/cache_key.py +107 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/canonicalisation.py +66 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/coordinator.py +93 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/envelope.py +75 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/provenance.py +303 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/data_model/schemas_cmems.py +179 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/errors/__init__.py +46 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/errors/classes.py +130 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/errors/records.py +139 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/errors/sanitiser.py +278 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/http/__init__.py +4 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/http/client_factory.py +64 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/http/retry.py +60 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/observability/__init__.py +15 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/observability/logger.py +109 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/persistence/__init__.py +19 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/persistence/protocol.py +84 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/persistence/schema.py +81 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/persistence/sqlite_backend.py +401 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/resources/__init__.py +0 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/server.py +160 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/version.py +1 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/workflow/__init__.py +0 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/workflow/confirmation.py +52 -0
- copernicus_mcp-0.1.2/src/copernicus_mcp/workflow/orchestrator.py +387 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
openai/
|
|
2
|
+
claude/
|
|
3
|
+
plan/
|
|
4
|
+
|
|
5
|
+
# Secrets — never commit
|
|
6
|
+
.env
|
|
7
|
+
.env.local
|
|
8
|
+
.env.*.local
|
|
9
|
+
.envrc
|
|
10
|
+
!.env.example
|
|
11
|
+
|
|
12
|
+
# Python
|
|
13
|
+
__pycache__/
|
|
14
|
+
*.py[cod]
|
|
15
|
+
*$py.class
|
|
16
|
+
*.so
|
|
17
|
+
.Python
|
|
18
|
+
build/
|
|
19
|
+
dist/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
*.egg
|
|
22
|
+
.eggs/
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.mypy_cache/
|
|
25
|
+
.ruff_cache/
|
|
26
|
+
.coverage
|
|
27
|
+
.coverage.*
|
|
28
|
+
htmlcov/
|
|
29
|
+
.tox/
|
|
30
|
+
|
|
31
|
+
# Venvs
|
|
32
|
+
.venv/
|
|
33
|
+
venv/
|
|
34
|
+
env/
|
|
35
|
+
|
|
36
|
+
# Editors / OS
|
|
37
|
+
.idea/
|
|
38
|
+
.vscode/
|
|
39
|
+
.DS_Store
|
|
40
|
+
|
|
41
|
+
# Project runtime artifacts
|
|
42
|
+
.cache/
|
|
43
|
+
*.db
|
|
44
|
+
*.sqlite
|
|
45
|
+
*.sqlite3
|
|
46
|
+
|
|
47
|
+
# NB: *.provenance.json is intentionally NOT ignored — provenance sidecars
|
|
48
|
+
# are user-facing artifacts.
|
|
49
|
+
.mcp.json
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026, Ivan Kuznetsov
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: copernicus-mcp
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: MCP server for safe, validated, cost-aware access to Copernicus Earth observation data.
|
|
5
|
+
Author: Ivan Kuznetsov, CliDyn
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026, Ivan Kuznetsov
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Python: >=3.11
|
|
36
|
+
Requires-Dist: aiosqlite<1,>=0.20
|
|
37
|
+
Requires-Dist: httpx<1,>=0.27
|
|
38
|
+
Requires-Dist: mcp<1.28,>=1.27.0
|
|
39
|
+
Requires-Dist: pydantic-settings<3,>=2.0
|
|
40
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
41
|
+
Requires-Dist: python-dateutil<3,>=2.8
|
|
42
|
+
Requires-Dist: pyyaml<7,>=6
|
|
43
|
+
Requires-Dist: rich<14,>=13
|
|
44
|
+
Requires-Dist: typer<1,>=0.12
|
|
45
|
+
Provides-Extra: all
|
|
46
|
+
Requires-Dist: copernicusmarine<3,>=2.4; extra == 'all'
|
|
47
|
+
Provides-Extra: cmems
|
|
48
|
+
Requires-Dist: copernicusmarine<3,>=2.4; extra == 'cmems'
|
|
49
|
+
Provides-Extra: dev
|
|
50
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
51
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
52
|
+
Requires-Dist: pytest-cov>=4; extra == 'dev'
|
|
53
|
+
Requires-Dist: pytest-timeout>=2.3; extra == 'dev'
|
|
54
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
55
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
56
|
+
Description-Content-Type: text/markdown
|
|
57
|
+
|
|
58
|
+
# copernicus-mcp
|
|
59
|
+
|
|
60
|
+
`copernicus-mcp` is a Model Context Protocol (MCP) server that gives LLM agents and CLI users safe, validated, cost-aware, reproducible access to Copernicus Earth observation data. It exposes discovery, estimation and subset-download workflows as MCP tools, returns large scientific data as file descriptors (filepath + metadata + provenance) rather than inline bytes, and produces a deterministic provenance record for every retrieval.
|
|
61
|
+
|
|
62
|
+
## Status
|
|
63
|
+
|
|
64
|
+
**Iteration 1 — Marine-first walking skeleton.** Iteration 1 ships the Copernicus Marine (CMEMS) backend through the official [`copernicusmarine`](https://help.marine.copernicus.eu/en/articles/7949409-copernicus-marine-toolbox-introduction) toolbox. Climate Data Store (CDS), Atmosphere Data Store (ADS), Early Warning Data Store (EWDS), Copernicus Data Space Ecosystem (CDSE), Sentinel Hub and WEkEO are planned for subsequent iterations and are explicitly out of scope for Iteration 1.
|
|
65
|
+
|
|
66
|
+
## Quick start
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# 1. Create and activate a virtual environment.
|
|
70
|
+
python -m venv .venv && source .venv/bin/activate
|
|
71
|
+
|
|
72
|
+
# 2. Install the package with the CMEMS backend.
|
|
73
|
+
pip install "copernicus-mcp[cmems]"
|
|
74
|
+
|
|
75
|
+
# 3. Configure CMEMS credentials (free account at
|
|
76
|
+
# https://data.marine.copernicus.eu/register).
|
|
77
|
+
# Recommended — the toolbox writes the credentials file once:
|
|
78
|
+
copernicusmarine login
|
|
79
|
+
# Alternative — environment variables in your shell profile:
|
|
80
|
+
# export COPERNICUSMARINE_SERVICE_USERNAME=your_user
|
|
81
|
+
# export COPERNICUSMARINE_SERVICE_PASSWORD=your_pass
|
|
82
|
+
|
|
83
|
+
# 4. Try a search from the terminal.
|
|
84
|
+
copernicus-mcp marine search-datasets --keyword temperature --limit 3
|
|
85
|
+
|
|
86
|
+
# 5. Run the MCP server (used by Claude Desktop / Claude Code / any
|
|
87
|
+
# MCP-compatible client over stdio). See "Claude Desktop integration"
|
|
88
|
+
# below.
|
|
89
|
+
copernicus-mcp serve
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Features
|
|
93
|
+
|
|
94
|
+
Iteration 1 implements the full MCP-core infrastructure so subsequent iterations add backends as small additive changes. Today the package provides:
|
|
95
|
+
|
|
96
|
+
- **Tools** (CMEMS): `marine_search_datasets`, `marine_describe_dataset`, `marine_estimate_subset`, `marine_subset_dataset`, plus a `copernicus_mcp_status` diagnostic.
|
|
97
|
+
- **Resources**: `copernicus://datasets/cmems/{id}`, `copernicus://files/{cache_key}`, `copernicus://provenance/{record_id}`.
|
|
98
|
+
- **CLI** (Typer + Rich): `copernicus-mcp {serve, version, status, marine ...}` with a global `--json` flag for scripting.
|
|
99
|
+
- **Confirmation flow**: large or approximate-estimate subsets gate on a structured confirmation prompt before any download.
|
|
100
|
+
- **Cache + provenance**: each retrieval produces a sidecar JSON record with file MD5, software versions, request envelope, and a deterministic cache key.
|
|
101
|
+
- **Sanitisation**: defence-in-depth redaction of credential-shaped strings on every outbound payload.
|
|
102
|
+
- **Structured errors**: eleven canonical error classes with recovery hints (e.g. `recovery_action="configure_credentials"`).
|
|
103
|
+
- **Cancellation discipline**: `asyncio.CancelledError` propagates without being wrapped, per project invariant.
|
|
104
|
+
|
|
105
|
+
## Why this exists
|
|
106
|
+
|
|
107
|
+
LLM agents can already call APIs, but for scientific data three properties matter and are easy to lose:
|
|
108
|
+
|
|
109
|
+
1. **Reproducibility** — the agent can hand a colleague the exact request and get the exact same file back tomorrow.
|
|
110
|
+
2. **Cost-awareness** — multi-gigabyte downloads should be confirmed, not silently triggered by a fuzzy prompt.
|
|
111
|
+
3. **Credential isolation** — credentials must never leak into tool output, logs, or provenance, regardless of the prompt or the upstream library's exception messages.
|
|
112
|
+
|
|
113
|
+
`copernicus-mcp` enforces all three at the protocol layer, so the agent does not need to.
|
|
114
|
+
|
|
115
|
+
## Tool reference, in brief
|
|
116
|
+
|
|
117
|
+
- **`marine_search_datasets`** (MCP tool) / `copernicus-mcp marine search-datasets` (CLI) — discover dataset ids by keyword, bbox, time range, or service type. Returns `{datasets, total_count}`.
|
|
118
|
+
- **`marine_describe_dataset`** / `marine describe DATASET_ID` — full metadata for a single dataset: variables, axes, services, terms.
|
|
119
|
+
- **`marine_estimate_subset`** / `marine estimate ...` — preview byte size and confirmation status for a subset request without downloading. Use this before large requests.
|
|
120
|
+
- **`marine_subset_dataset`** / `marine subset ...` — download a spatio-temporal subset. Returns `{filepath, uri, metadata, provenance}` — never inline bytes. Large requests gate on a structured confirmation prompt.
|
|
121
|
+
- **`copernicus_mcp_status`** / `status` — server diagnostics: backends, credential sources (without values), cache metrics, configuration snapshot.
|
|
122
|
+
|
|
123
|
+
For complete schemas, options and exit codes, run `copernicus-mcp marine subset --help` or read the inline tool descriptions surfaced by your MCP client (each tool's docstring is its protocol description).
|
|
124
|
+
|
|
125
|
+
## Claude Desktop integration
|
|
126
|
+
|
|
127
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or the equivalent on your platform:
|
|
128
|
+
|
|
129
|
+
```json
|
|
130
|
+
{
|
|
131
|
+
"mcpServers": {
|
|
132
|
+
"copernicus": {
|
|
133
|
+
"command": "copernicus-mcp",
|
|
134
|
+
"args": ["serve"]
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Restart Claude Desktop. The five tools listed above become available to the assistant. Tool results that wrap large data return a `filepath` plus metadata and provenance — never inline bytes.
|
|
141
|
+
|
|
142
|
+
### Credentials
|
|
143
|
+
|
|
144
|
+
`copernicus-mcp` resolves CMEMS credentials in the following order of precedence:
|
|
145
|
+
|
|
146
|
+
1. **Toolbox credentials file** (recommended): `~/.copernicusmarine/.copernicusmarine-credentials`. Created by running `copernicusmarine login` once. The same file is used by the official CLI and by us — set it once, share it across tools.
|
|
147
|
+
2. **Environment variables** in your shell profile: `COPERNICUSMARINE_SERVICE_USERNAME` and `COPERNICUSMARINE_SERVICE_PASSWORD`. Convenient on CI or in a project-local `direnv` setup.
|
|
148
|
+
3. (Possible but **not recommended** for the desktop client) `env: {...}` block inside `claude_desktop_config.json`. The file lives in plain text and gets backed up by macOS / cloud sync, so credentials embedded there leave a wider trace than necessary.
|
|
149
|
+
|
|
150
|
+
Verify resolution: `copernicus-mcp status --json | jq '.backends.cmems'`. The output reports `credential_source` as `config_file`, `env`, or `missing` — the actual values are never printed.
|
|
151
|
+
|
|
152
|
+
## Configuration
|
|
153
|
+
|
|
154
|
+
The system is usable with no configuration file at all — every Pydantic field has a sensible default. Override via environment variables (`COPERNICUS_MCP_LOG_LEVEL`, `COPERNICUS_MCP_CACHE_DIR`, `COPERNICUS_MCP_STATE_DB`, plus `COPERNICUS_MCP_<SECTION>__<FIELD>` for nested fields), or with a YAML file at `~/.config/copernicus-mcp/config.yaml` or `~/.copernicus-mcp.yaml`.
|
|
155
|
+
|
|
156
|
+
State directories: `~/.cache/copernicus-mcp/` (downloaded files + `.provenance.json` sidecars), `~/.local/state/copernicus-mcp/state.db` (SQLite cache index, workflow rows, persisted provenance).
|
|
157
|
+
|
|
158
|
+
## Troubleshooting
|
|
159
|
+
|
|
160
|
+
- **`AuthError`** on tool call → run `copernicus-mcp status` and check `backends.cmems.configured`. If `false`, your env vars are not visible to the running process (common Claude Desktop pitfall — restart the client after editing config) or the credentials file is missing/unreadable.
|
|
161
|
+
- **`CoverageUnavailableError`** → bbox or time range is outside the dataset's actual extent. Use `marine_describe_dataset` to inspect coverage and narrow the request.
|
|
162
|
+
- **`ValidationError` with `recovery_action="modify_request_parameters"`** → request was structurally invalid (e.g. inverted bbox, antimeridian-crossing bbox, naive datetime). The `next_action_hint` field tells you exactly how to fix it.
|
|
163
|
+
- **Subset hangs** → set `COPERNICUS_MCP_LOG_LEVEL=DEBUG` and watch for retry messages. Reduce the bbox or time range if the request is genuinely large.
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
BSD 3-Clause. See [`LICENSE`](LICENSE). Dependencies are EUPL-1.2 (`copernicusmarine`), Apache-2.0, MIT or BSD. Iteration 1 does not depend on `sentinelhub-py`; when the Sentinel Hub backend lands in a later iteration, this section will document the relevant CC BY-NC restriction on its SDK.
|
|
168
|
+
|
|
169
|
+
## Acknowledgements
|
|
170
|
+
|
|
171
|
+
- [Mercator Ocean International](https://www.mercator-ocean.eu/) for the [`copernicusmarine`](https://github.com/mercator-ocean/copernicus-marine-toolbox) Python toolbox.
|
|
172
|
+
- The [Copernicus Marine Service](https://marine.copernicus.eu/) and the European Commission's Copernicus programme for the underlying data.
|
|
173
|
+
- The Anthropic team for the [Model Context Protocol](https://modelcontextprotocol.io/) specification and Python SDK.
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# copernicus-mcp
|
|
2
|
+
|
|
3
|
+
`copernicus-mcp` is a Model Context Protocol (MCP) server that gives LLM agents and CLI users safe, validated, cost-aware, reproducible access to Copernicus Earth observation data. It exposes discovery, estimation and subset-download workflows as MCP tools, returns large scientific data as file descriptors (filepath + metadata + provenance) rather than inline bytes, and produces a deterministic provenance record for every retrieval.
|
|
4
|
+
|
|
5
|
+
## Status
|
|
6
|
+
|
|
7
|
+
**Iteration 1 — Marine-first walking skeleton.** Iteration 1 ships the Copernicus Marine (CMEMS) backend through the official [`copernicusmarine`](https://help.marine.copernicus.eu/en/articles/7949409-copernicus-marine-toolbox-introduction) toolbox. Climate Data Store (CDS), Atmosphere Data Store (ADS), Early Warning Data Store (EWDS), Copernicus Data Space Ecosystem (CDSE), Sentinel Hub and WEkEO are planned for subsequent iterations and are explicitly out of scope for Iteration 1.
|
|
8
|
+
|
|
9
|
+
## Quick start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# 1. Create and activate a virtual environment.
|
|
13
|
+
python -m venv .venv && source .venv/bin/activate
|
|
14
|
+
|
|
15
|
+
# 2. Install the package with the CMEMS backend.
|
|
16
|
+
pip install "copernicus-mcp[cmems]"
|
|
17
|
+
|
|
18
|
+
# 3. Configure CMEMS credentials (free account at
|
|
19
|
+
# https://data.marine.copernicus.eu/register).
|
|
20
|
+
# Recommended — the toolbox writes the credentials file once:
|
|
21
|
+
copernicusmarine login
|
|
22
|
+
# Alternative — environment variables in your shell profile:
|
|
23
|
+
# export COPERNICUSMARINE_SERVICE_USERNAME=your_user
|
|
24
|
+
# export COPERNICUSMARINE_SERVICE_PASSWORD=your_pass
|
|
25
|
+
|
|
26
|
+
# 4. Try a search from the terminal.
|
|
27
|
+
copernicus-mcp marine search-datasets --keyword temperature --limit 3
|
|
28
|
+
|
|
29
|
+
# 5. Run the MCP server (used by Claude Desktop / Claude Code / any
|
|
30
|
+
# MCP-compatible client over stdio). See "Claude Desktop integration"
|
|
31
|
+
# below.
|
|
32
|
+
copernicus-mcp serve
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Features
|
|
36
|
+
|
|
37
|
+
Iteration 1 implements the full MCP-core infrastructure so subsequent iterations add backends as small additive changes. Today the package provides:
|
|
38
|
+
|
|
39
|
+
- **Tools** (CMEMS): `marine_search_datasets`, `marine_describe_dataset`, `marine_estimate_subset`, `marine_subset_dataset`, plus a `copernicus_mcp_status` diagnostic.
|
|
40
|
+
- **Resources**: `copernicus://datasets/cmems/{id}`, `copernicus://files/{cache_key}`, `copernicus://provenance/{record_id}`.
|
|
41
|
+
- **CLI** (Typer + Rich): `copernicus-mcp {serve, version, status, marine ...}` with a global `--json` flag for scripting.
|
|
42
|
+
- **Confirmation flow**: large or approximate-estimate subsets gate on a structured confirmation prompt before any download.
|
|
43
|
+
- **Cache + provenance**: each retrieval produces a sidecar JSON record with file MD5, software versions, request envelope, and a deterministic cache key.
|
|
44
|
+
- **Sanitisation**: defence-in-depth redaction of credential-shaped strings on every outbound payload.
|
|
45
|
+
- **Structured errors**: eleven canonical error classes with recovery hints (e.g. `recovery_action="configure_credentials"`).
|
|
46
|
+
- **Cancellation discipline**: `asyncio.CancelledError` propagates without being wrapped, per project invariant.
|
|
47
|
+
|
|
48
|
+
## Why this exists
|
|
49
|
+
|
|
50
|
+
LLM agents can already call APIs, but for scientific data three properties matter and are easy to lose:
|
|
51
|
+
|
|
52
|
+
1. **Reproducibility** — the agent can hand a colleague the exact request and get the exact same file back tomorrow.
|
|
53
|
+
2. **Cost-awareness** — multi-gigabyte downloads should be confirmed, not silently triggered by a fuzzy prompt.
|
|
54
|
+
3. **Credential isolation** — credentials must never leak into tool output, logs, or provenance, regardless of the prompt or the upstream library's exception messages.
|
|
55
|
+
|
|
56
|
+
`copernicus-mcp` enforces all three at the protocol layer, so the agent does not need to.
|
|
57
|
+
|
|
58
|
+
## Tool reference, in brief
|
|
59
|
+
|
|
60
|
+
- **`marine_search_datasets`** (MCP tool) / `copernicus-mcp marine search-datasets` (CLI) — discover dataset ids by keyword, bbox, time range, or service type. Returns `{datasets, total_count}`.
|
|
61
|
+
- **`marine_describe_dataset`** / `marine describe DATASET_ID` — full metadata for a single dataset: variables, axes, services, terms.
|
|
62
|
+
- **`marine_estimate_subset`** / `marine estimate ...` — preview byte size and confirmation status for a subset request without downloading. Use this before large requests.
|
|
63
|
+
- **`marine_subset_dataset`** / `marine subset ...` — download a spatio-temporal subset. Returns `{filepath, uri, metadata, provenance}` — never inline bytes. Large requests gate on a structured confirmation prompt.
|
|
64
|
+
- **`copernicus_mcp_status`** / `status` — server diagnostics: backends, credential sources (without values), cache metrics, configuration snapshot.
|
|
65
|
+
|
|
66
|
+
For complete schemas, options and exit codes, run `copernicus-mcp marine subset --help` or read the inline tool descriptions surfaced by your MCP client (each tool's docstring is its protocol description).
|
|
67
|
+
|
|
68
|
+
## Claude Desktop integration
|
|
69
|
+
|
|
70
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or the equivalent on your platform:
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"mcpServers": {
|
|
75
|
+
"copernicus": {
|
|
76
|
+
"command": "copernicus-mcp",
|
|
77
|
+
"args": ["serve"]
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Restart Claude Desktop. The five tools listed above become available to the assistant. Tool results that wrap large data return a `filepath` plus metadata and provenance — never inline bytes.
|
|
84
|
+
|
|
85
|
+
### Credentials
|
|
86
|
+
|
|
87
|
+
`copernicus-mcp` resolves CMEMS credentials in the following order of precedence:
|
|
88
|
+
|
|
89
|
+
1. **Toolbox credentials file** (recommended): `~/.copernicusmarine/.copernicusmarine-credentials`. Created by running `copernicusmarine login` once. The same file is used by the official CLI and by us — set it once, share it across tools.
|
|
90
|
+
2. **Environment variables** in your shell profile: `COPERNICUSMARINE_SERVICE_USERNAME` and `COPERNICUSMARINE_SERVICE_PASSWORD`. Convenient on CI or in a project-local `direnv` setup.
|
|
91
|
+
3. (Possible but **not recommended** for the desktop client) `env: {...}` block inside `claude_desktop_config.json`. The file lives in plain text and gets backed up by macOS / cloud sync, so credentials embedded there leave a wider trace than necessary.
|
|
92
|
+
|
|
93
|
+
Verify resolution: `copernicus-mcp status --json | jq '.backends.cmems'`. The output reports `credential_source` as `config_file`, `env`, or `missing` — the actual values are never printed.
|
|
94
|
+
|
|
95
|
+
## Configuration
|
|
96
|
+
|
|
97
|
+
The system is usable with no configuration file at all — every Pydantic field has a sensible default. Override via environment variables (`COPERNICUS_MCP_LOG_LEVEL`, `COPERNICUS_MCP_CACHE_DIR`, `COPERNICUS_MCP_STATE_DB`, plus `COPERNICUS_MCP_<SECTION>__<FIELD>` for nested fields), or with a YAML file at `~/.config/copernicus-mcp/config.yaml` or `~/.copernicus-mcp.yaml`.
|
|
98
|
+
|
|
99
|
+
State directories: `~/.cache/copernicus-mcp/` (downloaded files + `.provenance.json` sidecars), `~/.local/state/copernicus-mcp/state.db` (SQLite cache index, workflow rows, persisted provenance).
|
|
100
|
+
|
|
101
|
+
## Troubleshooting
|
|
102
|
+
|
|
103
|
+
- **`AuthError`** on tool call → run `copernicus-mcp status` and check `backends.cmems.configured`. If `false`, your env vars are not visible to the running process (common Claude Desktop pitfall — restart the client after editing config) or the credentials file is missing/unreadable.
|
|
104
|
+
- **`CoverageUnavailableError`** → bbox or time range is outside the dataset's actual extent. Use `marine_describe_dataset` to inspect coverage and narrow the request.
|
|
105
|
+
- **`ValidationError` with `recovery_action="modify_request_parameters"`** → request was structurally invalid (e.g. inverted bbox, antimeridian-crossing bbox, naive datetime). The `next_action_hint` field tells you exactly how to fix it.
|
|
106
|
+
- **Subset hangs** → set `COPERNICUS_MCP_LOG_LEVEL=DEBUG` and watch for retry messages. Reduce the bbox or time range if the request is genuinely large.
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
BSD 3-Clause. See [`LICENSE`](LICENSE). Dependencies are EUPL-1.2 (`copernicusmarine`), Apache-2.0, MIT or BSD. Iteration 1 does not depend on `sentinelhub-py`; when the Sentinel Hub backend lands in a later iteration, this section will document the relevant CC BY-NC restriction on its SDK.
|
|
111
|
+
|
|
112
|
+
## Acknowledgements
|
|
113
|
+
|
|
114
|
+
- [Mercator Ocean International](https://www.mercator-ocean.eu/) for the [`copernicusmarine`](https://github.com/mercator-ocean/copernicus-marine-toolbox) Python toolbox.
|
|
115
|
+
- The [Copernicus Marine Service](https://marine.copernicus.eu/) and the European Commission's Copernicus programme for the underlying data.
|
|
116
|
+
- The Anthropic team for the [Model Context Protocol](https://modelcontextprotocol.io/) specification and Python SDK.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "copernicus-mcp"
|
|
7
|
+
description = "MCP server for safe, validated, cost-aware access to Copernicus Earth observation data."
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Ivan Kuznetsov" },
|
|
13
|
+
{ name = "CliDyn" },
|
|
14
|
+
]
|
|
15
|
+
dynamic = ["version"]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"mcp>=1.27.0,<1.28",
|
|
18
|
+
"httpx>=0.27,<1",
|
|
19
|
+
"pydantic>=2.0,<3",
|
|
20
|
+
"pydantic-settings>=2.0,<3",
|
|
21
|
+
"pyyaml>=6,<7",
|
|
22
|
+
"typer>=0.12,<1",
|
|
23
|
+
"rich>=13,<14",
|
|
24
|
+
"aiosqlite>=0.20,<1",
|
|
25
|
+
"python-dateutil>=2.8,<3",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = [
|
|
30
|
+
"pytest>=8",
|
|
31
|
+
"pytest-asyncio>=0.23",
|
|
32
|
+
"pytest-cov>=4",
|
|
33
|
+
"pytest-timeout>=2.3",
|
|
34
|
+
"ruff>=0.5",
|
|
35
|
+
"mypy>=1.10",
|
|
36
|
+
]
|
|
37
|
+
cmems = ["copernicusmarine>=2.4,<3"]
|
|
38
|
+
all = ["copernicus-mcp[cmems]"]
|
|
39
|
+
|
|
40
|
+
[project.scripts]
|
|
41
|
+
copernicus-mcp = "copernicus_mcp.cli:app"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.version]
|
|
44
|
+
path = "src/copernicus_mcp/version.py"
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build.targets.wheel]
|
|
47
|
+
packages = ["src/copernicus_mcp"]
|
|
48
|
+
|
|
49
|
+
[tool.hatch.build.targets.sdist]
|
|
50
|
+
# Ship ONLY the source tree, packaging metadata, README and LICENSE.
|
|
51
|
+
# Internal docs, research notes, plans, spikes, IDE config and
|
|
52
|
+
# example dotenv files do not belong on PyPI.
|
|
53
|
+
include = [
|
|
54
|
+
"/src/copernicus_mcp/**",
|
|
55
|
+
"/pyproject.toml",
|
|
56
|
+
"/README.md",
|
|
57
|
+
"/LICENSE",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tool.pytest.ini_options]
|
|
61
|
+
testpaths = ["tests"]
|
|
62
|
+
asyncio_mode = "auto"
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
src = ["src", "tests"]
|
|
67
|
+
|
|
68
|
+
[tool.ruff.lint]
|
|
69
|
+
select = ["E", "F", "I", "B", "UP", "W"]
|
|
70
|
+
ignore = ["E501"]
|
|
71
|
+
|
|
72
|
+
[tool.mypy]
|
|
73
|
+
python_version = "3.11"
|
|
74
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Allow ``python -m copernicus_mcp ...`` to invoke the Typer CLI.
|
|
2
|
+
|
|
3
|
+
The installed console script ``copernicus-mcp`` (defined in
|
|
4
|
+
``pyproject.toml``) is the primary entrypoint; this module exists so
|
|
5
|
+
tests and tooling that use ``-m`` (without relying on PATH) reach the
|
|
6
|
+
same app.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from copernicus_mcp.cli import app
|
|
10
|
+
|
|
11
|
+
if __name__ == "__main__":
|
|
12
|
+
app()
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from copernicus_mcp.auth.adapter import AuthAdapter
|
|
2
|
+
from copernicus_mcp.auth.cmems import CmemsBasicAuthAdapter
|
|
3
|
+
from copernicus_mcp.auth.resolver import (
|
|
4
|
+
CredentialResolver,
|
|
5
|
+
CredentialSource,
|
|
6
|
+
ResolvedCredentials,
|
|
7
|
+
SecretManagerProvider,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
# Names re-exported as the public API of this package; each entry corresponds
# to one of the imports at the top of this module.
__all__ = [
    "AuthAdapter",
    "CmemsBasicAuthAdapter",
    "CredentialResolver",
    "CredentialSource",
    "ResolvedCredentials",
    "SecretManagerProvider",
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AuthAdapter(Protocol):
    """Structural interface for a backend's authorization handler.

    Each backend ships its own concrete adapter alongside the backend code
    (``CmemsBasicAuthAdapter`` is one example). Adapters must never log
    credential values: ``credentials_summary`` is the single public
    introspection surface, and it only ever reports ``<set>`` / ``<unset>``
    placeholders.

    The canonical protocol shape is described in
    ``research/08_auth_matrix.md`` §9.6.
    """

    # Identifier of the backend this adapter authenticates for.
    backend_id: str
    # Whether the adapter advertises support for refreshing credentials.
    supports_refresh: bool

    async def apply_credentials(self, request: httpx.Request) -> httpx.Request:
        """Return the request that should be sent for ``request``."""
        ...

    async def handle_unauthorized(self, response: httpx.Response) -> bool:
        """Tell the caller whether to retry after re-authenticating.

        Returns:
            ``True`` when a retry should be attempted after re-auth,
            ``False`` otherwise.
        """
        ...

    async def close(self) -> None:
        """Shut the adapter down."""
        ...

    def credentials_summary(self) -> Mapping[str, str]:
        """Return a redacted, placeholder-only view of the credentials."""
        ...
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""CMEMS basic-auth adapter.
|
|
2
|
+
|
|
3
|
+
This module is the **only** in-process exit point for raw CMEMS credential
|
|
4
|
+
values, via ``CmemsBasicAuthAdapter.get_username_password()``. The CMEMS
|
|
5
|
+
backend layer calls that method during initialization to hand the values to
|
|
6
|
+
the ``copernicusmarine`` toolbox; nothing else may.
|
|
7
|
+
|
|
8
|
+
CLAUDE.md invariant #2: credentials live in ``CredentialResolver`` and
|
|
9
|
+
``AuthAdapter`` instances only — never tool input/output, logs, cache keys,
|
|
10
|
+
provenance records, or error records.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from collections.abc import Mapping
|
|
16
|
+
from types import MappingProxyType
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
from copernicus_mcp.auth.resolver import ResolvedCredentials
|
|
21
|
+
|
|
22
|
+
# Read-only placeholder view handed out instead of real credential values:
# both fields are reported as "<set>" and the mapping cannot be mutated.
_REDACTED_SUMMARY: Mapping[str, str] = MappingProxyType(
    dict.fromkeys(("username", "password"), "<set>")
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CmemsBasicAuthAdapter:
    """Auth adapter holding a fixed CMEMS username/password pair.

    HTTP-level authentication is carried out by the ``copernicusmarine``
    toolbox rather than by our ``httpx`` client, so ``apply_credentials``
    leaves requests untouched and the client never sees the credentials.
    The backend layer instead obtains the pair from
    ``get_username_password`` while it is being constructed and forwards it
    to the toolbox.
    """

    backend_id: str = "cmems"
    supports_refresh: bool = False

    def __init__(self, credentials: ResolvedCredentials) -> None:
        """Validate ``credentials`` and keep the username/password pair.

        Raises:
            ValueError: if the credentials belong to a backend other than
                ``"cmems"``, have source ``"missing"``, or lack a non-empty
                ``username`` or ``password`` field.
        """
        if credentials.backend != "cmems":
            # The rejected backend value is deliberately kept out of the
            # message — defense in depth in case it is ever secret-shaped.
            wrong_backend = (
                "CmemsBasicAuthAdapter requires credentials for backend 'cmems'."
            )
            raise ValueError(wrong_backend)
        if credentials.source == "missing":
            raise ValueError("CmemsBasicAuthAdapter requires resolved credentials.")

        fields = credentials.fields
        user = fields.get("username", "")
        secret = fields.get("password", "")
        if not (user and secret):
            incomplete = (
                "CmemsBasicAuthAdapter requires non-empty 'username' and "
                "'password' credential fields."
            )
            raise ValueError(incomplete)

        self._username = user
        self._password = secret
        self._closed = False

    async def apply_credentials(self, request: httpx.Request) -> httpx.Request:
        """Return ``request`` unchanged; the CMEMS toolbox does HTTP auth itself."""
        return request

    async def handle_unauthorized(self, response: httpx.Response) -> bool:
        """Return ``False``: static basic auth has nothing to refresh."""
        return False

    async def close(self) -> None:
        """Mark the adapter closed; safe to call repeatedly, nothing is released."""
        self._closed = True

    def credentials_summary(self) -> Mapping[str, str]:
        """Return the shared redacted summary (placeholders, never real values)."""
        return _REDACTED_SUMMARY

    def get_username_password(self) -> tuple[str, str]:
        """Hand the raw ``(username, password)`` pair to the in-process backend.

        SENSITIVE: this is the **only** authorised way raw CMEMS credentials
        leave the adapter. The values must never reach MCP tool results,
        logs, cache keys, provenance, or error messages.
        """
        return (self._username, self._password)

    def __repr__(self) -> str:
        """Describe the adapter without including any credential value."""
        shown = (
            f"backend_id={self.backend_id!r}, "
            f"supports_refresh={self.supports_refresh!r}"
        )
        return f"CmemsBasicAuthAdapter({shown})"
|