mcp-data-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_data_core-0.1.0/.github/workflows/publish.yml +120 -0
- mcp_data_core-0.1.0/.gitignore +43 -0
- mcp_data_core-0.1.0/LICENSE +21 -0
- mcp_data_core-0.1.0/PKG-INFO +180 -0
- mcp_data_core-0.1.0/README.md +144 -0
- mcp_data_core-0.1.0/pyproject.toml +89 -0
- mcp_data_core-0.1.0/src/mcp_data_core/__init__.py +78 -0
- mcp_data_core-0.1.0/src/mcp_data_core/base_client.py +348 -0
- mcp_data_core-0.1.0/src/mcp_data_core/cache.py +367 -0
- mcp_data_core-0.1.0/src/mcp_data_core/corpus_compression.py +160 -0
- mcp_data_core-0.1.0/src/mcp_data_core/corpus_db.py +371 -0
- mcp_data_core-0.1.0/src/mcp_data_core/envelope.py +211 -0
- mcp_data_core-0.1.0/src/mcp_data_core/exceptions.py +110 -0
- mcp_data_core-0.1.0/src/mcp_data_core/filenames.py +233 -0
- mcp_data_core-0.1.0/src/mcp_data_core/logging.py +94 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/__init__.py +74 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/_env.py +24 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/annotations.py +18 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/auth.py +238 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/conditional.py +169 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/downloads.py +972 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/middleware.py +201 -0
- mcp_data_core-0.1.0/src/mcp_data_core/mcp/server_factory.py +113 -0
- mcp_data_core-0.1.0/src/mcp_data_core/oauth2.py +171 -0
- mcp_data_core-0.1.0/src/mcp_data_core/py.typed +0 -0
- mcp_data_core-0.1.0/src/mcp_data_core/resilience.py +99 -0
- mcp_data_core-0.1.0/tests/__init__.py +0 -0
- mcp_data_core-0.1.0/tests/test_auth.py +350 -0
- mcp_data_core-0.1.0/tests/test_conditional.py +185 -0
- mcp_data_core-0.1.0/tests/test_corpus_db.py +272 -0
- mcp_data_core-0.1.0/tests/test_envelope.py +175 -0
- mcp_data_core-0.1.0/tests/test_filenames.py +149 -0
- mcp_data_core-0.1.0/tests/test_logging.py +39 -0
- mcp_data_core-0.1.0/tests/test_mcp_downloads.py +679 -0
- mcp_data_core-0.1.0/tests/test_middleware.py +96 -0
- mcp_data_core-0.1.0/tests/test_oauth2.py +293 -0
- mcp_data_core-0.1.0/tests/test_server_factory.py +42 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Tag-triggered release pipeline. Pushing a `vX.Y.Z` tag (cut from main)
|
|
4
|
+
# verifies the tag matches the pyproject version, runs the test suite one
|
|
5
|
+
# more time, builds the wheel + sdist, and publishes to PyPI via OIDC trusted
|
|
6
|
+
# publishing (no long-lived API token stored anywhere).
|
|
7
|
+
#
|
|
8
|
+
# To ship a new release:
|
|
9
|
+
# 1. Bump `version` in pyproject.toml on main.
|
|
10
|
+
# 2. `git tag v0.2.0 && git push origin v0.2.0` (version must match).
|
|
11
|
+
# 3. Watch this workflow run.
|
|
12
|
+
|
|
13
|
+
on:
|
|
14
|
+
push:
|
|
15
|
+
tags:
|
|
16
|
+
- 'v*'
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
verify:
|
|
23
|
+
name: Verify tag matches pyproject version
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
outputs:
|
|
26
|
+
version: ${{ steps.version.outputs.version }}
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@v4
|
|
29
|
+
|
|
30
|
+
- name: Extract versions and compare
|
|
31
|
+
id: version
|
|
32
|
+
run: |
|
|
33
|
+
TAG="${GITHUB_REF_NAME#v}"
|
|
34
|
+
PY_VERSION=$(grep -E '^version\s*=' pyproject.toml | sed -E 's/.*"([^"]+)".*/\1/' | head -n1)
|
|
35
|
+
echo "Tag version: $TAG"
|
|
36
|
+
echo "pyproject version: $PY_VERSION"
|
|
37
|
+
if [ "$TAG" != "$PY_VERSION" ]; then
|
|
38
|
+
echo "::error::Tag $GITHUB_REF_NAME (parsed version $TAG) does not match pyproject version $PY_VERSION. Bump pyproject first, commit, then retag."
|
|
39
|
+
exit 1
|
|
40
|
+
fi
|
|
41
|
+
echo "version=$PY_VERSION" >> "$GITHUB_OUTPUT"
|
|
42
|
+
|
|
43
|
+
test:
|
|
44
|
+
name: Test on Python ${{ matrix.python-version }}
|
|
45
|
+
needs: verify
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
strategy:
|
|
48
|
+
matrix:
|
|
49
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
50
|
+
steps:
|
|
51
|
+
- uses: actions/checkout@v4
|
|
52
|
+
|
|
53
|
+
- name: Install uv
|
|
54
|
+
uses: astral-sh/setup-uv@v4
|
|
55
|
+
with:
|
|
56
|
+
version: "latest"
|
|
57
|
+
|
|
58
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
59
|
+
run: uv python install ${{ matrix.python-version }}
|
|
60
|
+
|
|
61
|
+
- name: Install dependencies (library + all extras + dev group)
|
|
62
|
+
run: uv sync --all-extras --group dev
|
|
63
|
+
|
|
64
|
+
- name: Run test suite
|
|
65
|
+
run: uv run pytest -q
|
|
66
|
+
|
|
67
|
+
build:
|
|
68
|
+
name: Build wheel + sdist
|
|
69
|
+
needs: test
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v4
|
|
73
|
+
|
|
74
|
+
- name: Install uv
|
|
75
|
+
uses: astral-sh/setup-uv@v4
|
|
76
|
+
with:
|
|
77
|
+
version: "latest"
|
|
78
|
+
|
|
79
|
+
- name: Set up Python
|
|
80
|
+
run: uv python install 3.11
|
|
81
|
+
|
|
82
|
+
- name: Build distributions
|
|
83
|
+
run: uv build
|
|
84
|
+
|
|
85
|
+
- name: List artifacts
|
|
86
|
+
run: ls -la dist/
|
|
87
|
+
|
|
88
|
+
- name: Upload artifacts
|
|
89
|
+
uses: actions/upload-artifact@v4
|
|
90
|
+
with:
|
|
91
|
+
name: dist
|
|
92
|
+
path: dist/
|
|
93
|
+
retention-days: 7
|
|
94
|
+
|
|
95
|
+
publish:
|
|
96
|
+
name: Publish ${{ needs.verify.outputs.version }} to PyPI
|
|
97
|
+
needs: [verify, build]
|
|
98
|
+
runs-on: ubuntu-latest
|
|
99
|
+
# The `pypi` environment matches the one configured on PyPI's trusted-
|
|
100
|
+
# publisher page. GitHub enforces any protection rules on it (approval,
|
|
101
|
+
# required reviewers, etc.) before this job runs.
|
|
102
|
+
environment:
|
|
103
|
+
name: pypi
|
|
104
|
+
url: https://pypi.org/project/mcp-data-core/${{ needs.verify.outputs.version }}/
|
|
105
|
+
permissions:
|
|
106
|
+
# Required for OIDC trusted publishing. No PyPI token needed.
|
|
107
|
+
id-token: write
|
|
108
|
+
steps:
|
|
109
|
+
- name: Download built distributions
|
|
110
|
+
uses: actions/download-artifact@v4
|
|
111
|
+
with:
|
|
112
|
+
name: dist
|
|
113
|
+
path: dist/
|
|
114
|
+
|
|
115
|
+
- name: Publish to PyPI
|
|
116
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
117
|
+
with:
|
|
118
|
+
# Skip re-publishing if the version already exists (makes the
|
|
119
|
+
# workflow idempotent for retries).
|
|
120
|
+
skip-existing: true
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Testing
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
htmlcov/
|
|
17
|
+
.tox/
|
|
18
|
+
.nox/
|
|
19
|
+
coverage.xml
|
|
20
|
+
|
|
21
|
+
# Type checkers
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
.pyright/
|
|
24
|
+
.ty/
|
|
25
|
+
|
|
26
|
+
# uv / venvs
|
|
27
|
+
.venv/
|
|
28
|
+
venv/
|
|
29
|
+
env/
|
|
30
|
+
.python-version
|
|
31
|
+
|
|
32
|
+
# Editors
|
|
33
|
+
.vscode/
|
|
34
|
+
.idea/
|
|
35
|
+
*.swp
|
|
36
|
+
*.swo
|
|
37
|
+
.DS_Store
|
|
38
|
+
|
|
39
|
+
# Caches that may be produced by the package itself during local runs
|
|
40
|
+
.cache/
|
|
41
|
+
|
|
42
|
+
# Hatch
|
|
43
|
+
.hatch/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Parker Hancock
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-data-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared async HTTP scaffolding, response envelopes, corpus storage, and MCP server plumbing for data-fetching research toolkits.
|
|
5
|
+
Project-URL: Homepage, https://github.com/parkerhancock/mcp-data-core
|
|
6
|
+
Project-URL: Repository, https://github.com/parkerhancock/mcp-data-core
|
|
7
|
+
Author-email: Parker Hancock <633163+parkerhancock@users.noreply.github.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: async,cache,httpx,mcp,research,retry,scaffolding
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: anyio>=4.4
|
|
21
|
+
Requires-Dist: h2>=4.1
|
|
22
|
+
Requires-Dist: hishel[async]>=1.1.3
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: python-dateutil>=2.9
|
|
26
|
+
Requires-Dist: pyyaml>=6.0
|
|
27
|
+
Requires-Dist: tenacity>=8.4
|
|
28
|
+
Requires-Dist: zstandard>=0.22
|
|
29
|
+
Provides-Extra: gcs
|
|
30
|
+
Requires-Dist: google-cloud-storage>=2.18; extra == 'gcs'
|
|
31
|
+
Provides-Extra: mcp
|
|
32
|
+
Requires-Dist: fastmcp>=3.2.3; extra == 'mcp'
|
|
33
|
+
Requires-Dist: griffe<2; extra == 'mcp'
|
|
34
|
+
Requires-Dist: starlette>=0.37; extra == 'mcp'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# mcp-data-core
|
|
38
|
+
|
|
39
|
+
**Batteries-included async HTTP scaffolding and MCP server plumbing for Python data-fetching libraries.**
|
|
40
|
+
|
|
41
|
+
If you're writing a Python library that pulls structured data from an API — patents, court filings, FDA records, financial filings, anything — you end up rebuilding the same eight things: an `httpx` client with retry, an HTTP cache, a tenacity policy, an OAuth helper, response envelopes, a typed exception hierarchy, per-app logging, and (if you ship an MCP server) tool registration, auth, and signed downloads. `mcp-data-core` is those eight things, packaged.
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv add mcp-data-core # core scaffolding
|
|
47
|
+
uv add "mcp-data-core[mcp]" # + FastMCP server helpers
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from mcp_data_core import BaseAsyncClient
|
|
52
|
+
|
|
53
|
+
class MyApiClient(BaseAsyncClient):
|
|
54
|
+
DEFAULT_BASE_URL = "https://api.example.com"
|
|
55
|
+
CACHE_NAME = "my_api"
|
|
56
|
+
|
|
57
|
+
async def get_thing(self, id: str) -> dict:
|
|
58
|
+
return await self._request_json("GET", f"/things/{id}")
|
|
59
|
+
|
|
60
|
+
async with MyApiClient() as client:
|
|
61
|
+
result = await client.get_thing("42")
|
|
62
|
+
stats = await client.cache_stats()
|
|
63
|
+
print(f"Cache hit rate: {stats.hit_rate:.1f}%")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
That's the full surface. Retry, caching, error mapping, and connection pooling are already wired up.
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
| Feature | What you get |
|
|
71
|
+
|---|---|
|
|
72
|
+
| **`BaseAsyncClient`** | `httpx.AsyncClient` subclass with retry, caching, error mapping, and cache-management methods. Override `DEFAULT_BASE_URL` + `CACHE_NAME`; the rest is inherited. |
|
|
73
|
+
| **HTTP caching** | `hishel`-backed cache with a custom SQLite/WAL storage layer. Respects HTTP cache headers by default, with TTL override. Inspection (`cache_stats`), eviction (`cache_clear_expired`), and pattern-based invalidation (`cache_invalidate`) built in. |
|
|
74
|
+
| **Retry policy** | `tenacity`-based exponential-jitter retry (4 attempts default). Retryable status set covers 408, 429, 500-504. Honors `Retry-After` headers. |
|
|
75
|
+
| **OAuth2 client credentials** | `OAuth2ClientCredentialsAuth` — drop-in `httpx.Auth` that handles token refresh, retries on 401, and works behind the cache layer. |
|
|
76
|
+
| **Response envelopes** | `ResponseEnvelope`, `ListEnvelope`, `Provenance`. Cursor-based pagination helpers (`encode_cursor` / `decode_cursor`). Every response carries source provenance so downstream consumers can cite. |
|
|
77
|
+
| **Typed exceptions** | `McpDataCoreError` base + `ApiError`, `RateLimitError`, `NotFoundError`, `AuthenticationError`, `ServerError`, `ConfigurationError`, `ValidationError`, `ParseError`. Log-first error formatting: `str(err)` appends the log path so agents can inspect without keeping stacktraces in context. |
|
|
78
|
+
| **Per-app file logging** | `logging.configure("my_app")` attaches a file handler under the `my_app` logger tree, writing to `~/.cache/my_app/my_app.log`. Idempotent; each consumer library logs to its own file. |
|
|
79
|
+
| **Bundled corpora** | `corpus_db` (SQLite/FTS5 reader) and `corpus_compression` (zstd) for libraries that ship statutes, manuals, or other reference text alongside their API client. |
|
|
80
|
+
| **MCP server scaffolding** *(opt-in)* | FastMCP server factory, bearer-token auth, domain gating middleware, conditional tool registration, signed HMAC download URLs with on-disk cache, and OAuth 2.1 + PKCE + DCR helpers. |
|
|
81
|
+
|
|
82
|
+
## Real-world usage
|
|
83
|
+
|
|
84
|
+
A trimmed-down version of how [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) wires up a USPTO connector:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import os
|
|
88
|
+
from mcp_data_core import (
|
|
89
|
+
BaseAsyncClient,
|
|
90
|
+
ConfigurationError,
|
|
91
|
+
ListEnvelope,
|
|
92
|
+
make_provenance,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
BASE_URL = "https://api.uspto.gov"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class UsptoOdpClient(BaseAsyncClient):
|
|
99
|
+
DEFAULT_BASE_URL = BASE_URL
|
|
100
|
+
CACHE_NAME = "uspto_odp"
|
|
101
|
+
|
|
102
|
+
def __init__(self, *, api_key: str | None = None, **kwargs) -> None:
|
|
103
|
+
api_key = api_key or os.environ.get("USPTO_ODP_API_KEY")
|
|
104
|
+
if not api_key:
|
|
105
|
+
raise ConfigurationError("USPTO_ODP_API_KEY required")
|
|
106
|
+
super().__init__(headers={"X-API-KEY": api_key}, **kwargs)
|
|
107
|
+
|
|
108
|
+
async def search_applications(
|
|
109
|
+
self, query: str, *, limit: int = 25
|
|
110
|
+
) -> ListEnvelope[dict]:
|
|
111
|
+
payload = await self._request_json(
|
|
112
|
+
"POST",
|
|
113
|
+
"/api/v1/patent/applications/search",
|
|
114
|
+
json={"q": query, "pagination": {"limit": limit}},
|
|
115
|
+
)
|
|
116
|
+
return ListEnvelope(
|
|
117
|
+
summary=f"{payload['count']} applications matching {query!r}",
|
|
118
|
+
items=payload["patentFileWrapperDataBag"],
|
|
119
|
+
provenance=make_provenance(
|
|
120
|
+
source_url=f"{BASE_URL}/api/v1/patent/applications/search",
|
|
121
|
+
source_name="USPTO Open Data Portal",
|
|
122
|
+
),
|
|
123
|
+
)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
No retry loop. No cache invalidation. No exception remapping. No connection lifecycle. The library author writes the API-shaped methods; `mcp-data-core` handles everything else.
|
|
127
|
+
|
|
128
|
+
## What's inside
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
mcp_data_core/
|
|
132
|
+
├── base_client.py # BaseAsyncClient
|
|
133
|
+
├── cache.py # CacheManager, build_cached_http_client, SQLite/WAL storage
|
|
134
|
+
├── resilience.py # default_retryer, with_retry, RETRYABLE_STATUS_CODES
|
|
135
|
+
├── oauth2.py # OAuth2ClientCredentialsAuth
|
|
136
|
+
├── envelope.py # ResponseEnvelope, ListEnvelope, Provenance, cursor helpers
|
|
137
|
+
├── exceptions.py # McpDataCoreError + 8 subclasses
|
|
138
|
+
├── logging.py # configure() — per-app file logging
|
|
139
|
+
├── filenames.py # Download filename conventions
|
|
140
|
+
├── corpus_db.py # SQLite/FTS5 corpus reader
|
|
141
|
+
├── corpus_compression.py # zstd helpers
|
|
142
|
+
└── mcp/ # Optional — installed via [mcp] extra
|
|
143
|
+
├── server_factory.py # FastMCP app factory
|
|
144
|
+
├── auth.py # OAuth 2.1 + bearer-token helpers
|
|
145
|
+
├── middleware.py # Domain gate, friendly errors, logging
|
|
146
|
+
├── conditional.py # Conditional tool registration
|
|
147
|
+
├── downloads.py # Signed HMAC download URLs + on-disk cache
|
|
148
|
+
└── annotations.py # Tool annotations (READ_ONLY, DESTRUCTIVE)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Provenance
|
|
152
|
+
|
|
153
|
+
Extracted from [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) 0.20.0 (May 2026) where it had matured as the shared infrastructure across multiple law and patent connectors. Split out as a standalone package so non-IP toolkits — regulatory (FDA), financial, scientific — can use the same scaffolding without pulling the IP-specific connector surface.
|
|
154
|
+
|
|
155
|
+
### Used by
|
|
156
|
+
|
|
157
|
+
- [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) — IP-registry connectors (USPTO, EPO, JPO, EUIPO, IP Australia, …)
|
|
158
|
+
|
|
159
|
+
## Compatibility
|
|
160
|
+
|
|
161
|
+
- Python 3.11, 3.12, 3.13
|
|
162
|
+
- macOS, Linux. Windows untested.
|
|
163
|
+
- `httpx` 0.27+, `pydantic` 2.7+, `tenacity` 8.4+
|
|
164
|
+
|
|
165
|
+
## Development
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
git clone https://github.com/parkerhancock/mcp-data-core
|
|
169
|
+
cd mcp-data-core
|
|
170
|
+
uv sync --all-extras --dev
|
|
171
|
+
uv run pytest # 166 tests
|
|
172
|
+
uv run ruff check src tests
|
|
173
|
+
uv run ruff format src tests
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Tests are pure-Python — no network, no fixtures, no live APIs. They exercise the cache, retry policy, OAuth refresh, MCP middleware, signed download URLs, and corpus reader against an in-memory transport.
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# mcp-data-core
|
|
2
|
+
|
|
3
|
+
**Batteries-included async HTTP scaffolding and MCP server plumbing for Python data-fetching libraries.**
|
|
4
|
+
|
|
5
|
+
If you're writing a Python library that pulls structured data from an API — patents, court filings, FDA records, financial filings, anything — you end up rebuilding the same eight things: an `httpx` client with retry, an HTTP cache, a tenacity policy, an OAuth helper, response envelopes, a typed exception hierarchy, per-app logging, and (if you ship an MCP server) tool registration, auth, and signed downloads. `mcp-data-core` is those eight things, packaged.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv add mcp-data-core # core scaffolding
|
|
11
|
+
uv add "mcp-data-core[mcp]" # + FastMCP server helpers
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from mcp_data_core import BaseAsyncClient
|
|
16
|
+
|
|
17
|
+
class MyApiClient(BaseAsyncClient):
|
|
18
|
+
DEFAULT_BASE_URL = "https://api.example.com"
|
|
19
|
+
CACHE_NAME = "my_api"
|
|
20
|
+
|
|
21
|
+
async def get_thing(self, id: str) -> dict:
|
|
22
|
+
return await self._request_json("GET", f"/things/{id}")
|
|
23
|
+
|
|
24
|
+
async with MyApiClient() as client:
|
|
25
|
+
result = await client.get_thing("42")
|
|
26
|
+
stats = await client.cache_stats()
|
|
27
|
+
print(f"Cache hit rate: {stats.hit_rate:.1f}%")
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
That's the full surface. Retry, caching, error mapping, and connection pooling are already wired up.
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
| Feature | What you get |
|
|
35
|
+
|---|---|
|
|
36
|
+
| **`BaseAsyncClient`** | `httpx.AsyncClient` subclass with retry, caching, error mapping, and cache-management methods. Override `DEFAULT_BASE_URL` + `CACHE_NAME`; the rest is inherited. |
|
|
37
|
+
| **HTTP caching** | `hishel`-backed cache with a custom SQLite/WAL storage layer. Respects HTTP cache headers by default, with TTL override. Inspection (`cache_stats`), eviction (`cache_clear_expired`), and pattern-based invalidation (`cache_invalidate`) built in. |
|
|
38
|
+
| **Retry policy** | `tenacity`-based exponential-jitter retry (4 attempts default). Retryable status set covers 408, 429, 500-504. Honors `Retry-After` headers. |
|
|
39
|
+
| **OAuth2 client credentials** | `OAuth2ClientCredentialsAuth` — drop-in `httpx.Auth` that handles token refresh, retries on 401, and works behind the cache layer. |
|
|
40
|
+
| **Response envelopes** | `ResponseEnvelope`, `ListEnvelope`, `Provenance`. Cursor-based pagination helpers (`encode_cursor` / `decode_cursor`). Every response carries source provenance so downstream consumers can cite. |
|
|
41
|
+
| **Typed exceptions** | `McpDataCoreError` base + `ApiError`, `RateLimitError`, `NotFoundError`, `AuthenticationError`, `ServerError`, `ConfigurationError`, `ValidationError`, `ParseError`. Log-first error formatting: `str(err)` appends the log path so agents can inspect without keeping stacktraces in context. |
|
|
42
|
+
| **Per-app file logging** | `mcp_data_core.logging.configure("my_app")` attaches a file handler under the `my_app` logger tree, writing to `~/.cache/my_app/my_app.log`. Idempotent; each consumer library logs to its own file. |
|
|
43
|
+
| **Bundled corpora** | `corpus_db` (SQLite/FTS5 reader) and `corpus_compression` (zstd) for libraries that ship statutes, manuals, or other reference text alongside their API client. |
|
|
44
|
+
| **MCP server scaffolding** *(opt-in)* | FastMCP server factory, bearer-token auth, domain gating middleware, conditional tool registration, signed HMAC download URLs with on-disk cache, and OAuth 2.1 + PKCE + DCR helpers. |
|
|
45
|
+
|
|
46
|
+
## Real-world usage
|
|
47
|
+
|
|
48
|
+
A trimmed-down version of how [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) wires up a USPTO connector:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import os
|
|
52
|
+
from mcp_data_core import (
|
|
53
|
+
BaseAsyncClient,
|
|
54
|
+
ConfigurationError,
|
|
55
|
+
ListEnvelope,
|
|
56
|
+
make_provenance,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
BASE_URL = "https://api.uspto.gov"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class UsptoOdpClient(BaseAsyncClient):
|
|
63
|
+
DEFAULT_BASE_URL = BASE_URL
|
|
64
|
+
CACHE_NAME = "uspto_odp"
|
|
65
|
+
|
|
66
|
+
def __init__(self, *, api_key: str | None = None, **kwargs) -> None:
|
|
67
|
+
api_key = api_key or os.environ.get("USPTO_ODP_API_KEY")
|
|
68
|
+
if not api_key:
|
|
69
|
+
raise ConfigurationError("USPTO_ODP_API_KEY required")
|
|
70
|
+
super().__init__(headers={"X-API-KEY": api_key}, **kwargs)
|
|
71
|
+
|
|
72
|
+
async def search_applications(
|
|
73
|
+
self, query: str, *, limit: int = 25
|
|
74
|
+
) -> ListEnvelope[dict]:
|
|
75
|
+
payload = await self._request_json(
|
|
76
|
+
"POST",
|
|
77
|
+
"/api/v1/patent/applications/search",
|
|
78
|
+
json={"q": query, "pagination": {"limit": limit}},
|
|
79
|
+
)
|
|
80
|
+
return ListEnvelope(
|
|
81
|
+
summary=f"{payload['count']} applications matching {query!r}",
|
|
82
|
+
items=payload["patentFileWrapperDataBag"],
|
|
83
|
+
provenance=make_provenance(
|
|
84
|
+
source_url=f"{BASE_URL}/api/v1/patent/applications/search",
|
|
85
|
+
source_name="USPTO Open Data Portal",
|
|
86
|
+
),
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
No retry loop. No cache invalidation. No exception remapping. No connection lifecycle. The library author writes the API-shaped methods; `mcp-data-core` handles everything else.
|
|
91
|
+
|
|
92
|
+
## What's inside
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
mcp_data_core/
|
|
96
|
+
├── base_client.py # BaseAsyncClient
|
|
97
|
+
├── cache.py # CacheManager, build_cached_http_client, SQLite/WAL storage
|
|
98
|
+
├── resilience.py # default_retryer, with_retry, RETRYABLE_STATUS_CODES
|
|
99
|
+
├── oauth2.py # OAuth2ClientCredentialsAuth
|
|
100
|
+
├── envelope.py # ResponseEnvelope, ListEnvelope, Provenance, cursor helpers
|
|
101
|
+
├── exceptions.py # McpDataCoreError + 8 subclasses
|
|
102
|
+
├── logging.py # configure() — per-app file logging
|
|
103
|
+
├── filenames.py # Download filename conventions
|
|
104
|
+
├── corpus_db.py # SQLite/FTS5 corpus reader
|
|
105
|
+
├── corpus_compression.py # zstd helpers
|
|
106
|
+
└── mcp/ # Optional — installed via [mcp] extra
|
|
107
|
+
├── server_factory.py # FastMCP app factory
|
|
108
|
+
├── auth.py # OAuth 2.1 + bearer-token helpers
|
|
109
|
+
├── middleware.py # Domain gate, friendly errors, logging
|
|
110
|
+
├── conditional.py # Conditional tool registration
|
|
111
|
+
├── downloads.py # Signed HMAC download URLs + on-disk cache
|
|
112
|
+
└── annotations.py # Tool annotations (READ_ONLY, DESTRUCTIVE)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Provenance
|
|
116
|
+
|
|
117
|
+
Extracted from [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) 0.20.0 (May 2026) where it had matured as the shared infrastructure across multiple law and patent connectors. Split out as a standalone package so non-IP toolkits — regulatory (FDA), financial, scientific — can use the same scaffolding without pulling the IP-specific connector surface.
|
|
118
|
+
|
|
119
|
+
### Used by
|
|
120
|
+
|
|
121
|
+
- [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) — IP-registry connectors (USPTO, EPO, JPO, EUIPO, IP Australia, …)
|
|
122
|
+
|
|
123
|
+
## Compatibility
|
|
124
|
+
|
|
125
|
+
- Python 3.11, 3.12, 3.13
|
|
126
|
+
- macOS, Linux. Windows untested.
|
|
127
|
+
- `httpx` 0.27+, `pydantic` 2.7+, `tenacity` 8.4+
|
|
128
|
+
|
|
129
|
+
## Development
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
git clone https://github.com/parkerhancock/mcp-data-core
|
|
133
|
+
cd mcp-data-core
|
|
134
|
+
uv sync --all-extras --dev
|
|
135
|
+
uv run pytest # 166 tests
|
|
136
|
+
uv run ruff check src tests
|
|
137
|
+
uv run ruff format src tests
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Tests are pure-Python — no network, no fixtures, no live APIs. They exercise the cache, retry policy, OAuth refresh, MCP middleware, signed download URLs, and corpus reader against an in-memory transport.
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
MIT
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcp-data-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Shared async HTTP scaffolding, response envelopes, corpus storage, and MCP server plumbing for data-fetching research toolkits."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Parker Hancock", email = "633163+parkerhancock@users.noreply.github.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
keywords = ["mcp", "httpx", "async", "research", "cache", "retry", "scaffolding"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Programming Language :: Python :: 3.13",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
"Typing :: Typed",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"anyio>=4.4",
|
|
24
|
+
"httpx>=0.27",
|
|
25
|
+
"h2>=4.1",
|
|
26
|
+
"pydantic>=2.7",
|
|
27
|
+
"python-dateutil>=2.9",
|
|
28
|
+
"tenacity>=8.4",
|
|
29
|
+
"hishel[async]>=1.1.3",
|
|
30
|
+
"pyyaml>=6.0",
|
|
31
|
+
"zstandard>=0.22",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
mcp = [
|
|
36
|
+
"fastmcp>=3.2.3",
|
|
37
|
+
"starlette>=0.37",
|
|
38
|
+
# fastmcp imports `from griffe import ...` — the griffe 2.x release split the
|
|
39
|
+
# top-level module out into a separate `griffelib` distribution, so
|
|
40
|
+
# fastmcp's import fails on griffe>=2. Pin here until upstream fastmcp
|
|
41
|
+
# either migrates to griffelib or re-adds the compat shim.
|
|
42
|
+
"griffe<2",
|
|
43
|
+
]
|
|
44
|
+
gcs = [
|
|
45
|
+
"google-cloud-storage>=2.18",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/parkerhancock/mcp-data-core"
|
|
50
|
+
Repository = "https://github.com/parkerhancock/mcp-data-core"
|
|
51
|
+
|
|
52
|
+
[build-system]
|
|
53
|
+
requires = ["hatchling"]
|
|
54
|
+
build-backend = "hatchling.build"
|
|
55
|
+
|
|
56
|
+
[tool.hatch.build.targets.wheel]
|
|
57
|
+
packages = ["src/mcp_data_core"]
|
|
58
|
+
|
|
59
|
+
[dependency-groups]
|
|
60
|
+
dev = [
|
|
61
|
+
"pytest>=8.0.0",
|
|
62
|
+
"pytest-asyncio>=0.24.0",
|
|
63
|
+
"pytest-cov>=7.0.0",
|
|
64
|
+
"ruff>=0.9.0",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 100
|
|
69
|
+
target-version = "py311"
|
|
70
|
+
|
|
71
|
+
[tool.ruff.lint]
|
|
72
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
73
|
+
ignore = [
|
|
74
|
+
"E501",
|
|
75
|
+
"B008",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[tool.ruff.lint.isort]
|
|
79
|
+
known-first-party = ["mcp_data_core"]
|
|
80
|
+
|
|
81
|
+
[tool.ruff.format]
|
|
82
|
+
quote-style = "double"
|
|
83
|
+
indent-style = "space"
|
|
84
|
+
skip-magic-trailing-comma = false
|
|
85
|
+
line-ending = "auto"
|
|
86
|
+
|
|
87
|
+
[tool.pytest.ini_options]
|
|
88
|
+
asyncio_mode = "auto"
|
|
89
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Shared HTTP and MCP scaffolding for consumer libraries.
|
|
2
|
+
|
|
3
|
+
Provides the infrastructure that consumers build on, including:
|
|
4
|
+
|
|
5
|
+
- Exception hierarchy for API errors (``McpDataCoreError`` and subclasses)
|
|
6
|
+
- ``BaseAsyncClient`` with caching and retry support
|
|
7
|
+
- HTTP caching utilities (``CacheManager``, ``build_cached_http_client``)
|
|
8
|
+
- Resilience utilities (``default_retryer``, ``with_retry``)
|
|
9
|
+
- File-based logging configured per consumer app (``configure``)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .base_client import BaseAsyncClient
|
|
13
|
+
from .cache import CacheManager, CacheStats, build_cached_http_client
|
|
14
|
+
from .envelope import (
|
|
15
|
+
ListEnvelope,
|
|
16
|
+
Provenance,
|
|
17
|
+
ResponseEnvelope,
|
|
18
|
+
decode_cursor,
|
|
19
|
+
encode_cursor,
|
|
20
|
+
make_provenance,
|
|
21
|
+
)
|
|
22
|
+
from .envelope import configure as configure_envelope
|
|
23
|
+
from .exceptions import (
|
|
24
|
+
ApiError,
|
|
25
|
+
AuthenticationError,
|
|
26
|
+
ConfigurationError,
|
|
27
|
+
McpDataCoreError,
|
|
28
|
+
NotFoundError,
|
|
29
|
+
ParseError,
|
|
30
|
+
RateLimitError,
|
|
31
|
+
ServerError,
|
|
32
|
+
ValidationError,
|
|
33
|
+
)
|
|
34
|
+
from .logging import configure, log_file_hint
|
|
35
|
+
from .oauth2 import OAuth2ClientCredentialsAuth
|
|
36
|
+
from .resilience import (
|
|
37
|
+
RETRYABLE_STATUS_CODES,
|
|
38
|
+
default_retryer,
|
|
39
|
+
is_retryable_error,
|
|
40
|
+
with_retry,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
# Base client
|
|
45
|
+
"BaseAsyncClient",
|
|
46
|
+
# Caching
|
|
47
|
+
"build_cached_http_client",
|
|
48
|
+
"CacheManager",
|
|
49
|
+
"CacheStats",
|
|
50
|
+
# Envelope
|
|
51
|
+
"Provenance",
|
|
52
|
+
"ResponseEnvelope",
|
|
53
|
+
"ListEnvelope",
|
|
54
|
+
"configure_envelope",
|
|
55
|
+
"make_provenance",
|
|
56
|
+
"encode_cursor",
|
|
57
|
+
"decode_cursor",
|
|
58
|
+
# Exceptions
|
|
59
|
+
"McpDataCoreError",
|
|
60
|
+
"ApiError",
|
|
61
|
+
"NotFoundError",
|
|
62
|
+
"RateLimitError",
|
|
63
|
+
"AuthenticationError",
|
|
64
|
+
"ServerError",
|
|
65
|
+
"ValidationError",
|
|
66
|
+
"ConfigurationError",
|
|
67
|
+
"ParseError",
|
|
68
|
+
# Logging
|
|
69
|
+
"configure",
|
|
70
|
+
"log_file_hint",
|
|
71
|
+
# OAuth2
|
|
72
|
+
"OAuth2ClientCredentialsAuth",
|
|
73
|
+
# Resilience
|
|
74
|
+
"RETRYABLE_STATUS_CODES",
|
|
75
|
+
"is_retryable_error",
|
|
76
|
+
"default_retryer",
|
|
77
|
+
"with_retry",
|
|
78
|
+
]
|