mcp-data-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. mcp_data_core-0.1.0/.github/workflows/publish.yml +120 -0
  2. mcp_data_core-0.1.0/.gitignore +43 -0
  3. mcp_data_core-0.1.0/LICENSE +21 -0
  4. mcp_data_core-0.1.0/PKG-INFO +180 -0
  5. mcp_data_core-0.1.0/README.md +144 -0
  6. mcp_data_core-0.1.0/pyproject.toml +89 -0
  7. mcp_data_core-0.1.0/src/mcp_data_core/__init__.py +78 -0
  8. mcp_data_core-0.1.0/src/mcp_data_core/base_client.py +348 -0
  9. mcp_data_core-0.1.0/src/mcp_data_core/cache.py +367 -0
  10. mcp_data_core-0.1.0/src/mcp_data_core/corpus_compression.py +160 -0
  11. mcp_data_core-0.1.0/src/mcp_data_core/corpus_db.py +371 -0
  12. mcp_data_core-0.1.0/src/mcp_data_core/envelope.py +211 -0
  13. mcp_data_core-0.1.0/src/mcp_data_core/exceptions.py +110 -0
  14. mcp_data_core-0.1.0/src/mcp_data_core/filenames.py +233 -0
  15. mcp_data_core-0.1.0/src/mcp_data_core/logging.py +94 -0
  16. mcp_data_core-0.1.0/src/mcp_data_core/mcp/__init__.py +74 -0
  17. mcp_data_core-0.1.0/src/mcp_data_core/mcp/_env.py +24 -0
  18. mcp_data_core-0.1.0/src/mcp_data_core/mcp/annotations.py +18 -0
  19. mcp_data_core-0.1.0/src/mcp_data_core/mcp/auth.py +238 -0
  20. mcp_data_core-0.1.0/src/mcp_data_core/mcp/conditional.py +169 -0
  21. mcp_data_core-0.1.0/src/mcp_data_core/mcp/downloads.py +972 -0
  22. mcp_data_core-0.1.0/src/mcp_data_core/mcp/middleware.py +201 -0
  23. mcp_data_core-0.1.0/src/mcp_data_core/mcp/server_factory.py +113 -0
  24. mcp_data_core-0.1.0/src/mcp_data_core/oauth2.py +171 -0
  25. mcp_data_core-0.1.0/src/mcp_data_core/py.typed +0 -0
  26. mcp_data_core-0.1.0/src/mcp_data_core/resilience.py +99 -0
  27. mcp_data_core-0.1.0/tests/__init__.py +0 -0
  28. mcp_data_core-0.1.0/tests/test_auth.py +350 -0
  29. mcp_data_core-0.1.0/tests/test_conditional.py +185 -0
  30. mcp_data_core-0.1.0/tests/test_corpus_db.py +272 -0
  31. mcp_data_core-0.1.0/tests/test_envelope.py +175 -0
  32. mcp_data_core-0.1.0/tests/test_filenames.py +149 -0
  33. mcp_data_core-0.1.0/tests/test_logging.py +39 -0
  34. mcp_data_core-0.1.0/tests/test_mcp_downloads.py +679 -0
  35. mcp_data_core-0.1.0/tests/test_middleware.py +96 -0
  36. mcp_data_core-0.1.0/tests/test_oauth2.py +293 -0
  37. mcp_data_core-0.1.0/tests/test_server_factory.py +42 -0
@@ -0,0 +1,120 @@
1
+ name: Publish to PyPI
2
+
3
+ # Tag-triggered release pipeline. Pushing a `vX.Y.Z` tag (cut from main)
4
+ # verifies the tag matches the pyproject version, runs the test suite one
5
+ # more time, builds the wheel + sdist, and publishes to PyPI via OIDC trusted
6
+ # publishing (no long-lived API token stored anywhere).
7
+ #
8
+ # To ship a new release:
9
+ # 1. Bump `version` in pyproject.toml on main.
10
+ # 2. `git tag v0.2.0 && git push origin v0.2.0` (version must match).
11
+ # 3. Watch this workflow run.
12
+
13
# Trigger: run only when a tag whose name starts with "v" is pushed.
on:
  push:
    tags:
      - "v*"

# Top-level token scope: read-only access to repository contents.
permissions:
  contents: read
20
+
21
+ jobs:
22
verify:
  name: Verify tag matches pyproject version
  runs-on: ubuntu-latest
  # Re-export the step output as a job output named `version`.
  outputs:
    version: ${{ steps.version.outputs.version }}
  steps:
    - uses: actions/checkout@v4

    - id: version
      name: Extract versions and compare
      run: |
        # Tag name with a leading "v" removed (v0.2.0 -> 0.2.0).
        tag_version="${GITHUB_REF_NAME#v}"
        # First quoted value on a line that starts with `version =`.
        project_version=$(grep -E '^version\s*=' pyproject.toml | sed -E 's/.*"([^"]+)".*/\1/' | head -n1)
        echo "Tag version: $tag_version"
        echo "pyproject version: $project_version"
        if [ "$tag_version" = "$project_version" ]; then
          # Versions agree: publish the value as this step's `version` output.
          echo "version=$project_version" >> "$GITHUB_OUTPUT"
        else
          echo "::error::Tag $GITHUB_REF_NAME (parsed version $tag_version) does not match pyproject version $project_version. Bump pyproject first, commit, then retag."
          exit 1
        fi
42
+
43
# Runs the test suite once per supported interpreter after `verify` succeeds.
test:
  name: Test on Python ${{ matrix.python-version }}
  needs: verify
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so YAML keeps them as strings rather than floats.
      python-version: ["3.11", "3.12", "3.13"]
  steps:
    - uses: actions/checkout@v4

    - name: Install uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"

    - name: Set up Python ${{ matrix.python-version }}
      run: uv python install ${{ matrix.python-version }}

    - name: Install dependencies (library + MCP extra + dev group)
      # Name the matrix interpreter explicitly. Without `--python`, uv picks
      # any interpreter that satisfies `requires-python`, so the leg's label
      # and the Python actually under test could silently diverge.
      run: uv sync --all-extras --group dev --python ${{ matrix.python-version }}

    - name: Run test suite
      # Same explicit request, so `uv run` keeps the environment built above.
      run: uv run --python ${{ matrix.python-version }} pytest -q
66
+
67
# Builds the wheel and sdist once the test matrix has passed, then hands the
# `dist/` directory to later jobs as the `dist` artifact.
build:
  name: Build wheel + sdist
  needs: test
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Install uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"

    - name: Set up Python
      run: uv python install 3.11

    - name: Build distributions
      run: uv build

    - name: List artifacts
      run: ls -la dist/

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist/
        retention-days: 7
        # The action only warns by default when nothing matches `path`.
        # Fail here instead, so an empty build is reported at its source
        # rather than as a download failure in a later job.
        if-no-files-found: error
94
+
95
# Uploads the already-built distributions to PyPI.
publish:
  name: Publish ${{ needs.verify.outputs.version }} to PyPI
  runs-on: ubuntu-latest
  needs:
    - verify
    - build
  permissions:
    # OIDC trusted publishing needs this grant; no PyPI token is stored.
    id-token: write
  # `pypi` must match the environment name configured on PyPI's
  # trusted-publisher page. GitHub applies that environment's protection
  # rules (approval, required reviewers, etc.) before the job starts.
  environment:
    name: pypi
    url: https://pypi.org/project/mcp-data-core/${{ needs.verify.outputs.version }}/
  steps:
    - name: Download built distributions
      uses: actions/download-artifact@v4
      with:
        name: dist
        path: dist/

    - name: Publish to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        # A version that already exists on PyPI is skipped rather than
        # treated as an error, so re-running the workflow is idempotent.
        skip-existing: true
@@ -0,0 +1,43 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+ .eggs/
12
+
13
+ # Testing
14
+ .pytest_cache/
15
+ .coverage
16
+ htmlcov/
17
+ .tox/
18
+ .nox/
19
+ coverage.xml
20
+
21
+ # Type checkers
22
+ .mypy_cache/
23
+ .pyright/
24
+ .ty/
25
+
26
+ # uv / venvs
27
+ .venv/
28
+ venv/
29
+ env/
30
+ .python-version
31
+
32
+ # Editors
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ .DS_Store
38
+
39
+ # Caches that may be produced by the package itself during local runs
40
+ .cache/
41
+
42
+ # Hatch
43
+ .hatch/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Parker Hancock
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,180 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-data-core
3
+ Version: 0.1.0
4
+ Summary: Shared async HTTP scaffolding, response envelopes, corpus storage, and MCP server plumbing for data-fetching research toolkits.
5
+ Project-URL: Homepage, https://github.com/parkerhancock/mcp-data-core
6
+ Project-URL: Repository, https://github.com/parkerhancock/mcp-data-core
7
+ Author-email: Parker Hancock <633163+parkerhancock@users.noreply.github.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: async,cache,httpx,mcp,research,retry,scaffolding
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.11
20
+ Requires-Dist: anyio>=4.4
21
+ Requires-Dist: h2>=4.1
22
+ Requires-Dist: hishel[async]>=1.1.3
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: python-dateutil>=2.9
26
+ Requires-Dist: pyyaml>=6.0
27
+ Requires-Dist: tenacity>=8.4
28
+ Requires-Dist: zstandard>=0.22
29
+ Provides-Extra: gcs
30
+ Requires-Dist: google-cloud-storage>=2.18; extra == 'gcs'
31
+ Provides-Extra: mcp
32
+ Requires-Dist: fastmcp>=3.2.3; extra == 'mcp'
33
+ Requires-Dist: griffe<2; extra == 'mcp'
34
+ Requires-Dist: starlette>=0.37; extra == 'mcp'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # mcp-data-core
38
+
39
+ **Batteries-included async HTTP scaffolding and MCP server plumbing for Python data-fetching libraries.**
40
+
41
+ If you're writing a Python library that pulls structured data from an API — patents, court filings, FDA records, financial filings, anything — you end up rebuilding the same eight things: an `httpx` client with retry, an HTTP cache, a tenacity policy, an OAuth helper, response envelopes, a typed exception hierarchy, per-app logging, and (if you ship an MCP server) tool registration, auth, and signed downloads. `mcp-data-core` is those eight things, packaged.
42
+
43
+ ## Quick Start
44
+
45
+ ```bash
46
+ uv add mcp-data-core # core scaffolding
47
+ uv add "mcp-data-core[mcp]" # + FastMCP server helpers
48
+ ```
49
+
50
+ ```python
51
+ from mcp_data_core import BaseAsyncClient
52
+
53
+ class MyApiClient(BaseAsyncClient):
54
+ DEFAULT_BASE_URL = "https://api.example.com"
55
+ CACHE_NAME = "my_api"
56
+
57
+ async def get_thing(self, id: str) -> dict:
58
+ return await self._request_json("GET", f"/things/{id}")
59
+
60
+ async with MyApiClient() as client:
61
+ result = await client.get_thing("42")
62
+ stats = await client.cache_stats()
63
+ print(f"Cache hit rate: {stats.hit_rate:.1f}%")
64
+ ```
65
+
66
+ That's the full surface. Retry, caching, error mapping, and connection pooling are already wired up.
67
+
68
+ ## Features
69
+
70
+ | Feature | What you get |
71
+ |---|---|
72
+ | **`BaseAsyncClient`** | `httpx.AsyncClient` subclass with retry, caching, error mapping, and cache-management methods. Override `DEFAULT_BASE_URL` + `CACHE_NAME`; the rest is inherited. |
73
+ | **HTTP caching** | `hishel`-backed cache with a custom SQLite/WAL storage layer. Respects HTTP cache headers by default, with TTL override. Inspection (`cache_stats`), eviction (`cache_clear_expired`), and pattern-based invalidation (`cache_invalidate`) built in. |
74
+ | **Retry policy** | `tenacity`-based exponential-jitter retry (4 attempts default). Retryable status set covers 408, 429, 500-504. Honors `Retry-After` headers. |
75
+ | **OAuth2 client credentials** | `OAuth2ClientCredentialsAuth` — drop-in `httpx.Auth` that handles token refresh, retries on 401, and works behind the cache layer. |
76
+ | **Response envelopes** | `ResponseEnvelope`, `ListEnvelope`, `Provenance`. Cursor-based pagination helpers (`encode_cursor` / `decode_cursor`). Every response carries source provenance so downstream consumers can cite. |
77
+ | **Typed exceptions** | `McpDataCoreError` base + `ApiError`, `RateLimitError`, `NotFoundError`, `AuthenticationError`, `ServerError`, `ConfigurationError`, `ValidationError`, `ParseError`. Log-first error formatting: `str(err)` appends the log path so agents can inspect without keeping stacktraces in context. |
78
+ | **Per-app file logging** | `logging.configure("my_app")` attaches a file handler under the `my_app` logger tree, writing to `~/.cache/my_app/my_app.log`. Idempotent; each consumer library logs to its own file. |
79
+ | **Bundled corpora** | `corpus_db` (SQLite/FTS5 reader) and `corpus_compression` (zstd) for libraries that ship statutes, manuals, or other reference text alongside their API client. |
80
+ | **MCP server scaffolding** *(opt-in)* | FastMCP server factory, bearer-token auth, domain gating middleware, conditional tool registration, signed HMAC download URLs with on-disk cache, and OAuth 2.1 + PKCE + DCR helpers. |
81
+
82
+ ## Real-world usage
83
+
84
+ A trimmed-down version of how [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) wires up a USPTO connector:
85
+
86
+ ```python
87
+ import os
88
+ from mcp_data_core import (
89
+ BaseAsyncClient,
90
+ ConfigurationError,
91
+ ListEnvelope,
92
+ make_provenance,
93
+ )
94
+
95
+ BASE_URL = "https://api.uspto.gov"
96
+
97
+
98
+ class UsptoOdpClient(BaseAsyncClient):
99
+ DEFAULT_BASE_URL = BASE_URL
100
+ CACHE_NAME = "uspto_odp"
101
+
102
+ def __init__(self, *, api_key: str | None = None, **kwargs) -> None:
103
+ api_key = api_key or os.environ.get("USPTO_ODP_API_KEY")
104
+ if not api_key:
105
+ raise ConfigurationError("USPTO_ODP_API_KEY required")
106
+ super().__init__(headers={"X-API-KEY": api_key}, **kwargs)
107
+
108
+ async def search_applications(
109
+ self, query: str, *, limit: int = 25
110
+ ) -> ListEnvelope[dict]:
111
+ payload = await self._request_json(
112
+ "POST",
113
+ "/api/v1/patent/applications/search",
114
+ json={"q": query, "pagination": {"limit": limit}},
115
+ )
116
+ return ListEnvelope(
117
+ summary=f"{payload['count']} applications matching {query!r}",
118
+ items=payload["patentFileWrapperDataBag"],
119
+ provenance=make_provenance(
120
+ source_url=f"{BASE_URL}/api/v1/patent/applications/search",
121
+ source_name="USPTO Open Data Portal",
122
+ ),
123
+ )
124
+ ```
125
+
126
+ No retry loop. No cache invalidation. No exception remapping. No connection lifecycle. The library author writes the API-shaped methods; `mcp-data-core` handles everything else.
127
+
128
+ ## What's inside
129
+
130
+ ```
131
+ mcp_data_core/
132
+ ├── base_client.py # BaseAsyncClient
133
+ ├── cache.py # CacheManager, build_cached_http_client, SQLite/WAL storage
134
+ ├── resilience.py # default_retryer, with_retry, RETRYABLE_STATUS_CODES
135
+ ├── oauth2.py # OAuth2ClientCredentialsAuth
136
+ ├── envelope.py # ResponseEnvelope, ListEnvelope, Provenance, cursor helpers
137
+ ├── exceptions.py # McpDataCoreError + 8 subclasses
138
+ ├── logging.py # configure() — per-app file logging
139
+ ├── filenames.py # Download filename conventions
140
+ ├── corpus_db.py # SQLite/FTS5 corpus reader
141
+ ├── corpus_compression.py # zstd helpers
142
+ └── mcp/ # Optional — installed via [mcp] extra
143
+ ├── server_factory.py # FastMCP app factory
144
+ ├── auth.py # OAuth 2.1 + bearer-token helpers
145
+ ├── middleware.py # Domain gate, friendly errors, logging
146
+ ├── conditional.py # Conditional tool registration
147
+ ├── downloads.py # Signed HMAC download URLs + on-disk cache
148
+ └── annotations.py # Tool annotations (READ_ONLY, DESTRUCTIVE)
149
+ ```
150
+
151
+ ## Provenance
152
+
153
+ Extracted from [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) 0.20.0 (May 2026) where it had matured as the shared infrastructure across multiple law and patent connectors. Split out as a standalone package so non-IP toolkits — regulatory (FDA), financial, scientific — can use the same scaffolding without pulling the IP-specific connector surface.
154
+
155
+ ### Used by
156
+
157
+ - [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) — IP-registry connectors (USPTO, EPO, JPO, EUIPO, IP Australia, …)
158
+
159
+ ## Compatibility
160
+
161
+ - Python 3.11, 3.12, 3.13
162
+ - macOS, Linux. Windows untested.
163
+ - `httpx` 0.27+, `pydantic` 2.7+, `tenacity` 8.4+
164
+
165
+ ## Development
166
+
167
+ ```bash
168
+ git clone https://github.com/parkerhancock/mcp-data-core
169
+ cd mcp-data-core
170
+ uv sync --all-extras --dev
171
+ uv run pytest # 166 tests
172
+ uv run ruff check src tests
173
+ uv run ruff format src tests
174
+ ```
175
+
176
+ Tests are pure-Python — no network, no fixtures, no live APIs. They exercise the cache, retry policy, OAuth refresh, MCP middleware, signed download URLs, and corpus reader against an in-memory transport.
177
+
178
+ ## License
179
+
180
+ MIT
@@ -0,0 +1,144 @@
1
+ # mcp-data-core
2
+
3
+ **Batteries-included async HTTP scaffolding and MCP server plumbing for Python data-fetching libraries.**
4
+
5
+ If you're writing a Python library that pulls structured data from an API — patents, court filings, FDA records, financial filings, anything — you end up rebuilding the same eight things: an `httpx` client with retry, an HTTP cache, a tenacity policy, an OAuth helper, response envelopes, a typed exception hierarchy, per-app logging, and (if you ship an MCP server) tool registration, auth, and signed downloads. `mcp-data-core` is those eight things, packaged.
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ uv add mcp-data-core # core scaffolding
11
+ uv add "mcp-data-core[mcp]" # + FastMCP server helpers
12
+ ```
13
+
14
+ ```python
15
+ from mcp_data_core import BaseAsyncClient
16
+
17
+ class MyApiClient(BaseAsyncClient):
18
+ DEFAULT_BASE_URL = "https://api.example.com"
19
+ CACHE_NAME = "my_api"
20
+
21
+ async def get_thing(self, id: str) -> dict:
22
+ return await self._request_json("GET", f"/things/{id}")
23
+
24
+ async with MyApiClient() as client:
25
+ result = await client.get_thing("42")
26
+ stats = await client.cache_stats()
27
+ print(f"Cache hit rate: {stats.hit_rate:.1f}%")
28
+ ```
29
+
30
+ That's the full surface. Retry, caching, error mapping, and connection pooling are already wired up.
31
+
32
+ ## Features
33
+
34
+ | Feature | What you get |
35
+ |---|---|
36
+ | **`BaseAsyncClient`** | `httpx.AsyncClient` subclass with retry, caching, error mapping, and cache-management methods. Override `DEFAULT_BASE_URL` + `CACHE_NAME`; the rest is inherited. |
37
+ | **HTTP caching** | `hishel`-backed cache with a custom SQLite/WAL storage layer. Respects HTTP cache headers by default, with TTL override. Inspection (`cache_stats`), eviction (`cache_clear_expired`), and pattern-based invalidation (`cache_invalidate`) built in. |
38
+ | **Retry policy** | `tenacity`-based exponential-jitter retry (4 attempts default). Retryable status set covers 408, 429, 500-504. Honors `Retry-After` headers. |
39
+ | **OAuth2 client credentials** | `OAuth2ClientCredentialsAuth` — drop-in `httpx.Auth` that handles token refresh, retries on 401, and works behind the cache layer. |
40
+ | **Response envelopes** | `ResponseEnvelope`, `ListEnvelope`, `Provenance`. Cursor-based pagination helpers (`encode_cursor` / `decode_cursor`). Every response carries source provenance so downstream consumers can cite. |
41
+ | **Typed exceptions** | `McpDataCoreError` base + `ApiError`, `RateLimitError`, `NotFoundError`, `AuthenticationError`, `ServerError`, `ConfigurationError`, `ValidationError`, `ParseError`. Log-first error formatting: `str(err)` appends the log path so agents can inspect without keeping stacktraces in context. |
42
+ | **Per-app file logging** | `mcp_data_core.logging.configure("my_app")` (also exported as `mcp_data_core.configure`) attaches a file handler under the `my_app` logger tree, writing to `~/.cache/my_app/my_app.log`. Idempotent; each consumer library logs to its own file. |
43
+ | **Bundled corpora** | `corpus_db` (SQLite/FTS5 reader) and `corpus_compression` (zstd) for libraries that ship statutes, manuals, or other reference text alongside their API client. |
44
+ | **MCP server scaffolding** *(opt-in)* | FastMCP server factory, bearer-token auth, domain gating middleware, conditional tool registration, signed HMAC download URLs with on-disk cache, and OAuth 2.1 + PKCE + DCR helpers. |
45
+
46
+ ## Real-world usage
47
+
48
+ A trimmed-down version of how [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) wires up a USPTO connector:
49
+
50
+ ```python
51
+ import os
52
+ from mcp_data_core import (
53
+ BaseAsyncClient,
54
+ ConfigurationError,
55
+ ListEnvelope,
56
+ make_provenance,
57
+ )
58
+
59
+ BASE_URL = "https://api.uspto.gov"
60
+
61
+
62
+ class UsptoOdpClient(BaseAsyncClient):
63
+ DEFAULT_BASE_URL = BASE_URL
64
+ CACHE_NAME = "uspto_odp"
65
+
66
+ def __init__(self, *, api_key: str | None = None, **kwargs) -> None:
67
+ api_key = api_key or os.environ.get("USPTO_ODP_API_KEY")
68
+ if not api_key:
69
+ raise ConfigurationError("USPTO_ODP_API_KEY required")
70
+ super().__init__(headers={"X-API-KEY": api_key}, **kwargs)
71
+
72
+ async def search_applications(
73
+ self, query: str, *, limit: int = 25
74
+ ) -> ListEnvelope[dict]:
75
+ payload = await self._request_json(
76
+ "POST",
77
+ "/api/v1/patent/applications/search",
78
+ json={"q": query, "pagination": {"limit": limit}},
79
+ )
80
+ return ListEnvelope(
81
+ summary=f"{payload['count']} applications matching {query!r}",
82
+ items=payload["patentFileWrapperDataBag"],
83
+ provenance=make_provenance(
84
+ source_url=f"{BASE_URL}/api/v1/patent/applications/search",
85
+ source_name="USPTO Open Data Portal",
86
+ ),
87
+ )
88
+ ```
89
+
90
+ No retry loop. No cache invalidation. No exception remapping. No connection lifecycle. The library author writes the API-shaped methods; `mcp-data-core` handles everything else.
91
+
92
+ ## What's inside
93
+
94
+ ```
95
+ mcp_data_core/
96
+ ├── base_client.py # BaseAsyncClient
97
+ ├── cache.py # CacheManager, build_cached_http_client, SQLite/WAL storage
98
+ ├── resilience.py # default_retryer, with_retry, RETRYABLE_STATUS_CODES
99
+ ├── oauth2.py # OAuth2ClientCredentialsAuth
100
+ ├── envelope.py # ResponseEnvelope, ListEnvelope, Provenance, cursor helpers
101
+ ├── exceptions.py # McpDataCoreError + 8 subclasses
102
+ ├── logging.py # configure() — per-app file logging
103
+ ├── filenames.py # Download filename conventions
104
+ ├── corpus_db.py # SQLite/FTS5 corpus reader
105
+ ├── corpus_compression.py # zstd helpers
106
+ └── mcp/ # Optional — installed via [mcp] extra
107
+ ├── server_factory.py # FastMCP app factory
108
+ ├── auth.py # OAuth 2.1 + bearer-token helpers
109
+ ├── middleware.py # Domain gate, friendly errors, logging
110
+ ├── conditional.py # Conditional tool registration
111
+ ├── downloads.py # Signed HMAC download URLs + on-disk cache
112
+ └── annotations.py # Tool annotations (READ_ONLY, DESTRUCTIVE)
113
+ ```
114
+
115
+ ## Provenance
116
+
117
+ Extracted from [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) 0.20.0 (May 2026) where it had matured as the shared infrastructure across multiple law and patent connectors. Split out as a standalone package so non-IP toolkits — regulatory (FDA), financial, scientific — can use the same scaffolding without pulling the IP-specific connector surface.
118
+
119
+ ### Used by
120
+
121
+ - [patent-client-agents](https://github.com/parkerhancock/patent-client-agents) — IP-registry connectors (USPTO, EPO, JPO, EUIPO, IP Australia, …)
122
+
123
+ ## Compatibility
124
+
125
+ - Python 3.11, 3.12, 3.13
126
+ - macOS, Linux. Windows untested.
127
+ - `httpx` 0.27+, `pydantic` 2.7+, `tenacity` 8.4+
128
+
129
+ ## Development
130
+
131
+ ```bash
132
+ git clone https://github.com/parkerhancock/mcp-data-core
133
+ cd mcp-data-core
134
+ uv sync --all-extras --dev
135
+ uv run pytest # 166 tests
136
+ uv run ruff check src tests
137
+ uv run ruff format src tests
138
+ ```
139
+
140
+ Tests are pure-Python — no network, no fixtures, no live APIs. They exercise the cache, retry policy, OAuth refresh, MCP middleware, signed download URLs, and corpus reader against an in-memory transport.
141
+
142
+ ## License
143
+
144
+ MIT
@@ -0,0 +1,89 @@
1
+ [project]
2
+ name = "mcp-data-core"
3
+ version = "0.1.0"
4
+ description = "Shared async HTTP scaffolding, response envelopes, corpus storage, and MCP server plumbing for data-fetching research toolkits."
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Parker Hancock", email = "633163+parkerhancock@users.noreply.github.com" }
8
+ ]
9
+ requires-python = ">=3.11"
10
+ license = { text = "MIT" }
11
+ keywords = ["mcp", "httpx", "async", "research", "cache", "retry", "scaffolding"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Software Development :: Libraries",
20
+ "Typing :: Typed",
21
+ ]
22
+ dependencies = [
23
+ "anyio>=4.4",
24
+ "httpx>=0.27",
25
+ "h2>=4.1",
26
+ "pydantic>=2.7",
27
+ "python-dateutil>=2.9",
28
+ "tenacity>=8.4",
29
+ "hishel[async]>=1.1.3",
30
+ "pyyaml>=6.0",
31
+ "zstandard>=0.22",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ mcp = [
36
+ "fastmcp>=3.2.3",
37
+ "starlette>=0.37",
38
+ # fastmcp imports `from griffe import ...` — the 2.x release split the
39
+ # top-level module out into a separate `griffelib` distribution, so
40
+ # fastmcp's import fails on griffe>=2. Pin here until upstream fastmcp
41
+ # either migrates to griffelib or re-adds the compat shim.
42
+ "griffe<2",
43
+ ]
44
+ gcs = [
45
+ "google-cloud-storage>=2.18",
46
+ ]
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/parkerhancock/mcp-data-core"
50
+ Repository = "https://github.com/parkerhancock/mcp-data-core"
51
+
52
+ [build-system]
53
+ requires = ["hatchling"]
54
+ build-backend = "hatchling.build"
55
+
56
+ [tool.hatch.build.targets.wheel]
57
+ packages = ["src/mcp_data_core"]
58
+
59
+ [dependency-groups]
60
+ dev = [
61
+ "pytest>=8.0.0",
62
+ "pytest-asyncio>=0.24.0",
63
+ "pytest-cov>=7.0.0",
64
+ "ruff>=0.9.0",
65
+ ]
66
+
67
+ [tool.ruff]
68
+ line-length = 100
69
+ target-version = "py311"
70
+
71
+ [tool.ruff.lint]
72
+ select = ["E", "F", "I", "UP", "B"]
73
+ ignore = [
74
+ "E501",
75
+ "B008",
76
+ ]
77
+
78
+ [tool.ruff.lint.isort]
79
+ known-first-party = ["mcp_data_core"]
80
+
81
+ [tool.ruff.format]
82
+ quote-style = "double"
83
+ indent-style = "space"
84
+ skip-magic-trailing-comma = false
85
+ line-ending = "auto"
86
+
87
+ [tool.pytest.ini_options]
88
+ asyncio_mode = "auto"
89
+ testpaths = ["tests"]
@@ -0,0 +1,78 @@
1
+ """Shared HTTP and MCP scaffolding for consumer libraries.
2
+
3
+ Provides the infrastructure that consumers build on:
4
+
5
+ - Exception hierarchy for API errors (``McpDataCoreError`` and subclasses)
6
+ - ``BaseAsyncClient`` with caching and retry support
7
+ - HTTP caching utilities (``CacheManager``, ``build_cached_http_client``)
8
+ - Resilience utilities (``default_retryer``, ``with_retry``)
9
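+ - File-based logging configured per consumer app (``configure``)
10
+ - Response envelopes and cursor helpers (``ResponseEnvelope``, ``ListEnvelope``, ``Provenance``)
11
+ - OAuth2 client-credentials auth (``OAuth2ClientCredentialsAuth``)
12
+ """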
11
+
12
+ from .base_client import BaseAsyncClient
13
+ from .cache import CacheManager, CacheStats, build_cached_http_client
14
+ from .envelope import (
15
+ ListEnvelope,
16
+ Provenance,
17
+ ResponseEnvelope,
18
+ decode_cursor,
19
+ encode_cursor,
20
+ make_provenance,
21
+ )
22
+ from .envelope import configure as configure_envelope
23
+ from .exceptions import (
24
+ ApiError,
25
+ AuthenticationError,
26
+ ConfigurationError,
27
+ McpDataCoreError,
28
+ NotFoundError,
29
+ ParseError,
30
+ RateLimitError,
31
+ ServerError,
32
+ ValidationError,
33
+ )
34
+ from .logging import configure, log_file_hint
35
+ from .oauth2 import OAuth2ClientCredentialsAuth
36
+ from .resilience import (
37
+ RETRYABLE_STATUS_CODES,
38
+ default_retryer,
39
+ is_retryable_error,
40
+ with_retry,
41
+ )
42
+
43
+ __all__ = [
44
+ # Base client
45
+ "BaseAsyncClient",
46
+ # Caching
47
+ "build_cached_http_client",
48
+ "CacheManager",
49
+ "CacheStats",
50
+ # Envelope
51
+ "Provenance",
52
+ "ResponseEnvelope",
53
+ "ListEnvelope",
54
+ "configure_envelope",
55
+ "make_provenance",
56
+ "encode_cursor",
57
+ "decode_cursor",
58
+ # Exceptions
59
+ "McpDataCoreError",
60
+ "ApiError",
61
+ "NotFoundError",
62
+ "RateLimitError",
63
+ "AuthenticationError",
64
+ "ServerError",
65
+ "ValidationError",
66
+ "ConfigurationError",
67
+ "ParseError",
68
+ # Logging
69
+ "configure",
70
+ "log_file_hint",
71
+ # OAuth2
72
+ "OAuth2ClientCredentialsAuth",
73
+ # Resilience
74
+ "RETRYABLE_STATUS_CODES",
75
+ "is_retryable_error",
76
+ "default_retryer",
77
+ "with_retry",
78
+ ]