crossref-local 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. crossref_local/__init__.py +18 -10
  2. crossref_local/_aio/__init__.py +30 -0
  3. crossref_local/_aio/_impl.py +238 -0
  4. crossref_local/_cache/__init__.py +15 -0
  5. crossref_local/{cache_export.py → _cache/export.py} +27 -10
  6. crossref_local/_cache/utils.py +93 -0
  7. crossref_local/_cli/__init__.py +9 -0
  8. crossref_local/_cli/cli.py +512 -0
  9. crossref_local/_cli/mcp.py +351 -0
  10. crossref_local/_cli/mcp_server.py +413 -0
  11. crossref_local/_core/__init__.py +58 -0
  12. crossref_local/{api.py → _core/api.py} +24 -5
  13. crossref_local/{citations.py → _core/citations.py} +55 -26
  14. crossref_local/{config.py → _core/config.py} +40 -22
  15. crossref_local/{db.py → _core/db.py} +32 -26
  16. crossref_local/{fts.py → _core/fts.py} +18 -14
  17. crossref_local/{models.py → _core/models.py} +11 -6
  18. crossref_local/_remote/__init__.py +56 -0
  19. crossref_local/_remote/base.py +356 -0
  20. crossref_local/_remote/collections.py +175 -0
  21. crossref_local/_server/__init__.py +140 -0
  22. crossref_local/_server/middleware.py +25 -0
  23. crossref_local/_server/models.py +129 -0
  24. crossref_local/_server/routes_citations.py +98 -0
  25. crossref_local/_server/routes_collections.py +282 -0
  26. crossref_local/_server/routes_compat.py +102 -0
  27. crossref_local/_server/routes_works.py +128 -0
  28. crossref_local/_server/server.py +19 -0
  29. crossref_local/aio.py +30 -206
  30. crossref_local/cache.py +100 -100
  31. crossref_local/cli.py +5 -515
  32. crossref_local/jobs.py +169 -0
  33. crossref_local/mcp_server.py +5 -410
  34. crossref_local/remote.py +5 -266
  35. crossref_local/server.py +5 -349
  36. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
  37. crossref_local-0.5.0.dist-info/RECORD +47 -0
  38. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
  39. crossref_local/cli_mcp.py +0 -275
  40. crossref_local-0.4.0.dist-info/RECORD +0 -27
  41. /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
  42. /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
  43. /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
  44. /crossref_local/{cli_main.py → _cli/main.py} +0 -0
  45. /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  46. /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  47. /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  48. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,356 @@
1
+ """Remote API client for crossref_local.
2
+
3
+ Connects to a CrossRef Local API server instead of direct database access.
4
+ Use this when the database is on a remote server accessible via HTTP.
5
+ """
6
+
7
+ import json
8
+ import urllib.request
9
+ import urllib.parse
10
+ import urllib.error
11
+ from typing import List, Optional, Dict, Any
12
+
13
+ from .._core.models import Work, SearchResult
14
+ from .._core.config import DEFAULT_PORT
15
+
16
+ # Default URL uses SCITEX port convention
17
+ DEFAULT_API_URL = f"http://localhost:{DEFAULT_PORT}"
18
+
19
+
20
class RemoteClient:
    """
    HTTP client for CrossRef Local API server.

    Provides the same interface as the local API but connects
    to a remote server via HTTP.

    Example:
        >>> client = RemoteClient("http://localhost:31291")
        >>> results = client.search(title="machine learning", limit=10)
        >>> work = client.get("10.1038/nature12373")
    """

    def __init__(self, base_url: str = DEFAULT_API_URL, timeout: int = 30):
        """
        Initialize remote client.

        Args:
            base_url: API server URL (default: DEFAULT_API_URL, which is
                built from DEFAULT_PORT). A trailing "/" is stripped.
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    @staticmethod
    def _quote_path(value: str) -> str:
        """Percent-encode a value for use inside a URL path.

        "/" is kept literal because DOIs contain it. Characters such as
        "#", "?" and spaces must be escaped, otherwise they would be read
        as a fragment or query delimiter, or rejected by the HTTP layer.
        """
        return urllib.parse.quote(value, safe="/")

    @staticmethod
    def _to_work(item: Dict[str, Any], default_doi: str = "") -> Work:
        """Build a Work from one JSON record returned by the server.

        Args:
            item: Decoded JSON object describing a single work
            default_doi: DOI to use when the record has no "doi" key
        """
        return Work(
            doi=item.get("doi", default_doi),
            title=item.get("title", ""),
            authors=item.get("authors", []),
            year=item.get("year"),
            journal=item.get("journal"),
            volume=item.get("volume"),
            issue=item.get("issue"),
            # Accept either spelling of the page field.
            page=item.get("page") or item.get("pages"),
            abstract=item.get("abstract"),
            citation_count=item.get("citation_count"),
        )

    def _request(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        method: str = "GET",
        data: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict]:
        """Make HTTP request to API.

        Args:
            endpoint: Path appended to base_url (must start with "/")
            params: Query parameters; entries whose value is None are dropped
            method: HTTP method
            data: Optional JSON-serializable request body

        Returns:
            Decoded JSON response, or None when the server answers 404

        Raises:
            ConnectionError: On any other HTTP error or when the server
                cannot be reached
        """
        url = f"{self.base_url}{endpoint}"
        if params:
            # Filter out None values
            params = {k: v for k, v in params.items() if v is not None}
            if params:
                url = f"{url}?{urllib.parse.urlencode(params)}"

        req_data = None
        if data is not None:
            req_data = json.dumps(data).encode("utf-8")

        req = urllib.request.Request(url, data=req_data, method=method)
        req.add_header("Accept", "application/json")
        if req_data is not None:
            req.add_header("Content-Type", "application/json")

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # 404 is reported as "no result" rather than as a failure.
            if e.code == 404:
                return None
            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e.reason}"
            ) from e

    def health(self) -> Dict:
        """Check API server health."""
        return self._request("/health")

    def info(self) -> Dict:
        """Get database/API information.

        Missing endpoints (404) degrade to "unknown" / 0 values instead
        of raising.
        """
        # _request returns None on 404; normalize so .get() is always safe.
        root = self._request("/") or {}
        info_data = self._request("/info") or {}
        return {
            "api_url": self.base_url,
            "api_version": root.get("version", "unknown"),
            "status": root.get("status", "unknown"),
            "mode": "remote",
            "works": info_data.get("total_papers", 0),
            "fts_indexed": info_data.get("fts_indexed", 0),
            "citations": info_data.get("citations", 0),
        }

    def search(
        self,
        query: Optional[str] = None,
        doi: Optional[str] = None,
        title: Optional[str] = None,
        authors: Optional[str] = None,
        year: Optional[int] = None,
        limit: int = 10,
        offset: int = 0,
    ) -> SearchResult:
        """
        Search for papers.

        Args:
            query: Full-text search query (searches title by default)
            doi: Search by DOI
            title: Search by title (explicit)
            authors: Search by author name
            year: Filter by publication year
            limit: Maximum results (default: 10, max: 100)
            offset: Skip first N results for pagination

        Returns:
            SearchResult with matching works
        """
        # NOTE(review): doi, authors and year are accepted but are not sent
        # to the server; only query/title reach the /works endpoint. Confirm
        # which filters the server supports before forwarding them.
        params = {
            "q": query or title,
            "limit": min(limit, 100),
            "offset": offset,
        }
        # Echo the same query string on both the empty and non-empty paths.
        echoed_query = query or title or doi or ""

        data = self._request("/works", params)
        if not data:
            return SearchResult(works=[], total=0, query=echoed_query, elapsed_ms=0.0)

        works = [self._to_work(item) for item in data.get("results", [])]
        return SearchResult(
            works=works,
            total=data.get("total", len(works)),
            query=echoed_query,
            elapsed_ms=data.get("elapsed_ms", 0.0),
        )

    def get(self, doi: str) -> Optional[Work]:
        """
        Get a work by DOI.

        Args:
            doi: Digital Object Identifier

        Returns:
            Work object or None if not found
        """
        data = self._request(f"/works/{self._quote_path(doi)}")
        if not data or "error" in data:
            return None
        return self._to_work(data, default_doi=doi)

    def get_many(self, dois: List[str]) -> List[Work]:
        """
        Get multiple works by DOI using batch endpoint.

        Falls back to one request per DOI when the batch endpoint is
        missing or fails.

        Args:
            dois: List of DOIs

        Returns:
            List of Work objects
        """
        if not dois:
            return []

        try:
            result = self._request("/works/batch", method="POST", data={"dois": dois})
        except (OSError, ValueError):
            # Transport errors (ConnectionError, timeouts) and undecodable
            # responses both trigger the per-DOI fallback below.
            result = None

        if isinstance(result, dict):
            return [self._to_work(item) for item in result.get("results", [])]

        # Fallback to individual lookups
        works = []
        for doi in dois:
            work = self.get(doi)
            if work:
                works.append(work)
        return works

    def exists(self, doi: str) -> bool:
        """Check if a DOI exists."""
        return self.get(doi) is not None

    def get_citations(self, doi: str, direction: str = "both") -> Dict:
        """
        Get citations for a paper (legacy endpoint).

        Args:
            doi: Paper DOI
            direction: 'citing', 'cited_by', or 'both'

        Returns:
            Dict with citation information
        """
        params = {"doi": doi, "direction": direction}
        return self._request("/api/citations/", params) or {}

    def get_citing(self, doi: str, limit: int = 100) -> List[str]:
        """
        Get DOIs of papers that cite the given DOI.

        Args:
            doi: The DOI to find citations for
            limit: Maximum number of citing papers to return

        Returns:
            List of DOIs that cite this paper
        """
        data = self._request(
            f"/citations/{self._quote_path(doi)}/citing", {"limit": limit}
        )
        if not data:
            return []
        return data.get("papers", [])

    def get_cited(self, doi: str, limit: int = 100) -> List[str]:
        """
        Get DOIs of papers that the given DOI cites (references).

        Args:
            doi: The DOI to find references for
            limit: Maximum number of referenced papers to return

        Returns:
            List of DOIs that this paper cites
        """
        data = self._request(
            f"/citations/{self._quote_path(doi)}/cited", {"limit": limit}
        )
        if not data:
            return []
        return data.get("papers", [])

    def get_citation_count(self, doi: str) -> int:
        """
        Get the number of citations for a DOI.

        Args:
            doi: The DOI to count citations for

        Returns:
            Number of papers citing this DOI
        """
        data = self._request(f"/citations/{self._quote_path(doi)}/count")
        if not data:
            return 0
        return data.get("citation_count", 0)

    def get_citation_network(
        self, doi: str, depth: int = 1, max_citing: int = 25, max_cited: int = 25
    ) -> Dict:
        """
        Get citation network graph for a DOI.

        Args:
            doi: The DOI to build the network around
            depth: How many levels of citations to include (1-3)
            max_citing: Max papers citing each node to include
            max_cited: Max papers each node cites to include

        Returns:
            Dict with nodes, edges, and stats
        """
        params = {
            "depth": depth,
            "max_citing": max_citing,
            "max_cited": max_cited,
        }
        data = self._request(f"/citations/{self._quote_path(doi)}/network", params)
        return data or {}

    def get_journal(
        self, issn: Optional[str] = None, name: Optional[str] = None
    ) -> Dict:
        """
        Get journal information.

        Args:
            issn: Journal ISSN
            name: Journal name

        Returns:
            Dict with journal information
        """
        params = {"issn": issn, "name": name}
        return self._request("/api/journal/", params) or {}
339
+
340
+
341
+ # Module-level client for convenience
342
+ _client: Optional[RemoteClient] = None
343
+
344
+
345
def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
    """Get or create singleton remote client.

    The cached client is reused only when it targets the same server;
    asking for a different URL replaces it.

    Args:
        base_url: API server URL

    Returns:
        The module-level RemoteClient for base_url
    """
    global _client
    # RemoteClient stores its URL without a trailing "/", so normalize the
    # argument the same way; otherwise "http://host/" would never match the
    # cached client and a new one would be built on every call.
    wanted_url = base_url.rstrip("/")
    if _client is None or _client.base_url != wanted_url:
        _client = RemoteClient(base_url)
    return _client
351
+
352
+
353
def reset_client() -> None:
    """Reset singleton client.

    Clears the module-level client so the next get_client() call
    constructs a fresh RemoteClient.
    """
    global _client
    _client = None
@@ -0,0 +1,175 @@
1
+ """Collection methods mixin for RemoteClient."""
2
+
3
+ import json
4
+ import urllib.request
5
+ import urllib.parse
6
+ import urllib.error
7
+ from typing import Dict, List, Optional, Any
8
+
9
+
10
class CollectionsMixin:
    """Mixin providing collection management methods for RemoteClient.

    The host class must provide ``_request(endpoint, params=None,
    method="GET", data=None)`` plus the ``base_url`` and ``timeout``
    attributes; all of them are used below.
    """

    @staticmethod
    def _collection_endpoint(name: str, suffix: str = "") -> str:
        """Build the URL path for a collection.

        The name is percent-encoded as a single path segment, so spaces,
        "?", "#" and "/" in a collection name cannot corrupt the URL.
        """
        return f"/collections/{urllib.parse.quote(name, safe='')}{suffix}"

    def list_collections(self) -> List[Dict]:
        """
        List all collections.

        Returns:
            List of collection info dictionaries
        """
        data = self._request("/collections")
        if not data:
            return []
        return data.get("collections", [])

    def create_collection(
        self,
        name: str,
        query: Optional[str] = None,
        dois: Optional[List[str]] = None,
        limit: int = 1000,
    ) -> Dict:
        """
        Create a new collection from search query or DOI list.

        Args:
            name: Collection name
            query: FTS search query (if dois not provided)
            dois: Explicit list of DOIs
            limit: Max papers for query mode

        Returns:
            Collection info dictionary
        """
        body: Dict[str, Any] = {"name": name, "limit": limit}
        # Empty query / empty DOI list are omitted from the request body.
        if query:
            body["query"] = query
        if dois:
            body["dois"] = dois

        result = self._request("/collections", method="POST", data=body)
        return result or {}

    def get_collection(
        self,
        name: str,
        fields: Optional[List[str]] = None,
        include_abstract: bool = False,
        include_references: bool = False,
        include_citations: bool = False,
        year_min: Optional[int] = None,
        year_max: Optional[int] = None,
        journal: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> Dict:
        """
        Query a collection with field filtering.

        Args:
            name: Collection name
            fields: Explicit field list
            include_abstract: Include abstracts
            include_references: Include references
            include_citations: Include citation counts
            year_min: Filter by min year
            year_max: Filter by max year
            journal: Filter by journal
            limit: Max results

        Returns:
            Dict with collection name, count, and papers
        """
        params: Dict[str, Any] = {
            "include_abstract": include_abstract,
            "include_references": include_references,
            "include_citations": include_citations,
            "year_min": year_min,
            "year_max": year_max,
            "journal": journal,
            "limit": limit,
        }
        if fields:
            # Sent as one comma-separated query parameter.
            params["fields"] = ",".join(fields)

        data = self._request(self._collection_endpoint(name), params)
        return data or {}

    def get_collection_stats(self, name: str) -> Dict:
        """
        Get collection statistics.

        Args:
            name: Collection name

        Returns:
            Dict with year distribution, top journals, citation stats
        """
        data = self._request(self._collection_endpoint(name, "/stats"))
        return data or {}

    def download_collection(
        self,
        name: str,
        output_path: str,
        format: str = "json",
        fields: Optional[List[str]] = None,
    ) -> str:
        """
        Download collection as a file.

        Args:
            name: Collection name
            output_path: Local file path to save to
            format: Export format (json, csv, bibtex, dois)
            fields: Fields to include (json/csv)

        Returns:
            Output file path

        Raises:
            ConnectionError: If the server returns an HTTP error or
                cannot be reached
        """
        params = {"format": format}
        if fields:
            params["fields"] = ",".join(fields)

        endpoint = self._collection_endpoint(name, "/download")
        url = f"{self.base_url}{endpoint}?{urllib.parse.urlencode(params)}"

        try:
            req = urllib.request.Request(url)
            # Read the whole payload before opening the output file so a
            # failed download never leaves a partial file behind.
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                content = response.read()
            with open(output_path, "wb") as f:
                f.write(content)
            return output_path
        except urllib.error.HTTPError as e:
            raise ConnectionError(f"Download failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(f"Cannot connect: {e.reason}") from e

    def delete_collection(self, name: str) -> bool:
        """
        Delete a collection.

        Args:
            name: Collection name

        Returns:
            True if deleted
        """
        data = self._request(self._collection_endpoint(name), method="DELETE")
        if not data:
            return False
        return data.get("deleted", False)

    def collection_exists(self, name: str) -> bool:
        """
        Check if a collection exists.

        Args:
            name: Collection name

        Returns:
            True if exists
        """
        # Any non-None answer from the stats endpoint counts as "exists".
        data = self._request(self._collection_endpoint(name, "/stats"))
        return data is not None
@@ -0,0 +1,140 @@
1
+ """FastAPI server for CrossRef Local with FTS5 search.
2
+
3
+ Modular server structure:
4
+ - routes_works.py: /works endpoints
5
+ - routes_citations.py: /citations endpoints
6
+ - routes_collections.py: /collections endpoints
7
+ - routes_compat.py: Legacy /api/* endpoints
8
+ - models.py: Pydantic response models
9
+ - middleware.py: Request middleware
10
+ """
11
+
12
+ import os
13
+
14
+ from fastapi import FastAPI
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+
17
+ from .. import __version__
18
+ from .middleware import UserContextMiddleware
19
+ from .routes_works import router as works_router
20
+ from .routes_citations import router as citations_router
21
+ from .routes_collections import router as collections_router
22
+ from .routes_compat import router as compat_router
23
+
24
# Application object shared by every route module.
app = FastAPI(
    title="CrossRef Local API",
    description="Fast full-text search across 167M+ scholarly works",
    version=__version__,
)

# Middleware is registered in the same order as before: user context
# first, then CORS.
app.add_middleware(UserContextMiddleware)
# NOTE(review): CORS is wide open (any origin, method and header).
# Confirm this is intended for deployments reachable from browsers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount the routers: works, citations, collections, then legacy /api/*.
for _router in (works_router, citations_router, collections_router, compat_router):
    app.include_router(_router)
del _router  # do not leave the loop variable behind as a module attribute
45
+
46
+
47
@app.get("/")
def root():
    """API root with endpoint information.

    Returns the service name, version, a fixed "running" status and a
    map from endpoint label to URL pattern.
    """
    endpoints = {
        "health": "/health",
        "info": "/info",
        "search": "/works?q=<query>",
        "get_by_doi": "/works/{doi}",
        "batch": "/works/batch",
        "citations_citing": "/citations/{doi}/citing",
        "citations_cited": "/citations/{doi}/cited",
        "citations_count": "/citations/{doi}/count",
        "citations_network": "/citations/{doi}/network",
        "collections_list": "/collections",
        "collections_create": "/collections (POST)",
        "collections_get": "/collections/{name}",
        "collections_stats": "/collections/{name}/stats",
        "collections_download": "/collections/{name}/download",
        "collections_delete": "/collections/{name} (DELETE)",
    }
    return {
        "name": "CrossRef Local API",
        "version": __version__,
        "status": "running",
        "endpoints": endpoints,
    }
72
+
73
+
74
@app.get("/health")
def health():
    """Health check endpoint.

    Reports whether a database handle is available and, if so, its path.
    """
    # Imported lazily, inside the handler.
    from .._core.db import get_db

    db = get_db()
    if db:
        database_path = str(db.db_path)
    else:
        database_path = None

    return {
        "status": "healthy",
        "database_connected": db is not None,
        "database_path": database_path,
    }
85
+
86
+
87
@app.get("/info")
def info():
    """Get database statistics.

    Counts rows in the works, works_fts and citations tables. A failure
    on works_fts or citations is reported as 0; a failure on works
    propagates.
    """
    from .._core.db import get_db
    from .models import InfoResponse

    db = get_db()

    def count_rows(sql):
        # A missing row counts as zero.
        row = db.fetchone(sql)
        return row["count"] if row else 0

    def count_rows_or_zero(sql):
        # Any query error is treated as "nothing indexed".
        try:
            return count_rows(sql)
        except Exception:
            return 0

    return InfoResponse(
        total_papers=count_rows("SELECT COUNT(*) as count FROM works"),
        fts_indexed=count_rows_or_zero("SELECT COUNT(*) as count FROM works_fts"),
        citations=count_rows_or_zero("SELECT COUNT(*) as count FROM citations"),
        database_path=str(db.db_path),
    )
116
+
117
+
118
def _first_env(names, fallback):
    """Return the value of the first variable in names that is set, else fallback."""
    for name in names:
        if name in os.environ:
            return os.environ[name]
    return fallback


# Default port: SCITEX convention (3129X scheme).
# The SCITEX_SCHOLAR_* variable takes precedence over CROSSREF_LOCAL_*.
DEFAULT_PORT = int(
    _first_env(("SCITEX_SCHOLAR_CROSSREF_PORT", "CROSSREF_LOCAL_PORT"), "31291")
)
# NOTE(review): "0.0.0.0" listens on every network interface by default.
DEFAULT_HOST = _first_env(
    ("SCITEX_SCHOLAR_CROSSREF_HOST", "CROSSREF_LOCAL_HOST"), "0.0.0.0"
)
129
+
130
+
131
def run_server(host: str = None, port: int = None):
    """Run the FastAPI server.

    Args:
        host: Interface to bind; DEFAULT_HOST when None or empty
        port: Port to bind; DEFAULT_PORT when None. An explicit 0 is
            passed through unchanged.
    """
    import uvicorn

    host = host or DEFAULT_HOST
    # Test against None rather than truthiness: `port or DEFAULT_PORT`
    # would silently replace an explicit port=0 with the default.
    if port is None:
        port = DEFAULT_PORT
    uvicorn.run(app, host=host, port=port)
138
+
139
+
140
# Public names exported by this package.
__all__ = [
    "app",
    "run_server",
    "DEFAULT_PORT",
    "DEFAULT_HOST",
]
@@ -0,0 +1,25 @@
1
+ """Request middleware for CrossRef Local API."""
2
+
3
+ from fastapi import Request
4
+ from starlette.middleware.base import BaseHTTPMiddleware
5
+
6
+
7
class UserContextMiddleware(BaseHTTPMiddleware):
    """Extract X-User-ID header for multi-tenant collection scoping.

    When requests come through scitex-cloud gateway, it passes the
    authenticated user's ID via X-User-ID header. This middleware
    copies it onto request.state.user_id for the endpoints to read.

    Usage in endpoints:
        @app.get("/collections")
        def list_collections(request: Request):
            user_id = request.state.user_id  # None for local, set for cloud
            ...
    """

    async def dispatch(self, request: Request, call_next):
        # NOTE(review): the header value is taken as-is with no
        # verification here; confirm that only the gateway can reach
        # this server, otherwise a client could set X-User-ID itself.
        user_id = request.headers.get("X-User-ID")
        request.state.user_id = user_id
        return await call_next(request)