crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,175 @@
1
+ """Collection methods mixin for RemoteClient."""
2
+
3
+ import json
4
+ import urllib.request
5
+ import urllib.parse
6
+ import urllib.error
7
+ from typing import Dict, List, Optional, Any
8
+
9
+
10
class CollectionsMixin:
    """Mixin providing collection management methods for RemoteClient.

    The methods rely on members supplied by the host class and not defined
    here: ``self._request(endpoint, params, method=..., data=...)`` for JSON
    calls, plus ``self.base_url`` and ``self.timeout`` for file downloads.
    """

    @staticmethod
    def _collection_path(name: str, suffix: str = "") -> str:
        """
        Build the endpoint path for a single collection.

        The name is percent-encoded (including "/") so that spaces or other
        reserved characters cannot yield an invalid URL or address a
        different endpoint.

        Args:
            name: Collection name
            suffix: Optional trailing path such as "/stats"

        Returns:
            Path starting with "/collections/"
        """
        encoded = urllib.parse.quote(str(name), safe="")
        return f"/collections/{encoded}{suffix}"

    def list_collections(self) -> List[Dict]:
        """
        List all collections.

        Returns:
            List of collection info dictionaries; empty when the request
            yields no data or the response has no "collections" key
        """
        data = self._request("/collections")
        if not data:
            return []
        return data.get("collections", [])

    def create_collection(
        self,
        name: str,
        query: Optional[str] = None,
        dois: Optional[List[str]] = None,
        limit: int = 1000,
    ) -> Dict:
        """
        Create a new collection from search query or DOI list.

        Args:
            name: Collection name
            query: FTS search query (if dois not provided)
            dois: Explicit list of DOIs
            limit: Max papers for query mode

        Returns:
            Collection info dictionary (empty dict when the request yields
            no data)
        """
        # "query" and "dois" are only sent when they are non-empty.
        body = {"name": name, "limit": limit}
        if query:
            body["query"] = query
        if dois:
            body["dois"] = dois

        result = self._request("/collections", method="POST", data=body)
        return result or {}

    def get_collection(
        self,
        name: str,
        fields: Optional[List[str]] = None,
        include_abstract: bool = False,
        include_references: bool = False,
        include_citations: bool = False,
        year_min: Optional[int] = None,
        year_max: Optional[int] = None,
        journal: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> Dict:
        """
        Query a collection with field filtering.

        Args:
            name: Collection name
            fields: Explicit field list (sent comma-joined)
            include_abstract: Include abstracts
            include_references: Include references
            include_citations: Include citation counts
            year_min: Filter by min year
            year_max: Filter by max year
            journal: Filter by journal
            limit: Max results

        Returns:
            Dict with collection name, count, and papers (empty dict when
            the request yields no data)
        """
        # Unset filters are forwarded as None; how they are serialised is
        # left to the host's _request implementation.
        params = {
            "include_abstract": include_abstract,
            "include_references": include_references,
            "include_citations": include_citations,
            "year_min": year_min,
            "year_max": year_max,
            "journal": journal,
            "limit": limit,
        }
        if fields:
            params["fields"] = ",".join(fields)

        data = self._request(self._collection_path(name), params)
        return data or {}

    def get_collection_stats(self, name: str) -> Dict:
        """
        Get collection statistics.

        Args:
            name: Collection name

        Returns:
            Dict with year distribution, top journals, citation stats
            (empty dict when the request yields no data)
        """
        data = self._request(self._collection_path(name, "/stats"))
        return data or {}

    def download_collection(
        self,
        name: str,
        output_path: str,
        format: str = "json",
        fields: Optional[List[str]] = None,
    ) -> str:
        """
        Download collection as a file.

        Args:
            name: Collection name
            output_path: Local file path to save to
            format: Export format (json, csv, bibtex, dois)
            fields: Fields to include (json/csv)

        Returns:
            Output file path

        Raises:
            ConnectionError: If the server answers with an HTTP error or
                cannot be reached
        """
        params = {"format": format}
        if fields:
            params["fields"] = ",".join(fields)

        # params always contains "format", so a query string is always added.
        path = self._collection_path(name, "/download")
        url = f"{self.base_url}{path}?{urllib.parse.urlencode(params)}"

        try:
            req = urllib.request.Request(url)
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                # The whole body is read before the output file is opened.
                content = response.read()
                with open(output_path, "wb") as f:
                    f.write(content)
            return output_path
        except urllib.error.HTTPError as e:
            raise ConnectionError(f"Download failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(f"Cannot connect: {e.reason}") from e

    def delete_collection(self, name: str) -> bool:
        """
        Delete a collection.

        Args:
            name: Collection name

        Returns:
            The "deleted" value of the response; False when the request
            yields no data or the key is absent
        """
        data = self._request(self._collection_path(name), method="DELETE")
        if not data:
            return False
        return data.get("deleted", False)

    def collection_exists(self, name: str) -> bool:
        """
        Check if a collection exists.

        Args:
            name: Collection name

        Returns:
            True if the stats request returns anything other than None
        """
        data = self._request(self._collection_path(name, "/stats"))
        return data is not None
@@ -0,0 +1,140 @@
1
+ """FastAPI server for CrossRef Local with FTS5 search.
2
+
3
+ Modular server structure:
4
+ - routes_works.py: /works endpoints
5
+ - routes_citations.py: /citations endpoints
6
+ - routes_collections.py: /collections endpoints
7
+ - routes_compat.py: Legacy /api/* endpoints
8
+ - models.py: Pydantic response models
9
+ - middleware.py: Request middleware
10
+ """
11
+
12
+ import os
13
+
14
+ from fastapi import FastAPI
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+
17
+ from .. import __version__
18
+ from .middleware import UserContextMiddleware
19
+ from .routes_works import router as works_router
20
+ from .routes_citations import router as citations_router
21
+ from .routes_collections import router as collections_router
22
+ from .routes_compat import router as compat_router
23
+
24
# Create FastAPI app
# The version shown in the API metadata is the package's own __version__.
app = FastAPI(
    title="CrossRef Local API",
    description="Fast full-text search across 167M+ scholarly works",
    version=__version__,
)

# Middleware
# UserContextMiddleware is registered first, then CORS; the registration
# order is deliberate and should not be changed casually.
app.add_middleware(UserContextMiddleware)
# CORS is fully permissive: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
# One router per route module: works, citations, collections, and the
# legacy /api/* compatibility routes.
app.include_router(works_router)
app.include_router(citations_router)
app.include_router(collections_router)
app.include_router(compat_router)
45
+
46
+
47
@app.get("/")
def root():
    """API root with endpoint information."""
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    # Map of endpoint label -> path template advertised to clients.
    endpoint_map = dict(
        health="/health",
        info="/info",
        search="/works?q=<query>",
        get_by_doi="/works/{doi}",
        batch="/works/batch",
        citations_citing="/citations/{doi}/citing",
        citations_cited="/citations/{doi}/cited",
        citations_count="/citations/{doi}/count",
        citations_network="/citations/{doi}/network",
        collections_list="/collections",
        collections_create="/collections (POST)",
        collections_get="/collections/{name}",
        collections_stats="/collections/{name}/stats",
        collections_download="/collections/{name}/download",
        collections_delete="/collections/{name} (DELETE)",
    )
    summary = {
        "name": "CrossRef Local API",
        "version": __version__,
        "status": "running",
    }
    summary["endpoints"] = endpoint_map
    return summary
72
+
73
+
74
@app.get("/health")
def health():
    """Health check endpoint."""
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    # Imported lazily, inside the handler, as in the rest of this module.
    from .._core.db import get_db

    database = get_db()
    connected = database is not None
    # The path is reported only when the handle is truthy.
    location = str(database.db_path) if database else None
    return {
        "status": "healthy",
        "database_connected": connected,
        "database_path": location,
    }
85
+
86
+
87
@app.get("/info")
def info():
    """Get database statistics."""
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    from .._core.db import get_db
    from .models import InfoResponse

    db = get_db()

    def _count(sql):
        # Run a COUNT(*) query; a missing row counts as zero.
        record = db.fetchone(sql)
        return record["count"] if record else 0

    def _count_or_zero(sql):
        # Best-effort variant: any failure of the query is reported as 0.
        try:
            return _count(sql)
        except Exception:
            return 0

    # The works count is mandatory (errors propagate); the FTS and citation
    # counts are best-effort.
    total_papers = _count("SELECT COUNT(*) as count FROM works")
    fts_indexed = _count_or_zero("SELECT COUNT(*) as count FROM works_fts")
    citations = _count_or_zero("SELECT COUNT(*) as count FROM citations")

    return InfoResponse(
        total_papers=total_papers,
        fts_indexed=fts_indexed,
        citations=citations,
        database_path=str(db.db_path),
    )
116
+
117
+
118
# Default port: SCITEX convention (3129X scheme)
# Lookup order: SCITEX_SCHOLAR_CROSSREF_PORT, then CROSSREF_LOCAL_PORT,
# then the built-in "31291"; the chosen value is converted to int.
DEFAULT_PORT = int(
    os.getenv(
        "SCITEX_SCHOLAR_CROSSREF_PORT",
        os.getenv("CROSSREF_LOCAL_PORT", "31291"),
    )
)
# Same lookup order for the bind address, falling back to all interfaces.
DEFAULT_HOST = os.getenv(
    "SCITEX_SCHOLAR_CROSSREF_HOST",
    os.getenv("CROSSREF_LOCAL_HOST", "0.0.0.0"),
)
129
+
130
+
131
def run_server(host: str = None, port: int = None):
    """Run the FastAPI server.

    Args:
        host: Address to bind. Falls back to DEFAULT_HOST when None or empty.
        port: Port to bind. Falls back to DEFAULT_PORT only when None, so an
            explicit 0 is handed to uvicorn instead of being replaced by the
            default.
    """
    # Imported here so uvicorn is only needed when the server is started.
    import uvicorn

    host = host or DEFAULT_HOST
    # "port or DEFAULT_PORT" would discard a deliberate port=0.
    if port is None:
        port = DEFAULT_PORT
    uvicorn.run(app, host=host, port=port)
138
+
139
+
140
# Names exported by "from <this package> import *".
__all__ = ["app", "run_server", "DEFAULT_PORT", "DEFAULT_HOST"]
@@ -0,0 +1,25 @@
1
+ """Request middleware for CrossRef Local API."""
2
+
3
+ from fastapi import Request
4
+ from starlette.middleware.base import BaseHTTPMiddleware
5
+
6
+
7
class UserContextMiddleware(BaseHTTPMiddleware):
    """Copy the X-User-ID request header onto ``request.state.user_id``.

    The scitex-cloud gateway forwards the authenticated user's ID in the
    X-User-ID header so that collections can be scoped per tenant. This
    middleware stores that value on the request state before the request is
    handled; the header is taken as-is and is not verified here.

    Usage in endpoints:
        @app.get("/collections")
        def list_collections(request: Request):
            user_id = request.state.user_id  # None for local, set for cloud
            ...
    """

    async def dispatch(self, request: Request, call_next):
        # A missing header yields None.
        user_id = request.headers.get("X-User-ID")
        request.state.user_id = user_id
        return await call_next(request)
@@ -0,0 +1,129 @@
1
+ """Pydantic models for API responses."""
2
+
3
+ from typing import Optional, List
4
+ from pydantic import BaseModel
5
+
6
+ from .. import __version__
7
+
8
+
9
class WorkResponse(BaseModel):
    """Work metadata response."""

    # Only `doi` is required; every other field has a default.
    doi: str
    title: Optional[str] = None
    authors: List[str] = []
    year: Optional[int] = None
    journal: Optional[str] = None
    issn: Optional[str] = None
    # volume, issue and page are typed as strings, not integers.
    volume: Optional[str] = None
    issue: Optional[str] = None
    page: Optional[str] = None
    abstract: Optional[str] = None
    citation_count: Optional[int] = None
23
+
24
+
25
class SearchResponse(BaseModel):
    """Search results response."""

    # All fields are required (none has a default).
    query: str
    # NOTE(review): presumably `total` is the overall match count and
    # `returned` is len(results) — confirm against the /works route.
    total: int
    returned: int
    elapsed_ms: float
    results: List[WorkResponse]
33
+
34
+
35
class InfoResponse(BaseModel):
    """Database info response."""

    # Descriptive fields with fixed defaults; `version` defaults to the
    # package's __version__.
    name: str = "CrossRef Local API"
    version: str = __version__
    status: str = "running"
    mode: str = "local"
    # Required fields; the caller must supply all four.
    total_papers: int
    fts_indexed: int
    citations: int
    database_path: str
46
+
47
+
48
class BatchRequest(BaseModel):
    """Batch DOI lookup request."""

    # Required list of DOI strings; no size limit is declared on the model.
    dois: List[str]
52
+
53
+
54
class BatchResponse(BaseModel):
    """Batch DOI lookup response."""

    # All fields are required.
    # NOTE(review): presumably `requested` counts the DOIs asked for and
    # `found` counts the entries in `results` — confirm against the route.
    requested: int
    found: int
    results: List[WorkResponse]
60
+
61
+
62
+ # Citation models
63
class CitingResponse(BaseModel):
    """Papers citing a DOI."""

    # All fields are required.
    doi: str
    citing_count: int
    # Papers are given as DOI strings, not full work records.
    papers: List[str]
69
+
70
+
71
class CitedResponse(BaseModel):
    """Papers cited by a DOI."""

    # All fields are required.
    doi: str
    cited_count: int
    # Papers are given as DOI strings, not full work records.
    papers: List[str]
77
+
78
+
79
class CitationCountResponse(BaseModel):
    """Citation count for a DOI."""

    # Both fields are required.
    doi: str
    citation_count: int
84
+
85
+
86
class CitationNetworkResponse(BaseModel):
    """Citation network graph."""

    # All fields are required.
    center_doi: str
    depth: int
    total_nodes: int
    total_edges: int
    # Nodes and edges are untyped dicts; their keys are not validated here.
    nodes: List[dict]
    edges: List[dict]
95
+
96
+
97
+ # Collection models
98
class CollectionCreateRequest(BaseModel):
    """Create collection request."""

    # Only `name` is required.
    name: str
    # Both sources are optional on the model; it does not itself enforce
    # that at least one of `query` / `dois` is given.
    query: Optional[str] = None
    dois: Optional[List[str]] = None
    limit: int = 1000
105
+
106
+
107
class CollectionInfo(BaseModel):
    """Collection information."""

    # Every field except `query` is required.
    name: str
    path: str
    # Size is carried twice: as an integer byte count and as a float in MB.
    size_bytes: int
    size_mb: float
    paper_count: int
    # Timestamp is carried as a string; its format is not fixed by the model.
    created_at: str
    query: Optional[str] = None
116
+ query: Optional[str] = None
117
+
118
+
119
class CollectionQueryRequest(BaseModel):
    """Query collection request."""

    # Every field is optional: the include_* flags default to False and all
    # filters default to None.
    fields: Optional[List[str]] = None
    include_abstract: bool = False
    include_references: bool = False
    include_citations: bool = False
    year_min: Optional[int] = None
    year_max: Optional[int] = None
    journal: Optional[str] = None
    limit: Optional[int] = None
@@ -0,0 +1,98 @@
1
+ """Citation network endpoints."""
2
+
3
+ from fastapi import APIRouter, Query
4
+
5
+ from .._core.citations import get_citing, get_cited, get_citation_count, CitationNetwork
6
+ from .models import (
7
+ CitingResponse,
8
+ CitedResponse,
9
+ CitationCountResponse,
10
+ CitationNetworkResponse,
11
+ )
12
+
13
# All routes declared on this router are served under the /citations prefix
# and carry the "citations" tag.
router = APIRouter(prefix="/citations", tags=["citations"])
14
+
15
+
16
@router.get("/{doi:path}/citing", response_model=CitingResponse)
def get_citing_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    Get papers that cite this DOI.

    Examples:
        /citations/10.1038/nature12373/citing
        /citations/10.1038/nature12373/citing?limit=50
    """
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    papers = get_citing(doi, limit=limit)
    # citing_count is the number of DOIs returned by the lookup above.
    response = CitingResponse(doi=doi, citing_count=len(papers), papers=papers)
    return response
34
+
35
+
36
@router.get("/{doi:path}/cited", response_model=CitedResponse)
def get_cited_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    Get papers cited by this DOI (references).

    Examples:
        /citations/10.1038/nature12373/cited
        /citations/10.1038/nature12373/cited?limit=50
    """
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    references = get_cited(doi, limit=limit)
    # cited_count is the number of DOIs returned by the lookup above.
    response = CitedResponse(doi=doi, cited_count=len(references), papers=references)
    return response
54
+
55
+
56
@router.get("/{doi:path}/count", response_model=CitationCountResponse)
def get_citation_count_endpoint(doi: str):
    """
    Get citation count for a DOI.

    Examples:
        /citations/10.1038/nature12373/count
    """
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    return CitationCountResponse(
        doi=doi,
        citation_count=get_citation_count(doi),
    )
66
+
67
+
68
@router.get("/{doi:path}/network", response_model=CitationNetworkResponse)
def get_citation_network(
    doi: str,
    depth: int = Query(1, ge=1, le=3, description="Network depth (1-3)"),
    max_citing: int = Query(25, ge=1, le=100, description="Max citing per node"),
    max_cited: int = Query(25, ge=1, le=100, description="Max cited per node"),
):
    """
    Get citation network graph for a DOI.

    Returns nodes (papers) and edges (citation relationships).

    Examples:
        /citations/10.1038/nature12373/network
        /citations/10.1038/nature12373/network?depth=2&max_citing=50
    """
    # Docstring kept verbatim: FastAPI publishes it as the route description.
    limits = {"depth": depth, "max_citing": max_citing, "max_cited": max_cited}
    payload = CitationNetwork(doi, **limits).to_dict()

    # Flatten the nested "stats" counters into top-level response fields.
    response_fields = {
        "center_doi": payload["center_doi"],
        "depth": payload["depth"],
        "total_nodes": payload["stats"]["total_nodes"],
        "total_edges": payload["stats"]["total_edges"],
        "nodes": payload["nodes"],
        "edges": payload["edges"],
    }
    return CitationNetworkResponse(**response_fields)