crossref-local 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. crossref_local/__init__.py +18 -10
  2. crossref_local/_aio/__init__.py +30 -0
  3. crossref_local/_aio/_impl.py +238 -0
  4. crossref_local/_cache/__init__.py +15 -0
  5. crossref_local/{cache_export.py → _cache/export.py} +27 -10
  6. crossref_local/_cache/utils.py +93 -0
  7. crossref_local/_cli/__init__.py +9 -0
  8. crossref_local/_cli/cli.py +512 -0
  9. crossref_local/_cli/mcp.py +351 -0
  10. crossref_local/_cli/mcp_server.py +413 -0
  11. crossref_local/_core/__init__.py +58 -0
  12. crossref_local/{api.py → _core/api.py} +24 -5
  13. crossref_local/{citations.py → _core/citations.py} +55 -26
  14. crossref_local/{config.py → _core/config.py} +40 -22
  15. crossref_local/{db.py → _core/db.py} +32 -26
  16. crossref_local/{fts.py → _core/fts.py} +18 -14
  17. crossref_local/{models.py → _core/models.py} +11 -6
  18. crossref_local/_remote/__init__.py +56 -0
  19. crossref_local/_remote/base.py +356 -0
  20. crossref_local/_remote/collections.py +175 -0
  21. crossref_local/_server/__init__.py +140 -0
  22. crossref_local/_server/middleware.py +25 -0
  23. crossref_local/_server/models.py +129 -0
  24. crossref_local/_server/routes_citations.py +98 -0
  25. crossref_local/_server/routes_collections.py +282 -0
  26. crossref_local/_server/routes_compat.py +102 -0
  27. crossref_local/_server/routes_works.py +128 -0
  28. crossref_local/_server/server.py +19 -0
  29. crossref_local/aio.py +30 -206
  30. crossref_local/cache.py +100 -100
  31. crossref_local/cli.py +5 -515
  32. crossref_local/jobs.py +169 -0
  33. crossref_local/mcp_server.py +5 -410
  34. crossref_local/remote.py +5 -266
  35. crossref_local/server.py +5 -349
  36. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
  37. crossref_local-0.5.0.dist-info/RECORD +47 -0
  38. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
  39. crossref_local/cli_mcp.py +0 -275
  40. crossref_local-0.4.0.dist-info/RECORD +0 -27
  41. /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
  42. /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
  43. /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
  44. /crossref_local/{cli_main.py → _cli/main.py} +0 -0
  45. /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  46. /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  47. /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  48. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,129 @@
1
+ """Pydantic models for API responses."""
2
+
3
+ from typing import Optional, List
4
+ from pydantic import BaseModel
5
+
6
+ from .. import __version__
7
+
8
+
9
class WorkResponse(BaseModel):
    """Metadata for a single work as returned by the API.

    ``doi`` is the only required field. ``authors`` defaults to an empty
    list and every other field defaults to ``None``.
    """

    # Required identifier.
    doi: str

    # Descriptive metadata.
    title: Optional[str] = None
    authors: List[str] = []
    year: Optional[int] = None

    # Publication venue and position within it.
    journal: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    page: Optional[str] = None

    # Optional extras.
    abstract: Optional[str] = None
    citation_count: Optional[int] = None
23
+
24
+
25
class SearchResponse(BaseModel):
    """Envelope for a search request: the query, counters, timing and hits.

    All fields are required.
    """

    query: str
    # NOTE(review): presumably ``total`` counts all matches while
    # ``returned`` counts the entries in ``results`` -- confirm against
    # the route that builds this model.
    total: int
    returned: int
    elapsed_ms: float
    results: List[WorkResponse]
33
+
34
+
35
class InfoResponse(BaseModel):
    """Service and database summary.

    The identity fields (``name``, ``version``, ``status``, ``mode``) carry
    defaults; the database counters and ``database_path`` must be supplied.
    """

    # Service identity; ``version`` defaults to the package's ``__version__``.
    name: str = "CrossRef Local API"
    version: str = __version__
    status: str = "running"
    mode: str = "local"

    # Database figures supplied by the caller.
    total_papers: int
    fts_indexed: int
    citations: int
    database_path: str
46
+
47
+
48
class BatchRequest(BaseModel):
    """Request body carrying the list of DOIs to look up in one call."""

    # Required; no default is provided.
    dois: List[str]
52
+
53
+
54
class BatchResponse(BaseModel):
    """Result of a batch DOI lookup: two counters plus the resolved works.

    All fields are required.
    """

    requested: int
    found: int
    results: List[WorkResponse]
60
+
61
+
62
+ # Citation models
63
class CitingResponse(BaseModel):
    """DOIs of the papers that cite a given DOI.

    All fields are required.
    """

    doi: str
    citing_count: int
    # Plain DOI strings, not full work records.
    papers: List[str]
69
+
70
+
71
class CitedResponse(BaseModel):
    """DOIs of the papers that a given DOI cites (its references).

    All fields are required.
    """

    doi: str
    cited_count: int
    # Plain DOI strings, not full work records.
    papers: List[str]
77
+
78
+
79
class CitationCountResponse(BaseModel):
    """A DOI paired with its citation count; both fields are required."""

    doi: str
    citation_count: int
84
+
85
+
86
class CitationNetworkResponse(BaseModel):
    """Citation graph around one DOI.

    All fields are required. ``nodes`` and ``edges`` are lists of untyped
    dictionaries; their keys are not constrained by this model.
    """

    # Graph parameters.
    center_doi: str
    depth: int

    # Summary counters.
    total_nodes: int
    total_edges: int

    # Graph payload.
    nodes: List[dict]
    edges: List[dict]
95
+
96
+
97
+ # Collection models
98
class CollectionCreateRequest(BaseModel):
    """Request body for creating a collection.

    ``name`` is required. ``query`` and ``dois`` both default to ``None``
    and ``limit`` defaults to 1000.
    """

    name: str

    # Sources for the collection contents.
    query: Optional[str] = None
    dois: Optional[List[str]] = None

    limit: int = 1000
105
+
106
+
107
class CollectionInfo(BaseModel):
    """Descriptor of a stored collection.

    Every field is required except ``query``, which defaults to ``None``.
    """

    # Identity and location.
    name: str
    path: str

    # Size figures.
    size_bytes: int
    size_mb: float
    paper_count: int

    # Provenance.
    created_at: str
    query: Optional[str] = None
117
+
118
+
119
class CollectionQueryRequest(BaseModel):
    """Options for querying a collection.

    Every field is optional: the ``include_*`` switches default to
    ``False`` and all remaining fields default to ``None``.
    """

    # Explicit field selection.
    fields: Optional[List[str]] = None

    # Opt-in switches for extra data.
    include_abstract: bool = False
    include_references: bool = False
    include_citations: bool = False

    # Filters.
    year_min: Optional[int] = None
    year_max: Optional[int] = None
    journal: Optional[str] = None

    limit: Optional[int] = None
@@ -0,0 +1,98 @@
1
+ """Citation network endpoints."""
2
+
3
+ from fastapi import APIRouter, Query
4
+
5
+ from .._core.citations import get_citing, get_cited, get_citation_count, CitationNetwork
6
+ from .models import (
7
+ CitingResponse,
8
+ CitedResponse,
9
+ CitationCountResponse,
10
+ CitationNetworkResponse,
11
+ )
12
+
13
+ router = APIRouter(prefix="/citations", tags=["citations"])
14
+
15
+
16
@router.get("/{doi:path}/citing", response_model=CitingResponse)
def get_citing_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    List the DOIs of papers that cite the given DOI.

    ``citing_count`` in the response is the length of the returned list.

    Examples:
        /citations/10.1038/nature12373/citing
        /citations/10.1038/nature12373/citing?limit=50
    """
    papers = get_citing(doi, limit=limit)
    return CitingResponse(doi=doi, citing_count=len(papers), papers=papers)
34
+
35
+
36
@router.get("/{doi:path}/cited", response_model=CitedResponse)
def get_cited_papers(
    doi: str,
    limit: int = Query(100, ge=1, le=1000, description="Max papers to return"),
):
    """
    List the DOIs of papers cited by the given DOI (its references).

    ``cited_count`` in the response is the length of the returned list.

    Examples:
        /citations/10.1038/nature12373/cited
        /citations/10.1038/nature12373/cited?limit=50
    """
    references = get_cited(doi, limit=limit)
    return CitedResponse(doi=doi, cited_count=len(references), papers=references)
54
+
55
+
56
@router.get("/{doi:path}/count", response_model=CitationCountResponse)
def get_citation_count_endpoint(doi: str):
    """
    Report the citation count for the given DOI.

    Examples:
        /citations/10.1038/nature12373/count
    """
    return CitationCountResponse(doi=doi, citation_count=get_citation_count(doi))
66
+
67
+
68
@router.get("/{doi:path}/network", response_model=CitationNetworkResponse)
def get_citation_network(
    doi: str,
    depth: int = Query(1, ge=1, le=3, description="Network depth (1-3)"),
    max_citing: int = Query(25, ge=1, le=100, description="Max citing per node"),
    max_cited: int = Query(25, ge=1, le=100, description="Max cited per node"),
):
    """
    Build the citation network graph centred on the given DOI.

    The response carries nodes (papers) and edges (citation relationships),
    together with the node and edge totals taken from the network's stats.

    Examples:
        /citations/10.1038/nature12373/network
        /citations/10.1038/nature12373/network?depth=2&max_citing=50
    """
    graph = CitationNetwork(
        doi, depth=depth, max_citing=max_citing, max_cited=max_cited
    ).to_dict()

    # Flatten the nested "stats" entry into the top-level response fields.
    payload = {
        "center_doi": graph["center_doi"],
        "depth": graph["depth"],
        "total_nodes": graph["stats"]["total_nodes"],
        "total_edges": graph["stats"]["total_edges"],
        "nodes": graph["nodes"],
        "edges": graph["edges"],
    }
    return CitationNetworkResponse(**payload)
@@ -0,0 +1,282 @@
1
+ """Collection management endpoints with file download support."""
2
+
3
+ import tempfile
4
+ from typing import Optional
5
+
6
+ from fastapi import APIRouter, Query, HTTPException, Request
7
+ from fastapi.responses import FileResponse
8
+
9
+ from .. import cache
10
+ from .._cache.utils import sanitize_name
11
+ from .models import CollectionCreateRequest, CollectionInfo
12
+
13
+
14
# Whitelist of field names accepted by the ``fields`` query parameter.
ALLOWED_FIELDS = {
    "doi", "title", "authors", "year", "journal",
    "volume", "issue", "page", "abstract",
    "citation_count", "references", "issn", "publisher",
}

# Upper bounds enforced when a collection is created.
MAX_LIMIT = 10_000
MAX_DOIS = 1_000
34
+
35
+ router = APIRouter(prefix="/collections", tags=["collections"])
36
+
37
+
38
+ def _get_user_id(request: Request) -> Optional[str]:
39
+ """Get user ID from request state (set by middleware)."""
40
+ return getattr(request.state, "user_id", None)
41
+
42
+
43
@router.get("")
def list_collections(request: Request):
    """
    List the available collections.

    The user id found on the request state (if any) is forwarded to the
    cache layer, so a cloud request carrying a user id sees only that
    user's collections while a local request sees all of them.
    """
    owner = _get_user_id(request)
    entries = [entry.to_dict() for entry in cache.list_caches(user_id=owner)]
    return {"count": len(entries), "collections": entries}
57
+
58
+
59
@router.post("", response_model=CollectionInfo)
def create_collection(request: Request, body: CollectionCreateRequest):
    """
    Create a new collection from a search query or a DOI list.

    Request body:
        {"name": "epilepsy", "query": "epilepsy seizure", "limit": 500}
    or
        {"name": "my_papers", "dois": ["10.1038/...", "10.1016/..."]}

    Raises:
        HTTPException 400: invalid name, neither ``query`` nor ``dois``
            given, ``limit`` outside ``1..MAX_LIMIT``, more than
            ``MAX_DOIS`` DOIs, or a ``ValueError`` from the cache layer.
        HTTPException 500: any other failure while creating the collection.
    """
    user_id = _get_user_id(request)

    # Validate collection name
    try:
        sanitize_name(body.name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    if not body.query and not body.dois:
        raise HTTPException(
            status_code=400,
            detail="Must provide 'query' or 'dois'",
        )

    # Validate limits. The lower bound matters as much as the upper one:
    # a zero or negative limit must not reach the storage layer, where it
    # could be treated as "no limit" and bypass MAX_LIMIT.
    if body.limit < 1:
        raise HTTPException(
            status_code=400,
            detail="Limit must be a positive integer",
        )

    if body.limit > MAX_LIMIT:
        raise HTTPException(
            status_code=400,
            detail=f"Limit exceeds maximum ({MAX_LIMIT})",
        )

    if body.dois and len(body.dois) > MAX_DOIS:
        raise HTTPException(
            status_code=400,
            detail=f"Too many DOIs ({len(body.dois)}). Maximum: {MAX_DOIS}",
        )

    try:
        info = cache.create(
            body.name,
            query=body.query,
            dois=body.dois,
            limit=body.limit,
            user_id=user_id,
        )
        return CollectionInfo(**info.to_dict())
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Top-level boundary: surface unexpected failures as a 500 while
        # keeping the original exception chained for server-side logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
109
+
110
+
111
@router.get("/{name}")
def query_collection(
    name: str,
    request: Request,
    fields: Optional[str] = Query(None, description="Comma-separated field list"),
    include_abstract: bool = Query(False, description="Include abstracts"),
    include_references: bool = Query(False, description="Include references"),
    include_citations: bool = Query(False, description="Include citation counts"),
    year_min: Optional[int] = Query(None, description="Filter by min year"),
    year_max: Optional[int] = Query(None, description="Filter by max year"),
    journal: Optional[str] = Query(None, description="Filter by journal"),
    limit: Optional[int] = Query(None, description="Max results"),
):
    """
    Query a collection, optionally restricting the returned fields.

    Pass 'fields' as a comma-separated list to choose exactly which fields
    come back; each entry must be one of ALLOWED_FIELDS.

    Responds with 400 for an invalid name or unknown field, and 404 when
    the collection does not exist.

    Examples:
        /collections/epilepsy?fields=doi,title,year
        /collections/epilepsy?year_min=2020&include_citations=true
    """
    owner = _get_user_id(request)

    # Reject malformed names before touching the cache.
    try:
        sanitize_name(name)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    if not cache.exists(name, user_id=owner):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    # Split the comma-separated list and check it against the whitelist.
    selected = [part.strip() for part in fields.split(",")] if fields else None
    if selected is not None:
        invalid_fields = set(selected).difference(ALLOWED_FIELDS)
        if invalid_fields:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid fields: {invalid_fields}. Allowed: {ALLOWED_FIELDS}",
            )

    options = dict(
        fields=selected,
        include_abstract=include_abstract,
        include_references=include_references,
        include_citations=include_citations,
        year_min=year_min,
        year_max=year_max,
        journal=journal,
        limit=limit,
        user_id=owner,
    )
    papers = cache.query(name, **options)

    return {"name": name, "count": len(papers), "papers": papers}
174
+
175
+
176
@router.get("/{name}/stats")
def collection_stats(name: str, request: Request):
    """
    Return statistics for a collection.

    The response is the cache layer's stats mapping with the collection
    name added under "name" (year distribution, top journals, citation
    stats).

    Responds with 400 for an invalid name and 404 for an unknown collection.
    """
    owner = _get_user_id(request)

    try:
        sanitize_name(name)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    if not cache.exists(name, user_id=owner):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    # Start from the name, then overlay whatever the cache layer reports.
    payload = {"name": name}
    payload.update(cache.stats(name, user_id=owner))
    return payload
195
+
196
+
197
def _remove_temp_file(path: str) -> None:
    """Delete *path*, ignoring the error if it is already gone."""
    import contextlib
    import os

    with contextlib.suppress(OSError):
        os.unlink(path)


@router.get("/{name}/download")
def download_collection(
    name: str,
    request: Request,
    format: str = Query("json", description="Export format: json, csv, bibtex, dois"),
    fields: Optional[str] = Query(None, description="Fields to include (json/csv)"),
):
    """
    Download collection as a file.

    Supports multiple formats:
    - json: Full JSON with all fields or specified fields
    - csv: CSV format with specified fields
    - bibtex: BibTeX format for bibliography
    - dois: Plain text list of DOIs

    The export is written to a temporary file that is removed once the
    response has been sent, or immediately if the export fails.

    Responds with 400 for an invalid name, unsupported format or unknown
    field, and 404 when the collection does not exist.

    Examples:
        /collections/epilepsy/download?format=json
        /collections/epilepsy/download?format=bibtex
        /collections/epilepsy/download?format=csv&fields=doi,title,year
    """
    import os

    from fastapi import BackgroundTasks

    user_id = _get_user_id(request)

    try:
        sanitize_name(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    if not cache.exists(name, user_id=user_id):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    # Determine file extension and media type
    format_info = {
        "json": ("application/json", ".json"),
        "csv": ("text/csv", ".csv"),
        "bibtex": ("application/x-bibtex", ".bib"),
        "dois": ("text/plain", ".txt"),
    }

    if format not in format_info:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {format}. Use: json, csv, bibtex, dois",
        )

    # Parse and validate fields the same way query_collection does, so
    # "doi, title" works and names outside the whitelist are rejected.
    field_list = None
    if fields:
        field_list = [f.strip() for f in fields.split(",")]
        invalid_fields = set(field_list) - ALLOWED_FIELDS
        if invalid_fields:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid fields: {invalid_fields}. Allowed: {ALLOWED_FIELDS}",
            )

    media_type, ext = format_info[format]
    filename = f"{name}{ext}"

    # Reserve a temp path and close our handle before exporting, so the
    # exporter is the only writer of the file.
    fd, tmp_path = tempfile.mkstemp(suffix=ext)
    os.close(fd)
    try:
        cache.export(
            name,
            tmp_path,
            format=format,
            fields=field_list,
            user_id=user_id,
        )
    except Exception:
        # Do not leave an orphaned temp file behind on a failed export.
        _remove_temp_file(tmp_path)
        raise

    # Delete the temp file after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_temp_file, tmp_path)

    return FileResponse(
        tmp_path,
        media_type=media_type,
        filename=filename,
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        background=cleanup,
    )
263
+
264
+
265
@router.delete("/{name}")
def delete_collection(name: str, request: Request):
    """
    Delete a collection.

    Responds with 400 for an invalid name and 404 for an unknown
    collection; otherwise returns the cache layer's delete result together
    with the collection name.
    """
    owner = _get_user_id(request)

    try:
        sanitize_name(name)
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))

    if not cache.exists(name, user_id=owner):
        raise HTTPException(status_code=404, detail=f"Collection not found: {name}")

    return {"deleted": cache.delete(name, user_id=owner), "name": name}
@@ -0,0 +1,102 @@
1
+ """Backwards-compatible legacy API endpoints."""
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import APIRouter, HTTPException
6
+
7
+ from .._core import fts
8
+ from .._core.db import get_db
9
+ from .._core.models import Work
10
+ from .models import WorkResponse
11
+ from .routes_works import get_work
12
+
13
+ router = APIRouter(prefix="/api", tags=["legacy"])
14
+
15
+
16
@router.get("/search/")
def api_search_compat(
    title: Optional[str] = None,
    q: Optional[str] = None,
    doi: Optional[str] = None,
    limit: int = 10,
):
    """Legacy search endpoint.

    A ``doi`` parameter takes precedence and yields zero or one result.
    Otherwise ``title`` (or, failing that, ``q``) is run as a full-text
    search. Responds with 400 when none of the three is supplied.
    """
    if doi:
        # DOI lookup: any HTTPException from get_work is reported as an
        # empty result set rather than an error.
        try:
            hits = [get_work(doi).model_dump()]
        except HTTPException:
            hits = []
        return {
            "query": {"doi": doi},
            "results": hits,
            "total": len(hits),
            "returned": len(hits),
        }

    query = title or q
    if not query:
        raise HTTPException(
            status_code=400, detail="Specify q, title, or doi parameter"
        )

    # Call fts.search directly (not the endpoint function).
    found = fts.search(query, limit=limit, offset=0)

    # Attributes copied from each work into the response model.
    work_fields = (
        "doi",
        "title",
        "authors",
        "year",
        "journal",
        "issn",
        "volume",
        "issue",
        "page",
        "abstract",
        "citation_count",
    )
    serialized = [
        WorkResponse(**{field: getattr(w, field) for field in work_fields}).model_dump()
        for w in found.works
    ]

    return {
        "query": {
            "title": query,
            "doi": None,
            "year": None,
            "authors": None,
            "limit": limit,
        },
        "results": serialized,
        "total": found.total,
        "returned": len(found.works),
    }
73
+
74
+
75
@router.get("/stats/")
def api_stats_compat():
    """Legacy stats endpoint.

    Reports the row count of ``works`` plus the table and index names
    listed in ``sqlite_master``. The remaining keys are fixed placeholders
    (``None`` or ``0``).
    """
    db = get_db()

    count_row = db.fetchone("SELECT COUNT(*) as count FROM works")
    total_papers = count_row["count"] if count_row else 0

    table_names = [
        r["name"]
        for r in db.fetchall("SELECT name FROM sqlite_master WHERE type='table'")
    ]
    # Rows with an empty index name are skipped.
    index_names = [
        r["name"]
        for r in db.fetchall("SELECT name FROM sqlite_master WHERE type='index'")
        if r["name"]
    ]

    return {
        "total_papers": total_papers,
        "database_size_mb": None,
        "year_range": None,
        "total_journals": 0,
        "total_citations": None,
        "tables": table_names,
        "indices": index_names,
    }