crossref-local 0.3.1-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
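
Note: the small additions paired with large deletions on aio.py (+30 −206), cli.py (+5 −447), mcp_server.py (+5 −199), remote.py (+5 −261), and server.py (+5 −349) indicate that 0.5.0 moves the implementation into underscore-prefixed internal packages (_core, _server, _cli, _remote, _cache) and keeps the old top-level modules as thin re-export shims. The server.py hunk at the end of this diff shows the pattern; a hypothetical shim of the same shape for the other modules (the exact names they re-export are not visible in this diff) would look like:

# Hypothetical compatibility shim; the internal import path and the
# re-exported names here are illustrative, not taken from this diff.
from ._server import app, run_server

__all__ = ["app", "run_server"]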
crossref_local/_server/routes_collections.py (new file)
@@ -0,0 +1,282 @@
+ """Collection management endpoints with file download support."""
+
+ import tempfile
+ from typing import Optional
+
+ from fastapi import APIRouter, Query, HTTPException, Request
+ from fastapi.responses import FileResponse
+
+ from .. import cache
+ from .._cache.utils import sanitize_name
+ from .models import CollectionCreateRequest, CollectionInfo
+
+
+ # Allowed fields for field filtering (whitelist)
+ ALLOWED_FIELDS = {
+     "doi",
+     "title",
+     "authors",
+     "year",
+     "journal",
+     "volume",
+     "issue",
+     "page",
+     "abstract",
+     "citation_count",
+     "references",
+     "issn",
+     "publisher",
+ }
+
+ # Maximum limits
+ MAX_LIMIT = 10000
+ MAX_DOIS = 1000
+
+ router = APIRouter(prefix="/collections", tags=["collections"])
+
+
+ def _get_user_id(request: Request) -> Optional[str]:
+     """Get user ID from request state (set by middleware)."""
+     return getattr(request.state, "user_id", None)
+
+
+ @router.get("")
+ def list_collections(request: Request):
+     """
+     List all collections.
+
+     For cloud API (with X-User-ID header), returns only user's collections.
+     For local API, returns all collections.
+     """
+     user_id = _get_user_id(request)
+     caches = cache.list_caches(user_id=user_id)
+     return {
+         "count": len(caches),
+         "collections": [c.to_dict() for c in caches],
+     }
+
+
+ @router.post("", response_model=CollectionInfo)
+ def create_collection(request: Request, body: CollectionCreateRequest):
+     """
+     Create a new collection from search query or DOI list.
+
+     Request body:
+         {"name": "epilepsy", "query": "epilepsy seizure", "limit": 500}
+     or
+         {"name": "my_papers", "dois": ["10.1038/...", "10.1016/..."]}
+     """
+     user_id = _get_user_id(request)
+
+     # Validate collection name
+     try:
+         sanitize_name(body.name)
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     if not body.query and not body.dois:
+         raise HTTPException(
+             status_code=400,
+             detail="Must provide 'query' or 'dois'",
+         )
+
+     # Validate limits
+     if body.limit > MAX_LIMIT:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Limit exceeds maximum ({MAX_LIMIT})",
+         )
+
+     if body.dois and len(body.dois) > MAX_DOIS:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Too many DOIs ({len(body.dois)}). Maximum: {MAX_DOIS}",
+         )
+
+     try:
+         info = cache.create(
+             body.name,
+             query=body.query,
+             dois=body.dois,
+             limit=body.limit,
+             user_id=user_id,
+         )
+         return CollectionInfo(**info.to_dict())
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ @router.get("/{name}")
+ def query_collection(
+     name: str,
+     request: Request,
+     fields: Optional[str] = Query(None, description="Comma-separated field list"),
+     include_abstract: bool = Query(False, description="Include abstracts"),
+     include_references: bool = Query(False, description="Include references"),
+     include_citations: bool = Query(False, description="Include citation counts"),
+     year_min: Optional[int] = Query(None, description="Filter by min year"),
+     year_max: Optional[int] = Query(None, description="Filter by max year"),
+     journal: Optional[str] = Query(None, description="Filter by journal"),
+     limit: Optional[int] = Query(None, description="Max results"),
+ ):
+     """
+     Query a collection with field filtering.
+
+     Returns minimal data to reduce response size.
+     Use 'fields' parameter to specify exactly which fields to return.
+
+     Examples:
+         /collections/epilepsy?fields=doi,title,year
+         /collections/epilepsy?year_min=2020&include_citations=true
+     """
+     user_id = _get_user_id(request)
+
+     # Validate collection name
+     try:
+         sanitize_name(name)
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     if not cache.exists(name, user_id=user_id):
+         raise HTTPException(status_code=404, detail=f"Collection not found: {name}")
+
+     # Validate and filter fields
+     field_list = None
+     if fields:
+         field_list = [f.strip() for f in fields.split(",")]
+         invalid_fields = set(field_list) - ALLOWED_FIELDS
+         if invalid_fields:
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"Invalid fields: {invalid_fields}. Allowed: {ALLOWED_FIELDS}",
+             )
+
+     papers = cache.query(
+         name,
+         fields=field_list,
+         include_abstract=include_abstract,
+         include_references=include_references,
+         include_citations=include_citations,
+         year_min=year_min,
+         year_max=year_max,
+         journal=journal,
+         limit=limit,
+         user_id=user_id,
+     )
+
+     return {
+         "name": name,
+         "count": len(papers),
+         "papers": papers,
+     }
+
+
+ @router.get("/{name}/stats")
+ def collection_stats(name: str, request: Request):
+     """
+     Get collection statistics.
+
+     Returns year distribution, top journals, citation stats.
+     """
+     user_id = _get_user_id(request)
+
+     try:
+         sanitize_name(name)
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     if not cache.exists(name, user_id=user_id):
+         raise HTTPException(status_code=404, detail=f"Collection not found: {name}")
+
+     stats = cache.stats(name, user_id=user_id)
+     return {"name": name, **stats}
+
+
+ @router.get("/{name}/download")
+ def download_collection(
+     name: str,
+     request: Request,
+     format: str = Query("json", description="Export format: json, csv, bibtex, dois"),
+     fields: Optional[str] = Query(None, description="Fields to include (json/csv)"),
+ ):
+     """
+     Download collection as a file.
+
+     Supports multiple formats:
+     - json: Full JSON with all fields or specified fields
+     - csv: CSV format with specified fields
+     - bibtex: BibTeX format for bibliography
+     - dois: Plain text list of DOIs
+
+     Examples:
+         /collections/epilepsy/download?format=json
+         /collections/epilepsy/download?format=bibtex
+         /collections/epilepsy/download?format=csv&fields=doi,title,year
+     """
+     user_id = _get_user_id(request)
+
+     try:
+         sanitize_name(name)
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     if not cache.exists(name, user_id=user_id):
+         raise HTTPException(status_code=404, detail=f"Collection not found: {name}")
+
+     # Determine file extension and media type
+     format_info = {
+         "json": ("application/json", ".json"),
+         "csv": ("text/csv", ".csv"),
+         "bibtex": ("application/x-bibtex", ".bib"),
+         "dois": ("text/plain", ".txt"),
+     }
+
+     if format not in format_info:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Unsupported format: {format}. Use: json, csv, bibtex, dois",
+         )
+
+     media_type, ext = format_info[format]
+     filename = f"{name}{ext}"
+
+     # Export to temporary file
+     with tempfile.NamedTemporaryFile(mode="w", suffix=ext, delete=False) as tmp:
+         field_list = fields.split(",") if fields else None
+         cache.export(
+             name,
+             tmp.name,
+             format=format,
+             fields=field_list,
+             user_id=user_id,
+         )
+         tmp_path = tmp.name
+
+     return FileResponse(
+         tmp_path,
+         media_type=media_type,
+         filename=filename,
+         headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+     )
+
+
+ @router.delete("/{name}")
+ def delete_collection(name: str, request: Request):
+     """
+     Delete a collection.
+     """
+     user_id = _get_user_id(request)
+
+     try:
+         sanitize_name(name)
+     except ValueError as e:
+         raise HTTPException(status_code=400, detail=str(e))
+
+     if not cache.exists(name, user_id=user_id):
+         raise HTTPException(status_code=404, detail=f"Collection not found: {name}")
+
+     deleted = cache.delete(name, user_id=user_id)
+
+     return {"deleted": deleted, "name": name}
crossref_local/_server/routes_compat.py (new file)
@@ -0,0 +1,102 @@
+ """Backwards-compatible legacy API endpoints."""
+
+ from typing import Optional
+
+ from fastapi import APIRouter, HTTPException
+
+ from .._core import fts
+ from .._core.db import get_db
+ from .._core.models import Work
+ from .models import WorkResponse
+ from .routes_works import get_work
+
+ router = APIRouter(prefix="/api", tags=["legacy"])
+
+
+ @router.get("/search/")
+ def api_search_compat(
+     title: Optional[str] = None,
+     q: Optional[str] = None,
+     doi: Optional[str] = None,
+     limit: int = 10,
+ ):
+     """Backwards-compatible search endpoint."""
+     query = title or q
+
+     if doi:
+         # DOI lookup
+         try:
+             work = get_work(doi)
+             return {
+                 "query": {"doi": doi},
+                 "results": [work.model_dump()],
+                 "total": 1,
+                 "returned": 1,
+             }
+         except HTTPException:
+             return {"query": {"doi": doi}, "results": [], "total": 0, "returned": 0}
+
+     if not query:
+         raise HTTPException(
+             status_code=400, detail="Specify q, title, or doi parameter"
+         )
+
+     # Call fts.search directly (not the endpoint function)
+     results = fts.search(query, limit=limit, offset=0)
+     return {
+         "query": {
+             "title": query,
+             "doi": None,
+             "year": None,
+             "authors": None,
+             "limit": limit,
+         },
+         "results": [
+             WorkResponse(
+                 doi=w.doi,
+                 title=w.title,
+                 authors=w.authors,
+                 year=w.year,
+                 journal=w.journal,
+                 issn=w.issn,
+                 volume=w.volume,
+                 issue=w.issue,
+                 page=w.page,
+                 abstract=w.abstract,
+                 citation_count=w.citation_count,
+             ).model_dump()
+             for w in results.works
+         ],
+         "total": results.total,
+         "returned": len(results.works),
+     }
+
+
+ @router.get("/stats/")
+ def api_stats_compat():
+     """Backwards-compatible stats endpoint."""
+     db = get_db()
+
+     row = db.fetchone("SELECT COUNT(*) as count FROM works")
+     work_count = row["count"] if row else 0
+
+     # Get table names
+     tables = []
+     for row in db.fetchall("SELECT name FROM sqlite_master WHERE type='table'"):
+         tables.append(row["name"])
+
+     # Get index names
+     indices = []
+     for row in db.fetchall("SELECT name FROM sqlite_master WHERE type='index'"):
+         if row["name"]:
+             indices.append(row["name"])
+
+     return {
+         "total_papers": work_count,
+         "database_size_mb": None,
+         "year_range": None,
+         "total_journals": 0,
+         "total_citations": None,
+         "tables": tables,
+         "indices": indices,
+     }
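
A short sketch of the legacy calls this router preserves, per the route definitions above (assumptions: a local server on the default port 31291 and the requests package; the DOI is the example from routes_works.py below):

# Exercising the legacy /api endpoints.
import requests

BASE = "http://localhost:31291"

# Old-style title search; 'q' is accepted as an alias for 'title'.
hits = requests.get(
    BASE + "/api/search/", params={"title": "hippocampus", "limit": 5}
).json()
print(hits["total"], "matches,", hits["returned"], "returned")

# A DOI lookup through the same endpoint returns a one-element result list,
# or an empty list (not a 404) when the DOI is unknown.
one = requests.get(
    BASE + "/api/search/", params={"doi": "10.1038/nature12373"}
).json()

# Database-level stats: paper count plus table and index names.
print(requests.get(BASE + "/api/stats/").json()["total_papers"])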
crossref_local/_server/routes_works.py (new file)
@@ -0,0 +1,128 @@
+ """Work search and retrieval endpoints."""
+
+ import time
+ from typing import Optional
+
+ from fastapi import APIRouter, Query, HTTPException
+
+ from .._core import fts
+ from .._core.db import get_db
+ from .._core.models import Work
+ from .models import WorkResponse, SearchResponse, BatchRequest, BatchResponse
+
+ router = APIRouter(tags=["works"])
+
+
+ @router.get("/works", response_model=SearchResponse)
+ def search_works(
+     q: str = Query(..., description="Search query (FTS5 syntax supported)"),
+     limit: int = Query(10, ge=1, le=100, description="Max results"),
+     offset: int = Query(0, ge=0, description="Skip first N results"),
+ ):
+     """
+     Full-text search across works.
+
+     Uses FTS5 index for fast searching across titles, abstracts, and authors.
+     Supports FTS5 query syntax like AND, OR, NOT, "exact phrases".
+
+     Examples:
+         /works?q=machine learning
+         /works?q="neural network" AND hippocampus
+         /works?q=CRISPR&limit=20
+     """
+     start = time.perf_counter()
+
+     try:
+         results = fts.search(q, limit=limit, offset=offset)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=f"Search error: {e}")
+
+     elapsed_ms = (time.perf_counter() - start) * 1000
+
+     return SearchResponse(
+         query=q,
+         total=results.total,
+         returned=len(results.works),
+         elapsed_ms=round(elapsed_ms, 2),
+         results=[
+             WorkResponse(
+                 doi=w.doi,
+                 title=w.title,
+                 authors=w.authors,
+                 year=w.year,
+                 journal=w.journal,
+                 issn=w.issn,
+                 volume=w.volume,
+                 issue=w.issue,
+                 page=w.page,
+                 abstract=w.abstract,
+                 citation_count=w.citation_count,
+             )
+             for w in results.works
+         ],
+     )
+
+
+ @router.get("/works/{doi:path}", response_model=Optional[WorkResponse])
+ def get_work(doi: str):
+     """
+     Get work metadata by DOI.
+
+     Examples:
+         /works/10.1038/nature12373
+         /works/10.1016/j.cell.2020.01.001
+     """
+     db = get_db()
+     metadata = db.get_metadata(doi)
+
+     if metadata is None:
+         raise HTTPException(status_code=404, detail=f"DOI not found: {doi}")
+
+     work = Work.from_metadata(doi, metadata)
+
+     return WorkResponse(
+         doi=work.doi,
+         title=work.title,
+         authors=work.authors,
+         year=work.year,
+         journal=work.journal,
+         issn=work.issn,
+         volume=work.volume,
+         issue=work.issue,
+         page=work.page,
+         abstract=work.abstract,
+         citation_count=work.citation_count,
+     )
+
+
+ @router.post("/works/batch", response_model=BatchResponse)
+ def get_works_batch(request: BatchRequest):
+     """
+     Get multiple works by DOI.
+
+     Request body: {"dois": ["10.1038/...", "10.1016/..."]}
+     """
+     db = get_db()
+     results = []
+
+     for doi in request.dois:
+         metadata = db.get_metadata(doi)
+         if metadata:
+             work = Work.from_metadata(doi, metadata)
+             results.append(
+                 WorkResponse(
+                     doi=work.doi,
+                     title=work.title,
+                     authors=work.authors,
+                     year=work.year,
+                     journal=work.journal,
+                     abstract=work.abstract,
+                     citation_count=work.citation_count,
+                 )
+             )
+
+     return BatchResponse(
+         requested=len(request.dois),
+         found=len(results),
+         results=results,
+     )
crossref_local/_server/server.py (new file)
@@ -0,0 +1,19 @@
+ """FastAPI server for CrossRef Local with FTS5 search.
+
+ This module re-exports from the modular server package for backwards compatibility.
+
+ Usage:
+     crossref-local api                # Run on default port 31291
+     crossref-local api --port 8080   # Custom port
+
+     # Or directly:
+     uvicorn crossref_local.server:app --host 0.0.0.0 --port 31291
+ """
+
+ # Re-export from modular server package
+ from .server import app, run_server, DEFAULT_PORT, DEFAULT_HOST
+
+ __all__ = ["app", "run_server", "DEFAULT_PORT", "DEFAULT_HOST"]
+
+ if __name__ == "__main__":
+     run_server()
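
For completeness, a sketch of launching the app without the CLI, equivalent to the uvicorn command in the docstring above (uvicorn.run accepting an import string is standard uvicorn behavior; the host and port values simply mirror that docstring):

# Programmatic startup of the CrossRef Local server.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("crossref_local.server:app", host="0.0.0.0", port=31291)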