crossref-local 0.4.0-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +24 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +389 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +457 -0
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +62 -0
- crossref_local/{api.py → _core/api.py} +26 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/_core/export.py +344 -0
- crossref_local/{fts.py → _core/fts.py} +37 -14
- crossref_local/{models.py → _core/models.py} +120 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +378 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +143 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +178 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +36 -11
- crossref_local-0.5.1.dist-info/RECORD +49 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- /crossref_local/{cli_main.py → _cli/main.py} +0 -0
- /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
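The 0.5.1 release reorganizes the package into private, underscore-prefixed subpackages (_core, _cache, _cli, _server, _remote, _impact_factor), while the old top-level modules (cache.py, aio.py, cli.py, remote.py, server.py, mcp_server.py) shrink to thin re-export shims. Judging from the __all__ list and the re-export at the end of the cache.py diff below, the public cache API appears to remain source-compatible; a minimal sketch of that assumption (the cache name and query are illustrative):

# Assumes the 0.4.0-era public surface still resolves through the 0.5.1 shims.
from crossref_local import cache

# create() and query() keep their public signatures per the diff below.
info = cache.create("epilepsy-2024", query="epilepsy", limit=100)
papers = cache.query("epilepsy-2024", year_min=2020, include_citations=True)
print(info.paper_count, len(papers))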
crossref_local/cache.py
CHANGED
@@ -18,38 +18,37 @@ Usage:
     >>> stats = cache.stats("epilepsy")
 """
 
-import json
-import time
-import
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Dict, List, Optional
+import json as _json
+import time as _time
+from dataclasses import dataclass as _dataclass
+from typing import Any as _Any
+from typing import Dict as _Dict
+from typing import List as _List
+from typing import Optional as _Optional
+
+from ._core.api import get_many as _get_many
+from ._core.api import search as _search
+from ._cache.utils import cache_path as _cache_path
+from ._cache.utils import get_cache_dir as _get_cache_dir
+from ._cache.utils import meta_path as _meta_path
 
-
-
-
-
-""
-
-
-
-
-
-
-
-
-
-def _cache_path(name: str) -> Path:
-    """Get path for a named cache."""
-    return _get_cache_dir() / f"{name}.json"
-
-
-def _meta_path(name: str) -> Path:
-    """Get path for cache metadata."""
-    return _get_cache_dir() / f"{name}.meta.json"
+__all__ = [
+    "CacheInfo",
+    "create",
+    "append",
+    "load",
+    "query",
+    "query_dois",
+    "stats",
+    "info",
+    "exists",
+    "list_caches",
+    "delete",
+    "export",
+]
 
 
-@dataclass
+@_dataclass
 class CacheInfo:
     """Information about a cache."""
 
@@ -58,7 +57,7 @@ class CacheInfo:
     size_bytes: int
     paper_count: int
    created_at: str
-    query: Optional[str] = None
+    query: _Optional[str] = None
 
     def to_dict(self) -> dict:
         return {
@@ -74,11 +73,12 @@ class CacheInfo:
 
 def create(
     name: str,
-    query: Optional[str] = None,
-    dois: Optional[List[str]] = None,
-    papers: Optional[List[Dict[str, Any]]] = None,
+    query: _Optional[str] = None,
+    dois: _Optional[_List[str]] = None,
+    papers: _Optional[_List[_Dict[str, _Any]]] = None,
     limit: int = 1000,
     offset: int = 0,
+    user_id: _Optional[str] = None,
 ) -> CacheInfo:
     """Create a cache from search query, DOI list, or pre-fetched papers.
 
@@ -89,6 +89,7 @@ def create(
         papers: Pre-fetched paper dicts (skips API calls)
         limit: Max papers to fetch (for query mode)
         offset: Offset for pagination (for query mode)
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         CacheInfo with cache details
@@ -105,31 +106,31 @@ def create(
         raise ValueError("Must provide 'query', 'dois', or 'papers'")
     elif dois is None:
         # Get DOIs from search
-        results = search(query, limit=limit, offset=offset)
+        results = _search(query, limit=limit, offset=offset)
         dois = [w.doi for w in results.works]
         # Fetch full metadata
-        works = get_many(dois)
+        works = _get_many(dois)
         papers = [w.to_dict() for w in works]
     else:
         # Fetch full metadata for DOIs
-        works = get_many(dois)
+        works = _get_many(dois)
         papers = [w.to_dict() for w in works]
 
     # Save cache
-    cache_file = _cache_path(name)
+    cache_file = _cache_path(name, user_id)
     with open(cache_file, "w") as f:
-        json.dump(papers, f)
+        _json.dump(papers, f)
 
     # Save metadata
     meta = {
         "name": name,
         "query": query,
-        "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
+        "created_at": _time.strftime("%Y-%m-%d %H:%M:%S"),
         "paper_count": len(papers),
         "dois_requested": len(dois) if dois else len(papers),
     }
-    with open(_meta_path(name), "w") as f:
-        json.dump(meta, f, indent=2)
+    with open(_meta_path(name, user_id), "w") as f:
+        _json.dump(meta, f, indent=2)
 
     return CacheInfo(
         name=name,
@@ -143,10 +144,11 @@ def create(
 
 def append(
     name: str,
-    query: Optional[str] = None,
-    dois: Optional[List[str]] = None,
+    query: _Optional[str] = None,
+    dois: _Optional[_List[str]] = None,
     limit: int = 1000,
     offset: int = 0,
+    user_id: _Optional[str] = None,
 ) -> CacheInfo:
     """Append papers to existing cache.
 
@@ -156,20 +158,23 @@ def append(
         dois: Explicit list of DOIs to add
         limit: Max papers to fetch (for query mode)
         offset: Offset for pagination (for query mode)
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         Updated CacheInfo
     """
-    if not exists(name):
-        return create(name, query=query, dois=dois, limit=limit, offset=offset)
+    if not exists(name, user_id=user_id):
+        return create(
+            name, query=query, dois=dois, limit=limit, offset=offset, user_id=user_id
+        )
 
     # Load existing
-    existing = load(name)
+    existing = load(name, user_id=user_id)
     existing_dois = {p["doi"] for p in existing}
 
     # Get new DOIs
     if dois is None and query is not None:
-        results = search(query, limit=limit, offset=offset)
+        results = _search(query, limit=limit, offset=offset)
         dois = [w.doi for w in results.works]
     elif dois is None:
         raise ValueError("Must provide either 'query' or 'dois'")
@@ -179,62 +184,64 @@ def append(
 
     if new_dois:
         # Fetch new metadata
-        new_works = get_many(new_dois)
+        new_works = _get_many(new_dois)
         new_papers = [w.to_dict() for w in new_works]
 
         # Combine and save
         all_papers = existing + new_papers
-        cache_file = _cache_path(name)
+        cache_file = _cache_path(name, user_id)
         with open(cache_file, "w") as f:
-            json.dump(all_papers, f)
+            _json.dump(all_papers, f)
 
         # Update metadata
-        meta_file = _meta_path(name)
+        meta_file = _meta_path(name, user_id)
         if meta_file.exists():
             with open(meta_file) as f:
-                meta = json.load(f)
+                meta = _json.load(f)
         else:
             meta = {"name": name}
 
-        meta["updated_at"] = time.strftime("%Y-%m-%d %H:%M:%S")
+        meta["updated_at"] = _time.strftime("%Y-%m-%d %H:%M:%S")
         meta["paper_count"] = len(all_papers)
 
         with open(meta_file, "w") as f:
-            json.dump(meta, f, indent=2)
+            _json.dump(meta, f, indent=2)
 
-        return info(name)
+        return info(name, user_id=user_id)
 
-    return info(name)
+    return info(name, user_id=user_id)
 
 
-def load(name: str) -> List[Dict[str, Any]]:
+def load(name: str, user_id: _Optional[str] = None) -> _List[_Dict[str, _Any]]:
     """Load raw cache data.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         List of paper dictionaries with full metadata
     """
-    cache_file = _cache_path(name)
+    cache_file = _cache_path(name, user_id)
     if not cache_file.exists():
         raise FileNotFoundError(f"Cache not found: {name}")
 
     with open(cache_file) as f:
-        return json.load(f)
+        return _json.load(f)
 
 
 def query(
     name: str,
-    fields: Optional[List[str]] = None,
+    fields: _Optional[_List[str]] = None,
     include_abstract: bool = False,
     include_references: bool = False,
     include_citations: bool = False,
-    year_min: Optional[int] = None,
-    year_max: Optional[int] = None,
-    journal: Optional[str] = None,
-    limit: Optional[int] = None,
-) -> List[Dict[str, Any]]:
+    year_min: _Optional[int] = None,
+    year_max: _Optional[int] = None,
+    journal: _Optional[str] = None,
+    limit: _Optional[int] = None,
+    user_id: _Optional[str] = None,
+) -> _List[_Dict[str, _Any]]:
     """Query cache with field filtering.
 
     Args:
@@ -247,6 +254,7 @@ def query(
         year_max: Filter by maximum year
         journal: Filter by journal name (substring match)
         limit: Max results to return
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         Filtered list of paper dictionaries
@@ -257,7 +265,7 @@
         >>> # With filters
         >>> papers = query("epilepsy", year_min=2020, include_citations=True)
     """
-    papers = load(name)
+    papers = load(name, user_id=user_id)
 
     # Apply filters
     if year_min is not None:
@@ -295,29 +303,31 @@ def query(
     return papers
 
 
-def query_dois(name: str) -> List[str]:
+def query_dois(name: str, user_id: _Optional[str] = None) -> _List[str]:
     """Get just DOIs from cache.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         List of DOIs
     """
-    papers = load(name)
+    papers = load(name, user_id=user_id)
     return [p["doi"] for p in papers if p.get("doi")]
 
 
-def stats(name: str) -> Dict[str, Any]:
+def stats(name: str, user_id: _Optional[str] = None) -> _Dict[str, _Any]:
     """Get cache statistics.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         Dictionary with statistics
     """
-    papers = load(name)
+    papers = load(name, user_id=user_id)
 
     # Year distribution
     years = [p.get("year") for p in papers if p.get("year")]
@@ -360,26 +370,27 @@ def stats(name: str) -> Dict[str, Any]:
     }
 
 
-def info(name: str) -> CacheInfo:
+def info(name: str, user_id: _Optional[str] = None) -> CacheInfo:
     """Get cache information.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         CacheInfo object
     """
-    cache_file = _cache_path(name)
+    cache_file = _cache_path(name, user_id)
    if not cache_file.exists():
         raise FileNotFoundError(f"Cache not found: {name}")
 
-    meta_file = _meta_path(name)
+    meta_file = _meta_path(name, user_id)
     meta = {}
     if meta_file.exists():
         with open(meta_file) as f:
-            meta = json.load(f)
+            meta = _json.load(f)
 
-    papers = load(name)
+    papers = load(name, user_id=user_id)
 
     return CacheInfo(
         name=name,
@@ -391,25 +402,29 @@ def info(name: str) -> CacheInfo:
     )
 
 
-def exists(name: str) -> bool:
+def exists(name: str, user_id: _Optional[str] = None) -> bool:
     """Check if cache exists.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         True if cache exists
     """
-    return _cache_path(name).exists()
+    return _cache_path(name, user_id).exists()
 
 
-def list_caches() -> List[CacheInfo]:
+def list_caches(user_id: _Optional[str] = None) -> _List[CacheInfo]:
     """List all available caches.
 
+    Args:
+        user_id: _Optional user ID for multi-tenant scoping
+
     Returns:
         List of CacheInfo objects
     """
-    cache_dir = _get_cache_dir()
+    cache_dir = _get_cache_dir(user_id)
     caches = []
 
     for f in cache_dir.glob("*.json"):
@@ -417,24 +432,25 @@ def list_caches() -> List[CacheInfo]:
             continue
         name = f.stem
         try:
-            caches.append(info(name))
+            caches.append(info(name, user_id=user_id))
         except Exception:
             pass
 
     return sorted(caches, key=lambda c: c.name)
 
 
-def delete(name: str) -> bool:
+def delete(name: str, user_id: _Optional[str] = None) -> bool:
     """Delete a cache.
 
     Args:
         name: Cache name
+        user_id: _Optional user ID for multi-tenant scoping
 
     Returns:
         True if deleted
     """
-    cache_file = _cache_path(name)
-    meta_file = _meta_path(name)
+    cache_file = _cache_path(name, user_id)
+    meta_file = _meta_path(name, user_id)
 
     deleted = False
     if cache_file.exists():
@@ -446,21 +462,5 @@ def delete(name: str) -> bool:
     return deleted
 
 
-
 # Re-export from cache_export for backwards compatibility
-from .cache_export import export
-
-__all__ = [
-    "CacheInfo",
-    "create",
-    "append",
-    "load",
-    "query",
-    "query_dois",
-    "stats",
-    "info",
-    "exists",
-    "list_caches",
-    "delete",
-    "export",
-]
+from ._cache.export import export