crossref-local 0.4.0-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- crossref_local/__init__.py +18 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +24 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- /crossref_local/{cli_main.py → _cli/main.py} +0 -0
- /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,128 @@
+"""Work search and retrieval endpoints."""
+
+import time
+from typing import Optional
+
+from fastapi import APIRouter, Query, HTTPException
+
+from .._core import fts
+from .._core.db import get_db
+from .._core.models import Work
+from .models import WorkResponse, SearchResponse, BatchRequest, BatchResponse
+
+router = APIRouter(tags=["works"])
+
+
+@router.get("/works", response_model=SearchResponse)
+def search_works(
+    q: str = Query(..., description="Search query (FTS5 syntax supported)"),
+    limit: int = Query(10, ge=1, le=100, description="Max results"),
+    offset: int = Query(0, ge=0, description="Skip first N results"),
+):
+    """
+    Full-text search across works.
+
+    Uses FTS5 index for fast searching across titles, abstracts, and authors.
+    Supports FTS5 query syntax like AND, OR, NOT, "exact phrases".
+
+    Examples:
+        /works?q=machine learning
+        /works?q="neural network" AND hippocampus
+        /works?q=CRISPR&limit=20
+    """
+    start = time.perf_counter()
+
+    try:
+        results = fts.search(q, limit=limit, offset=offset)
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Search error: {e}")
+
+    elapsed_ms = (time.perf_counter() - start) * 1000
+
+    return SearchResponse(
+        query=q,
+        total=results.total,
+        returned=len(results.works),
+        elapsed_ms=round(elapsed_ms, 2),
+        results=[
+            WorkResponse(
+                doi=w.doi,
+                title=w.title,
+                authors=w.authors,
+                year=w.year,
+                journal=w.journal,
+                issn=w.issn,
+                volume=w.volume,
+                issue=w.issue,
+                page=w.page,
+                abstract=w.abstract,
+                citation_count=w.citation_count,
+            )
+            for w in results.works
+        ],
+    )
+
+
+@router.get("/works/{doi:path}", response_model=Optional[WorkResponse])
+def get_work(doi: str):
+    """
+    Get work metadata by DOI.
+
+    Examples:
+        /works/10.1038/nature12373
+        /works/10.1016/j.cell.2020.01.001
+    """
+    db = get_db()
+    metadata = db.get_metadata(doi)
+
+    if metadata is None:
+        raise HTTPException(status_code=404, detail=f"DOI not found: {doi}")
+
+    work = Work.from_metadata(doi, metadata)
+
+    return WorkResponse(
+        doi=work.doi,
+        title=work.title,
+        authors=work.authors,
+        year=work.year,
+        journal=work.journal,
+        issn=work.issn,
+        volume=work.volume,
+        issue=work.issue,
+        page=work.page,
+        abstract=work.abstract,
+        citation_count=work.citation_count,
+    )
+
+
+@router.post("/works/batch", response_model=BatchResponse)
+def get_works_batch(request: BatchRequest):
+    """
+    Get multiple works by DOI.
+
+    Request body: {"dois": ["10.1038/...", "10.1016/..."]}
+    """
+    db = get_db()
+    results = []
+
+    for doi in request.dois:
+        metadata = db.get_metadata(doi)
+        if metadata:
+            work = Work.from_metadata(doi, metadata)
+            results.append(
+                WorkResponse(
+                    doi=work.doi,
+                    title=work.title,
+                    authors=work.authors,
+                    year=work.year,
+                    journal=work.journal,
+                    abstract=work.abstract,
+                    citation_count=work.citation_count,
+                )
+            )
+
+    return BatchResponse(
+        requested=len(request.dois),
+        found=len(results),
+        results=results,
+    )
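Taken together, this hunk adds three endpoints: GET /works (FTS5 search), GET /works/{doi:path}, and POST /works/batch. A minimal client sketch, assuming the API is running locally on the default port 31291 documented further down and that the requests package is available (it is not a dependency declared in this diff):

    import requests  # assumed HTTP client, not declared by this package

    BASE = "http://localhost:31291"  # default port per the server docstring below

    # Full-text search (FTS5 syntax) with pagination
    r = requests.get(f"{BASE}/works", params={"q": '"neural network" AND hippocampus', "limit": 5})
    print(r.json()["total"], "matches")

    # Single lookup by DOI; {doi:path} lets the slash in the DOI pass through
    print(requests.get(f"{BASE}/works/10.1038/nature12373").json()["title"])

    # Batch lookup
    batch = requests.post(f"{BASE}/works/batch", json={"dois": ["10.1038/nature12373"]}).json()
    print(batch["found"], "of", batch["requested"], "found")

The DOIs are the ones used in the route docstrings above; whether they resolve depends on the local database.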
@@ -0,0 +1,19 @@
+"""FastAPI server for CrossRef Local with FTS5 search.
+
+This module re-exports from the modular server package for backwards compatibility.
+
+Usage:
+    crossref-local api               # Run on default port 31291
+    crossref-local api --port 8080   # Custom port
+
+    # Or directly:
+    uvicorn crossref_local.server:app --host 0.0.0.0 --port 31291
+"""
+
+# Re-export from modular server package
+from .server import app, run_server, DEFAULT_PORT, DEFAULT_HOST
+
+__all__ = ["app", "run_server", "DEFAULT_PORT", "DEFAULT_HOST"]
+
+if __name__ == "__main__":
+    run_server()
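Since app, run_server, DEFAULT_HOST, and DEFAULT_PORT stay importable, the server can also be launched programmatically. A minimal sketch, assuming uvicorn is installed and that the re-exported names behave as the docstring above describes (roughly equivalent to `crossref-local api`):

    import uvicorn
    from crossref_local.server import app, DEFAULT_HOST, DEFAULT_PORT

    if __name__ == "__main__":
        # Serve the re-exported FastAPI app on the package's default host/port
        uvicorn.run(app, host=DEFAULT_HOST, port=DEFAULT_PORT)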
crossref_local/aio.py
CHANGED
@@ -1,5 +1,5 @@
-
-Async API for crossref_local.
+#!/usr/bin/env python3
+"""Async API module for crossref_local.
 
 Provides async versions of all API functions. Uses thread pool execution
 with per-thread database connections for thread safety.
@@ -19,210 +19,30 @@ Usage:
     counts = await aio.count_many(["CRISPR", "machine learning"])
 """
 
-import
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    db = _get_thread_db()
-    return fts._search_with_db(db, query, limit, offset)
-
-
-def _count_sync(query: str) -> int:
-    """Thread-safe sync count."""
-    from . import fts
-    db = _get_thread_db()
-    return fts._count_with_db(db, query)
-
-
-def _get_sync(doi: str) -> Optional[Work]:
-    """Thread-safe sync get."""
-    db = _get_thread_db()
-    metadata = db.get_metadata(doi)
-    if metadata:
-        return Work.from_metadata(doi, metadata)
-    return None
-
-
-def _get_many_sync(dois: List[str]) -> List[Work]:
-    """Thread-safe sync get_many."""
-    db = _get_thread_db()
-    works = []
-    for doi in dois:
-        metadata = db.get_metadata(doi)
-        if metadata:
-            works.append(Work.from_metadata(doi, metadata))
-    return works
-
-
-def _exists_sync(doi: str) -> bool:
-    """Thread-safe sync exists."""
-    db = _get_thread_db()
-    row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
-    return row is not None
-
-
-def _info_sync() -> dict:
-    """Thread-safe sync info."""
-    db = _get_thread_db()
-
-    row = db.fetchone("SELECT COUNT(*) as count FROM works")
-    work_count = row["count"] if row else 0
-
-    try:
-        row = db.fetchone("SELECT COUNT(*) as count FROM works_fts")
-        fts_count = row["count"] if row else 0
-    except Exception:
-        fts_count = 0
-
-    try:
-        row = db.fetchone("SELECT COUNT(*) as count FROM citations")
-        citation_count = row["count"] if row else 0
-    except Exception:
-        citation_count = 0
-
-    return {
-        "db_path": str(Config.get_db_path()),
-        "works": work_count,
-        "fts_indexed": fts_count,
-        "citations": citation_count,
-    }
-
-
-async def search(
-    query: str,
-    limit: int = 10,
-    offset: int = 0,
-) -> SearchResult:
-    """
-    Async full-text search across works.
-
-    Args:
-        query: Search query (supports FTS5 syntax)
-        limit: Maximum results to return
-        offset: Skip first N results (for pagination)
-
-    Returns:
-        SearchResult with matching works
-    """
-    return await asyncio.to_thread(_search_sync, query, limit, offset)
-
-
-async def count(query: str) -> int:
-    """
-    Async count matching works without fetching results.
-
-    Args:
-        query: FTS5 search query
-
-    Returns:
-        Number of matching works
-    """
-    return await asyncio.to_thread(_count_sync, query)
-
-
-async def get(doi: str) -> Optional[Work]:
-    """
-    Async get a work by DOI.
-
-    Args:
-        doi: Digital Object Identifier
-
-    Returns:
-        Work object or None if not found
-    """
-    return await asyncio.to_thread(_get_sync, doi)
-
-
-async def get_many(dois: List[str]) -> List[Work]:
-    """
-    Async get multiple works by DOI.
-
-    Args:
-        dois: List of DOIs
-
-    Returns:
-        List of Work objects (missing DOIs are skipped)
-    """
-    return await asyncio.to_thread(_get_many_sync, dois)
-
-
-async def exists(doi: str) -> bool:
-    """
-    Async check if a DOI exists in the database.
-
-    Args:
-        doi: Digital Object Identifier
-
-    Returns:
-        True if DOI exists
-    """
-    return await asyncio.to_thread(_exists_sync, doi)
-
-
-async def info() -> dict:
-    """
-    Async get database information.
-
-    Returns:
-        Dictionary with database stats
-    """
-    return await asyncio.to_thread(_info_sync)
-
-
-async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
-    """
-    Run multiple searches concurrently.
-
-    Args:
-        queries: List of search queries
-        limit: Maximum results per query
-
-    Returns:
-        List of SearchResult objects
-    """
-    tasks = [search(q, limit=limit) for q in queries]
-    return await asyncio.gather(*tasks)
-
-
-async def count_many(queries: List[str]) -> dict:
-    """
-    Count matches for multiple queries concurrently.
-
-    Args:
-        queries: List of search queries
-
-    Returns:
-        Dict mapping query -> count
-
-    Example:
-        >>> counts = await count_many(["CRISPR", "machine learning"])
-        >>> print(counts)
-        {'CRISPR': 45000, 'machine learning': 477922}
-    """
-    tasks = [count(q) for q in queries]
-    results = await asyncio.gather(*tasks)
-    return dict(zip(queries, results))
-
+from ._aio import (
+    SearchResult as _SearchResult,
+    Work as _Work,
+    count as _count,
+    count_many as _count_many,
+    exists as _exists,
+    get as _get,
+    get_many as _get_many,
+    info as _info,
+    search as _search,
+    search_many as _search_many,
+)
+
+# Re-export with clean names
+search = _search
+count = _count
+get = _get
+get_many = _get_many
+exists = _exists
+info = _info
+search_many = _search_many
+count_many = _count_many
+SearchResult = _SearchResult
+Work = _Work
 
 __all__ = [
     "search",
@@ -233,4 +53,8 @@ __all__ = [
     "info",
     "search_many",
     "count_many",
+    "SearchResult",
+    "Work",
 ]
+
+# EOF
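Functionally, aio.py is now a thin façade over the new crossref_local._aio package: the public names are re-exported unchanged, only their implementation moved. A short usage sketch based on the module docstring and the signatures visible in the removed code (the DOI is the route docstring's example, and the thread-pool behaviour is assumed to carry over to _aio as the retained docstring states):

    import asyncio
    from crossref_local import aio

    async def main() -> None:
        # Each call delegates to the sync lookup on a worker thread,
        # as in the removed asyncio.to_thread implementation above.
        work = await aio.get("10.1038/nature12373")            # Work or None
        hits = await aio.search("CRISPR", limit=5)              # SearchResult
        counts = await aio.count_many(["CRISPR", "machine learning"])
        print(work, hits.total, counts)

    asyncio.run(main())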