crossref-local 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +78 -0
- crossref_local/aio.py +236 -0
- crossref_local/api.py +153 -0
- crossref_local/citations.py +413 -0
- crossref_local/cli.py +257 -0
- crossref_local/config.py +72 -0
- crossref_local/db.py +136 -0
- crossref_local/fts.py +138 -0
- crossref_local/impact_factor/__init__.py +20 -0
- crossref_local/impact_factor/calculator.py +479 -0
- crossref_local/impact_factor/journal_lookup.py +274 -0
- crossref_local/models.py +186 -0
- crossref_local-0.3.0.dist-info/METADATA +200 -0
- crossref_local-0.3.0.dist-info/RECORD +16 -0
- crossref_local-0.3.0.dist-info/WHEEL +4 -0
- crossref_local-0.3.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
crossref_local - Local CrossRef database with full-text search.
|
|
3
|
+
|
|
4
|
+
A Python package for querying a local mirror of the CrossRef database
|
|
5
|
+
with 167M+ scholarly works, full-text search, and impact factor calculation.
|
|
6
|
+
|
|
7
|
+
Sync usage:
|
|
8
|
+
>>> from crossref_local import search, get
|
|
9
|
+
>>> results = search("hippocampal sharp wave ripples")
|
|
10
|
+
>>> work = get("10.1126/science.aax0758")
|
|
11
|
+
|
|
12
|
+
Async usage:
|
|
13
|
+
>>> from crossref_local import aio
|
|
14
|
+
>>> results = await aio.search("machine learning")
|
|
15
|
+
>>> counts = await aio.count_many(["CRISPR", "neural network"])
|
|
16
|
+
|
|
17
|
+
Configuration:
|
|
18
|
+
>>> from crossref_local import configure
|
|
19
|
+
>>> configure("/path/to/crossref.db")
|
|
20
|
+
|
|
21
|
+
Or set CROSSREF_LOCAL_DB environment variable.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
__version__ = "0.3.0"

# Synchronous entry points.
from .api import search, count, get, get_many, exists, configure, info

# Result/value types returned by the API.
from .models import Work, SearchResult

# Low-level database access.
from .db import Database, connection

# Database path configuration.
from .config import Config

# Async counterparts live in their own namespace: ``crossref_local.aio``.
from . import aio

# Citation-graph helpers.
from .citations import get_citing, get_cited, get_citation_count, CitationNetwork

# Names re-exported by ``from crossref_local import *``, grouped by area.
__all__ = [
    # Version
    "__version__",
    # Core API
    "search", "count", "get", "get_many", "exists", "configure", "info",
    # Models
    "Work", "SearchResult",
    # Database
    "Database", "connection",
    # Config
    "Config",
    # Async
    "aio",
    # Citations
    "get_citing", "get_cited", "get_citation_count", "CitationNetwork",
]
|
crossref_local/aio.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Async API for crossref_local.
|
|
3
|
+
|
|
4
|
+
Provides async versions of all API functions. Uses thread pool execution
|
|
5
|
+
with per-thread database connections for thread safety.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from crossref_local import aio
|
|
9
|
+
|
|
10
|
+
async def main():
|
|
11
|
+
results = await aio.search("machine learning")
|
|
12
|
+
work = await aio.get("10.1038/nature12373")
|
|
13
|
+
n = await aio.count("CRISPR")
|
|
14
|
+
|
|
15
|
+
# Or import individual functions
|
|
16
|
+
from crossref_local.aio import search, get, count
|
|
17
|
+
|
|
18
|
+
# Concurrent operations
|
|
19
|
+
counts = await aio.count_many(["CRISPR", "machine learning"])
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import asyncio
|
|
23
|
+
import threading
|
|
24
|
+
from typing import List, Optional
|
|
25
|
+
|
|
26
|
+
from .models import Work, SearchResult
|
|
27
|
+
from .config import Config
|
|
28
|
+
from .db import Database
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Per-thread cache: each worker thread keeps its own Database handle together
# with the path it was opened from.
_thread_local = threading.local()


def _get_thread_db() -> Database:
    """Return the calling thread's Database, opening it when needed.

    The handle is cached per thread. The configured path is re-read on every
    call so that a path change made after a thread first opened its handle
    (e.g. through ``Config.set_db_path``) is honoured instead of silently
    reusing the handle for the previous file.

    Returns:
        Database opened on the currently configured path.
    """
    wanted_path = Config.get_db_path()
    cached = getattr(_thread_local, "db", None)

    if cached is not None:
        if getattr(_thread_local, "db_path", None) == wanted_path:
            return cached
        # The configuration changed: forget the stale handle before reopening
        # so a failure below cannot leave it cached.
        del _thread_local.db
        # NOTE(review): Database is assumed to expose close(); the call is
        # skipped if it does not.
        closer = getattr(cached, "close", None)
        if callable(closer):
            closer()

    fresh = Database(wanted_path)
    _thread_local.db = fresh
    _thread_local.db_path = wanted_path
    return fresh
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _search_sync(query: str, limit: int, offset: int) -> SearchResult:
    """Blocking search that runs against the calling thread's own Database.

    Args:
        query: Search query, passed through to ``fts._search_with_db``.
        limit: Maximum results to return.
        offset: Number of leading results to skip.

    Returns:
        Whatever ``fts._search_with_db`` returns for these arguments.
    """
    from . import fts

    thread_db = _get_thread_db()
    found = fts._search_with_db(thread_db, query, limit, offset)
    return found
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _count_sync(query: str) -> int:
    """Blocking count that runs against the calling thread's own Database.

    Args:
        query: Search query, passed through to ``fts._count_with_db``.

    Returns:
        Whatever ``fts._count_with_db`` returns for this query.
    """
    from . import fts

    thread_db = _get_thread_db()
    total = fts._count_with_db(thread_db, query)
    return total
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_sync(doi: str) -> Optional[Work]:
    """Blocking DOI lookup on the calling thread's own Database.

    Args:
        doi: Digital Object Identifier to look up.

    Returns:
        A Work built from the stored metadata, or None when the lookup
        yields nothing (any falsy metadata value).
    """
    record = _get_thread_db().get_metadata(doi)
    if not record:
        return None
    return Work.from_metadata(doi, record)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_many_sync(dois: List[str]) -> List[Work]:
    """Blocking multi-DOI lookup on the calling thread's own Database.

    Args:
        dois: DOIs to look up, processed in the given order.

    Returns:
        Work objects for the DOIs whose metadata lookup returned a truthy
        value; the others are left out.
    """
    thread_db = _get_thread_db()
    return [
        Work.from_metadata(doi, record)
        for doi in dois
        if (record := thread_db.get_metadata(doi))
    ]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _exists_sync(doi: str) -> bool:
    """Blocking existence check on the calling thread's own Database.

    Args:
        doi: Digital Object Identifier to check.

    Returns:
        True when the ``works`` table has a row with this DOI.
    """
    hit = _get_thread_db().fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
    return hit is not None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _info_sync() -> dict:
    """Blocking database summary using the calling thread's own Database.

    Returns:
        Dict with the keys ``db_path``, ``works``, ``fts_indexed`` and
        ``citations``.
    """
    thread_db = _get_thread_db()

    def _best_effort_count(sql: str) -> int:
        # Best effort: any failure while counting is reported as 0.
        try:
            hit = thread_db.fetchone(sql)
            return hit["count"] if hit else 0
        except Exception:
            return 0

    # The works count is not guarded: an error here propagates to the caller.
    works_row = thread_db.fetchone("SELECT COUNT(*) as count FROM works")
    work_total = works_row["count"] if works_row else 0

    fts_total = _best_effort_count("SELECT COUNT(*) as count FROM works_fts")
    citation_total = _best_effort_count("SELECT COUNT(*) as count FROM citations")

    summary = {
        "db_path": str(Config.get_db_path()),
        "works": work_total,
        "fts_indexed": fts_total,
        "citations": citation_total,
    }
    return summary
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def search(query: str, limit: int = 10, offset: int = 0) -> SearchResult:
    """
    Search works without blocking the event loop.

    The blocking search is handed to a worker thread via ``asyncio.to_thread``.

    Args:
        query: Search query (supports FTS5 syntax)
        limit: Maximum results to return
        offset: Skip first N results (for pagination)

    Returns:
        SearchResult with matching works
    """
    outcome = await asyncio.to_thread(_search_sync, query, limit, offset)
    return outcome
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
async def count(query: str) -> int:
    """
    Count matching works without fetching them or blocking the event loop.

    The blocking count is handed to a worker thread via ``asyncio.to_thread``.

    Args:
        query: FTS5 search query

    Returns:
        Number of matching works
    """
    total = await asyncio.to_thread(_count_sync, query)
    return total
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
async def get(doi: str) -> Optional[Work]:
    """
    Fetch a single work by DOI without blocking the event loop.

    The blocking lookup is handed to a worker thread via ``asyncio.to_thread``.

    Args:
        doi: Digital Object Identifier

    Returns:
        Work object or None if not found
    """
    work = await asyncio.to_thread(_get_sync, doi)
    return work
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
async def get_many(dois: List[str]) -> List[Work]:
    """
    Fetch several works by DOI without blocking the event loop.

    All lookups run in one worker-thread call via ``asyncio.to_thread``.

    Args:
        dois: List of DOIs

    Returns:
        List of Work objects (missing DOIs are skipped)
    """
    works = await asyncio.to_thread(_get_many_sync, dois)
    return works
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
async def exists(doi: str) -> bool:
    """
    Check for a DOI in the database without blocking the event loop.

    The blocking check is handed to a worker thread via ``asyncio.to_thread``.

    Args:
        doi: Digital Object Identifier

    Returns:
        True if DOI exists
    """
    present = await asyncio.to_thread(_exists_sync, doi)
    return present
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
async def info() -> dict:
    """
    Collect database statistics without blocking the event loop.

    The blocking queries are handed to a worker thread via
    ``asyncio.to_thread``.

    Returns:
        Dictionary with database stats
    """
    summary = await asyncio.to_thread(_info_sync)
    return summary
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
async def search_many(queries: List[str], limit: int = 10) -> List[SearchResult]:
    """
    Issue one search per query and await them all together.

    Args:
        queries: List of search queries
        limit: Maximum results per query

    Returns:
        List of SearchResult objects, in the same order as ``queries``
    """
    pending = []
    for text in queries:
        pending.append(search(text, limit=limit))
    return await asyncio.gather(*pending)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
async def count_many(queries: List[str]) -> dict:
    """
    Count matches for multiple queries concurrently.

    Repeated queries are counted only once: the result is keyed by query, so
    running the same count again would be wasted work. The input is also
    materialized once, so any iterable of queries (not only a list) works.

    Args:
        queries: List of search queries

    Returns:
        Dict mapping query -> count, ordered by first occurrence in ``queries``

    Example:
        >>> counts = await count_many(["CRISPR", "machine learning"])
        >>> print(counts)
        {'CRISPR': 45000, 'machine learning': 477922}
    """
    # dict.fromkeys drops duplicates while preserving first-seen order.
    unique_queries = list(dict.fromkeys(queries))
    totals = await asyncio.gather(*(count(q) for q in unique_queries))
    return dict(zip(unique_queries, totals))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# Public coroutine API of this module.
__all__ = [
    # Single-item operations
    "search", "count", "get", "get_many", "exists", "info",
    # Concurrent fan-out helpers
    "search_many", "count_many",
]
|
crossref_local/api.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Main API for crossref_local."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from .config import Config
|
|
6
|
+
from .db import Database, get_db, close_db, connection
|
|
7
|
+
from .models import Work, SearchResult
|
|
8
|
+
from . import fts
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def search(query: str, limit: int = 10, offset: int = 0) -> SearchResult:
    """
    Run a full-text search over the works in the database.

    Delegates to ``fts.search``, which uses the FTS5 index for fast searching
    across titles, abstracts, and authors.

    Args:
        query: Search query (supports FTS5 syntax)
        limit: Maximum results to return
        offset: Skip first N results (for pagination)

    Returns:
        SearchResult with matching works

    Example:
        >>> from crossref_local import search
        >>> results = search("machine learning")
        >>> print(f"Found {results.total} matches")
    """
    outcome = fts.search(query, limit, offset)
    return outcome
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def count(query: str) -> int:
    """
    Report how many works match a query, without fetching them.

    Delegates to ``fts.count``.

    Args:
        query: FTS5 search query

    Returns:
        Number of matching works
    """
    total = fts.count(query)
    return total
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get(doi: str) -> Optional[Work]:
    """
    Look up a single work by its DOI.

    Args:
        doi: Digital Object Identifier

    Returns:
        Work object, or None when the metadata lookup yields nothing

    Example:
        >>> from crossref_local import get
        >>> work = get("10.1038/nature12373")
        >>> print(work.title)
    """
    record = get_db().get_metadata(doi)
    if not record:
        return None
    return Work.from_metadata(doi, record)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_many(dois: List[str]) -> List[Work]:
    """
    Look up several works by DOI, in the order given.

    Args:
        dois: List of DOIs

    Returns:
        List of Work objects (missing DOIs are skipped)
    """
    database = get_db()
    return [
        Work.from_metadata(doi, record)
        for doi in dois
        if (record := database.get_metadata(doi))
    ]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def exists(doi: str) -> bool:
    """
    Tell whether the ``works`` table has a row for the given DOI.

    Args:
        doi: Digital Object Identifier

    Returns:
        True if DOI exists
    """
    hit = get_db().fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
    return hit is not None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def configure(db_path: str) -> None:
    """
    Point the package at a different database file.

    Stores the path on ``Config`` and then closes the shared database handle.

    Args:
        db_path: Path to CrossRef SQLite database

    Example:
        >>> from crossref_local import configure
        >>> configure("/path/to/crossref.db")
    """
    Config.set_db_path(db_path)
    # Drop the existing singleton so the new path is used from here on.
    close_db()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def info() -> dict:
    """
    Summarize the database: its path and row counts.

    Returns:
        Dictionary with the keys ``db_path``, ``works``, ``fts_indexed`` and
        ``citations``
    """
    database = get_db()

    def _best_effort_count(sql: str) -> int:
        # Best effort: any failure while counting is reported as 0.
        try:
            hit = database.fetchone(sql)
            return hit["count"] if hit else 0
        except Exception:
            return 0

    # The works count is not guarded: an error here propagates to the caller.
    works_row = database.fetchone("SELECT COUNT(*) as count FROM works")
    work_total = works_row["count"] if works_row else 0

    fts_total = _best_effort_count("SELECT COUNT(*) as count FROM works_fts")
    citation_total = _best_effort_count("SELECT COUNT(*) as count FROM citations")

    summary = {
        "db_path": str(Config.get_db_path()),
        "works": work_total,
        "fts_indexed": fts_total,
        "citations": citation_total,
    }
    return summary
|