crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +86 -22
- crossref_local/__main__.py +6 -0
- crossref_local/aio.py +0 -0
- crossref_local/api.py +148 -5
- crossref_local/cache.py +466 -0
- crossref_local/cache_export.py +83 -0
- crossref_local/cache_viz.py +296 -0
- crossref_local/citations.py +0 -0
- crossref_local/cli.py +358 -97
- crossref_local/cli_cache.py +179 -0
- crossref_local/cli_completion.py +245 -0
- crossref_local/cli_main.py +20 -0
- crossref_local/cli_mcp.py +275 -0
- crossref_local/config.py +99 -3
- crossref_local/db.py +3 -1
- crossref_local/fts.py +38 -4
- crossref_local/impact_factor/__init__.py +0 -0
- crossref_local/impact_factor/calculator.py +0 -0
- crossref_local/impact_factor/journal_lookup.py +0 -0
- crossref_local/mcp_server.py +413 -0
- crossref_local/models.py +0 -0
- crossref_local/remote.py +269 -0
- crossref_local/server.py +352 -0
- {crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/METADATA +152 -7
- crossref_local-0.4.0.dist-info/RECORD +27 -0
- crossref_local-0.4.0.dist-info/entry_points.txt +3 -0
- crossref_local-0.3.0.dist-info/RECORD +0 -16
- crossref_local-0.3.0.dist-info/entry_points.txt +0 -2
- {crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/WHEEL +0 -0
crossref_local/__init__.py
CHANGED
|
@@ -4,6 +4,9 @@ crossref_local - Local CrossRef database with full-text search.
|
|
|
4
4
|
A Python package for querying a local mirror of the CrossRef database
|
|
5
5
|
with 167M+ scholarly works, full-text search, and impact factor calculation.
|
|
6
6
|
|
|
7
|
+
Quick Start
|
|
8
|
+
-----------
|
|
9
|
+
|
|
7
10
|
Sync usage:
|
|
8
11
|
>>> from crossref_local import search, get
|
|
9
12
|
>>> results = search("hippocampal sharp wave ripples")
|
|
@@ -14,65 +17,126 @@ Async usage:
|
|
|
14
17
|
>>> results = await aio.search("machine learning")
|
|
15
18
|
>>> counts = await aio.count_many(["CRISPR", "neural network"])
|
|
16
19
|
|
|
17
|
-
Configuration
|
|
20
|
+
Configuration
|
|
21
|
+
-------------
|
|
22
|
+
|
|
23
|
+
DB mode (direct database access):
|
|
18
24
|
>>> from crossref_local import configure
|
|
19
25
|
>>> configure("/path/to/crossref.db")
|
|
20
|
-
|
|
21
26
|
Or set CROSSREF_LOCAL_DB environment variable.
|
|
27
|
+
|
|
28
|
+
HTTP mode (API access via HTTP):
|
|
29
|
+
>>> from crossref_local import configure_http
|
|
30
|
+
>>> configure_http("http://localhost:8333")
|
|
31
|
+
Or set CROSSREF_LOCAL_API_URL environment variable.
|
|
32
|
+
|
|
33
|
+
Typical setup with SSH tunnel:
|
|
34
|
+
$ ssh -L 8333:127.0.0.1:8333 your-server # In terminal
|
|
35
|
+
>>> configure_http() # Uses default localhost:8333
|
|
36
|
+
|
|
37
|
+
Public API
|
|
38
|
+
----------
|
|
39
|
+
|
|
40
|
+
Functions:
|
|
41
|
+
search(query, limit, offset) -> SearchResult
|
|
42
|
+
count(query) -> int
|
|
43
|
+
get(doi) -> Work | None
|
|
44
|
+
get_many(dois) -> list[Work]
|
|
45
|
+
exists(doi) -> bool
|
|
46
|
+
enrich(results) -> SearchResult
|
|
47
|
+
enrich_dois(dois) -> list[Work]
|
|
48
|
+
configure(db_path) -> None
|
|
49
|
+
configure_http(api_url) -> None (alias: configure_remote)
|
|
50
|
+
get_mode() -> str
|
|
51
|
+
info() -> dict
|
|
52
|
+
|
|
53
|
+
Citation functions:
|
|
54
|
+
get_citing(doi) -> list[str]
|
|
55
|
+
get_cited(doi) -> list[str]
|
|
56
|
+
get_citation_count(doi) -> int
|
|
57
|
+
|
|
58
|
+
Classes:
|
|
59
|
+
Work - Scholarly work with title, authors, DOI, etc.
|
|
60
|
+
SearchResult - Container for search results
|
|
61
|
+
CitationNetwork - Citation graph builder and visualizer
|
|
62
|
+
|
|
63
|
+
Modules:
|
|
64
|
+
aio - Async versions of all API functions
|
|
22
65
|
"""
|
|
23
66
|
|
|
24
|
-
__version__ = "0.3.0"
|
|
67
|
+
__version__ = "0.3.1"
|
|
25
68
|
|
|
26
|
-
# Core API
|
|
69
|
+
# Core API (public functions)
|
|
27
70
|
from .api import (
|
|
28
71
|
search,
|
|
29
72
|
count,
|
|
30
73
|
get,
|
|
31
74
|
get_many,
|
|
32
75
|
exists,
|
|
76
|
+
enrich,
|
|
77
|
+
enrich_dois,
|
|
33
78
|
configure,
|
|
79
|
+
configure_http,
|
|
80
|
+
configure_remote, # Backward compatibility alias
|
|
81
|
+
get_mode,
|
|
34
82
|
info,
|
|
35
83
|
)
|
|
36
84
|
|
|
37
|
-
# Models
|
|
85
|
+
# Models (public classes)
|
|
38
86
|
from .models import Work, SearchResult
|
|
39
87
|
|
|
40
|
-
#
|
|
41
|
-
from .db import Database, connection
|
|
42
|
-
|
|
43
|
-
# Configuration
|
|
44
|
-
from .config import Config
|
|
45
|
-
|
|
46
|
-
# Async API
|
|
88
|
+
# Async API (public module)
|
|
47
89
|
from . import aio
|
|
48
90
|
|
|
49
|
-
# Citation network
|
|
91
|
+
# Citation network (public functions and classes)
|
|
50
92
|
from .citations import get_citing, get_cited, get_citation_count, CitationNetwork
|
|
51
93
|
|
|
94
|
+
# Cache module (public)
|
|
95
|
+
from . import cache
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Public API - what users should import
|
|
52
99
|
__all__ = [
|
|
53
100
|
# Version
|
|
54
101
|
"__version__",
|
|
55
|
-
# Core
|
|
102
|
+
# Core search/retrieval
|
|
56
103
|
"search",
|
|
57
104
|
"count",
|
|
58
105
|
"get",
|
|
59
106
|
"get_many",
|
|
60
107
|
"exists",
|
|
108
|
+
# Enrichment (add citations/references to search results)
|
|
109
|
+
"enrich",
|
|
110
|
+
"enrich_dois",
|
|
111
|
+
# Configuration
|
|
61
112
|
"configure",
|
|
113
|
+
"configure_http",
|
|
114
|
+
"configure_remote", # Backward compatibility alias
|
|
115
|
+
"get_mode",
|
|
62
116
|
"info",
|
|
63
|
-
#
|
|
117
|
+
# Data models
|
|
64
118
|
"Work",
|
|
65
119
|
"SearchResult",
|
|
66
|
-
#
|
|
67
|
-
"Database",
|
|
68
|
-
"connection",
|
|
69
|
-
# Config
|
|
70
|
-
"Config",
|
|
71
|
-
# Async
|
|
120
|
+
# Async API
|
|
72
121
|
"aio",
|
|
73
|
-
#
|
|
122
|
+
# Cache module
|
|
123
|
+
"cache",
|
|
124
|
+
# Citation network
|
|
74
125
|
"get_citing",
|
|
75
126
|
"get_cited",
|
|
76
127
|
"get_citation_count",
|
|
77
128
|
"CitationNetwork",
|
|
78
129
|
]
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ============================================================================
|
|
133
|
+
# Advanced / Internal APIs (not in __all__, but importable if needed)
|
|
134
|
+
# ============================================================================
|
|
135
|
+
# These are exposed for advanced users but not part of the stable public API.
|
|
136
|
+
# Use at your own risk - they may change without notice.
|
|
137
|
+
#
|
|
138
|
+
# from crossref_local.db import Database, connection
|
|
139
|
+
# from crossref_local.config import Config
|
|
140
|
+
# from crossref_local.remote import RemoteClient
|
|
141
|
+
# from crossref_local.fts import search_dois
|
|
142
|
+
# ============================================================================
|
crossref_local/aio.py
CHANGED
|
File without changes
|
crossref_local/api.py
CHANGED
|
@@ -1,13 +1,30 @@
|
|
|
1
|
-
"""Main API for crossref_local."""
|
|
1
|
+
"""Main API for crossref_local.
|
|
2
|
+
|
|
3
|
+
Supports two modes:
|
|
4
|
+
- db: Direct database access (requires database file)
|
|
5
|
+
- http: HTTP API access (requires API server)
|
|
6
|
+
|
|
7
|
+
Mode is auto-detected or can be set explicitly via:
|
|
8
|
+
- CROSSREF_LOCAL_MODE environment variable ("db" or "http")
|
|
9
|
+
- CROSSREF_LOCAL_API_URL environment variable (API URL)
|
|
10
|
+
- configure() or configure_http() functions
|
|
11
|
+
"""
|
|
2
12
|
|
|
3
13
|
from typing import List, Optional
|
|
4
14
|
|
|
5
15
|
from .config import Config
|
|
6
|
-
from .db import
|
|
16
|
+
from .db import get_db, close_db
|
|
7
17
|
from .models import Work, SearchResult
|
|
8
18
|
from . import fts
|
|
9
19
|
|
|
10
20
|
|
|
21
|
+
def _get_http_client():
|
|
22
|
+
"""Get HTTP client (lazy import to avoid circular dependency)."""
|
|
23
|
+
from .remote import RemoteClient
|
|
24
|
+
|
|
25
|
+
return RemoteClient(Config.get_api_url())
|
|
26
|
+
|
|
27
|
+
|
|
11
28
|
def search(
|
|
12
29
|
query: str,
|
|
13
30
|
limit: int = 10,
|
|
@@ -31,6 +48,9 @@ def search(
|
|
|
31
48
|
>>> results = search("machine learning")
|
|
32
49
|
>>> print(f"Found {results.total} matches")
|
|
33
50
|
"""
|
|
51
|
+
if Config.get_mode() == "http":
|
|
52
|
+
client = _get_http_client()
|
|
53
|
+
return client.search(query=query, limit=limit)
|
|
34
54
|
return fts.search(query, limit, offset)
|
|
35
55
|
|
|
36
56
|
|
|
@@ -44,6 +64,10 @@ def count(query: str) -> int:
|
|
|
44
64
|
Returns:
|
|
45
65
|
Number of matching works
|
|
46
66
|
"""
|
|
67
|
+
if Config.get_mode() == "http":
|
|
68
|
+
client = _get_http_client()
|
|
69
|
+
result = client.search(query=query, limit=1)
|
|
70
|
+
return result.total
|
|
47
71
|
return fts.count(query)
|
|
48
72
|
|
|
49
73
|
|
|
@@ -62,6 +86,9 @@ def get(doi: str) -> Optional[Work]:
|
|
|
62
86
|
>>> work = get("10.1038/nature12373")
|
|
63
87
|
>>> print(work.title)
|
|
64
88
|
"""
|
|
89
|
+
if Config.get_mode() == "http":
|
|
90
|
+
client = _get_http_client()
|
|
91
|
+
return client.get(doi)
|
|
65
92
|
db = get_db()
|
|
66
93
|
metadata = db.get_metadata(doi)
|
|
67
94
|
if metadata:
|
|
@@ -79,6 +106,9 @@ def get_many(dois: List[str]) -> List[Work]:
|
|
|
79
106
|
Returns:
|
|
80
107
|
List of Work objects (missing DOIs are skipped)
|
|
81
108
|
"""
|
|
109
|
+
if Config.get_mode() == "http":
|
|
110
|
+
client = _get_http_client()
|
|
111
|
+
return client.get_many(dois)
|
|
82
112
|
db = get_db()
|
|
83
113
|
works = []
|
|
84
114
|
for doi in dois:
|
|
@@ -98,6 +128,9 @@ def exists(doi: str) -> bool:
|
|
|
98
128
|
Returns:
|
|
99
129
|
True if DOI exists
|
|
100
130
|
"""
|
|
131
|
+
if Config.get_mode() == "http":
|
|
132
|
+
client = _get_http_client()
|
|
133
|
+
return client.exists(doi)
|
|
101
134
|
db = get_db()
|
|
102
135
|
row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
|
|
103
136
|
return row is not None
|
|
@@ -105,7 +138,7 @@ def exists(doi: str) -> bool:
|
|
|
105
138
|
|
|
106
139
|
def configure(db_path: str) -> None:
|
|
107
140
|
"""
|
|
108
|
-
Configure database
|
|
141
|
+
Configure for local database access.
|
|
109
142
|
|
|
110
143
|
Args:
|
|
111
144
|
db_path: Path to CrossRef SQLite database
|
|
@@ -118,13 +151,122 @@ def configure(db_path: str) -> None:
|
|
|
118
151
|
close_db() # Reset singleton to use new path
|
|
119
152
|
|
|
120
153
|
|
|
154
|
+
def configure_http(api_url: str = "http://localhost:8333") -> None:
|
|
155
|
+
"""
|
|
156
|
+
Configure for HTTP API access.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
api_url: URL of CrossRef Local API server
|
|
160
|
+
|
|
161
|
+
Example:
|
|
162
|
+
>>> from crossref_local import configure_http
|
|
163
|
+
>>> configure_http("http://localhost:8333")
|
|
164
|
+
>>> # Or via SSH tunnel:
|
|
165
|
+
>>> # ssh -L 8333:127.0.0.1:8333 your-server
|
|
166
|
+
>>> configure_http() # Uses default localhost:8333
|
|
167
|
+
"""
|
|
168
|
+
Config.set_api_url(api_url)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Backward compatibility alias
|
|
172
|
+
configure_remote = configure_http
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def enrich(
|
|
176
|
+
results: SearchResult,
|
|
177
|
+
include_citations: bool = True,
|
|
178
|
+
include_references: bool = True,
|
|
179
|
+
) -> SearchResult:
|
|
180
|
+
"""
|
|
181
|
+
Enrich search results with full metadata (citations, references).
|
|
182
|
+
|
|
183
|
+
The search() function returns basic metadata for speed. This function
|
|
184
|
+
fetches full metadata for each work, adding citation counts and references.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
results: SearchResult from search()
|
|
188
|
+
include_citations: Include citation counts
|
|
189
|
+
include_references: Include reference DOIs
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
SearchResult with enriched works
|
|
193
|
+
|
|
194
|
+
Example:
|
|
195
|
+
>>> from crossref_local import search, enrich
|
|
196
|
+
>>> results = search("machine learning", limit=10)
|
|
197
|
+
>>> enriched = enrich(results)
|
|
198
|
+
>>> for work in enriched:
|
|
199
|
+
... print(f"{work.title}: {work.citation_count} citations")
|
|
200
|
+
"""
|
|
201
|
+
enriched_works = []
|
|
202
|
+
for work in results.works:
|
|
203
|
+
full_work = get(work.doi)
|
|
204
|
+
if full_work:
|
|
205
|
+
enriched_works.append(full_work)
|
|
206
|
+
else:
|
|
207
|
+
# Keep original if full metadata not available
|
|
208
|
+
enriched_works.append(work)
|
|
209
|
+
|
|
210
|
+
return SearchResult(
|
|
211
|
+
works=enriched_works,
|
|
212
|
+
total=results.total,
|
|
213
|
+
query=results.query,
|
|
214
|
+
elapsed_ms=results.elapsed_ms,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def enrich_dois(
|
|
219
|
+
dois: List[str],
|
|
220
|
+
include_citations: bool = True,
|
|
221
|
+
include_references: bool = True,
|
|
222
|
+
) -> List[Work]:
|
|
223
|
+
"""
|
|
224
|
+
Enrich a list of DOIs with full metadata.
|
|
225
|
+
|
|
226
|
+
Fetches complete metadata for each DOI including citation counts
|
|
227
|
+
and reference lists.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
dois: List of DOIs to enrich
|
|
231
|
+
include_citations: Include citation counts
|
|
232
|
+
include_references: Include reference DOIs
|
|
233
|
+
|
|
234
|
+
Returns:
|
|
235
|
+
List of Work objects with full metadata
|
|
236
|
+
|
|
237
|
+
Example:
|
|
238
|
+
>>> from crossref_local import enrich_dois
|
|
239
|
+
>>> works = enrich_dois(["10.1038/nature12373", "10.1126/science.aax0758"])
|
|
240
|
+
>>> for w in works:
|
|
241
|
+
... print(f"{w.doi}: {w.citation_count} citations, {len(w.references)} refs")
|
|
242
|
+
"""
|
|
243
|
+
return get_many(dois)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def get_mode() -> str:
|
|
247
|
+
"""
|
|
248
|
+
Get current mode.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
"db" or "http"
|
|
252
|
+
"""
|
|
253
|
+
return Config.get_mode()
|
|
254
|
+
|
|
255
|
+
|
|
121
256
|
def info() -> dict:
|
|
122
257
|
"""
|
|
123
|
-
Get database information.
|
|
258
|
+
Get database/API information.
|
|
124
259
|
|
|
125
260
|
Returns:
|
|
126
|
-
Dictionary with database stats
|
|
261
|
+
Dictionary with database stats and mode info
|
|
127
262
|
"""
|
|
263
|
+
mode = Config.get_mode()
|
|
264
|
+
|
|
265
|
+
if mode == "http":
|
|
266
|
+
client = _get_http_client()
|
|
267
|
+
http_info = client.info()
|
|
268
|
+
return {"mode": "http", **http_info}
|
|
269
|
+
|
|
128
270
|
db = get_db()
|
|
129
271
|
|
|
130
272
|
# Get work count
|
|
@@ -146,6 +288,7 @@ def info() -> dict:
|
|
|
146
288
|
citation_count = 0
|
|
147
289
|
|
|
148
290
|
return {
|
|
291
|
+
"mode": "db",
|
|
149
292
|
"db_path": str(Config.get_db_path()),
|
|
150
293
|
"works": work_count,
|
|
151
294
|
"fts_indexed": fts_count,
|