crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +38 -16
- crossref_local/__main__.py +0 -0
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/_cache/export.py +100 -0
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cache/viz.py +296 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cache.py +179 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/completion.py +245 -0
- crossref_local/_cli/main.py +20 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +130 -36
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +57 -42
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +466 -0
- crossref_local/cli.py +5 -447
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -199
- crossref_local/remote.py +5 -261
- crossref_local/server.py +5 -349
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- crossref_local-0.3.1.dist-info/RECORD +0 -20
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,26 +1,45 @@
|
|
|
1
1
|
"""Main API for crossref_local.
|
|
2
2
|
|
|
3
3
|
Supports two modes:
|
|
4
|
-
-
|
|
5
|
-
-
|
|
4
|
+
- db: Direct database access (requires database file)
|
|
5
|
+
- http: HTTP API access (requires API server)
|
|
6
6
|
|
|
7
7
|
Mode is auto-detected or can be set explicitly via:
|
|
8
|
-
- CROSSREF_LOCAL_MODE environment variable ("
|
|
9
|
-
-
|
|
10
|
-
- configure() or
|
|
8
|
+
- CROSSREF_LOCAL_MODE environment variable ("db" or "http")
|
|
9
|
+
- CROSSREF_LOCAL_API_URL environment variable (API URL)
|
|
10
|
+
- configure() or configure_http() functions
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
from typing import List, Optional
|
|
14
14
|
|
|
15
|
-
from .config import Config
|
|
16
|
-
from .db import get_db, close_db
|
|
17
|
-
from .models import Work, SearchResult
|
|
18
15
|
from . import fts
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
from .config import Config
|
|
17
|
+
from .db import close_db, get_db
|
|
18
|
+
from .models import SearchResult, Work
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"search",
|
|
22
|
+
"count",
|
|
23
|
+
"get",
|
|
24
|
+
"get_many",
|
|
25
|
+
"exists",
|
|
26
|
+
"configure",
|
|
27
|
+
"configure_http",
|
|
28
|
+
"configure_remote",
|
|
29
|
+
"enrich",
|
|
30
|
+
"enrich_dois",
|
|
31
|
+
"get_mode",
|
|
32
|
+
"info",
|
|
33
|
+
# Re-exported for convenience
|
|
34
|
+
"Work",
|
|
35
|
+
"SearchResult",
|
|
36
|
+
"Config",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _get_http_client():
|
|
41
|
+
"""Get HTTP client (lazy import to avoid circular dependency)."""
|
|
42
|
+
from .._remote import RemoteClient # Uses enhanced client with collections
|
|
24
43
|
|
|
25
44
|
return RemoteClient(Config.get_api_url())
|
|
26
45
|
|
|
@@ -48,9 +67,9 @@ def search(
|
|
|
48
67
|
>>> results = search("machine learning")
|
|
49
68
|
>>> print(f"Found {results.total} matches")
|
|
50
69
|
"""
|
|
51
|
-
if Config.get_mode() == "
|
|
52
|
-
client =
|
|
53
|
-
return client.search(query=query, limit=limit)
|
|
70
|
+
if Config.get_mode() == "http":
|
|
71
|
+
client = _get_http_client()
|
|
72
|
+
return client.search(query=query, limit=limit, offset=offset)
|
|
54
73
|
return fts.search(query, limit, offset)
|
|
55
74
|
|
|
56
75
|
|
|
@@ -64,8 +83,8 @@ def count(query: str) -> int:
|
|
|
64
83
|
Returns:
|
|
65
84
|
Number of matching works
|
|
66
85
|
"""
|
|
67
|
-
if Config.get_mode() == "
|
|
68
|
-
client =
|
|
86
|
+
if Config.get_mode() == "http":
|
|
87
|
+
client = _get_http_client()
|
|
69
88
|
result = client.search(query=query, limit=1)
|
|
70
89
|
return result.total
|
|
71
90
|
return fts.count(query)
|
|
@@ -86,8 +105,8 @@ def get(doi: str) -> Optional[Work]:
|
|
|
86
105
|
>>> work = get("10.1038/nature12373")
|
|
87
106
|
>>> print(work.title)
|
|
88
107
|
"""
|
|
89
|
-
if Config.get_mode() == "
|
|
90
|
-
client =
|
|
108
|
+
if Config.get_mode() == "http":
|
|
109
|
+
client = _get_http_client()
|
|
91
110
|
return client.get(doi)
|
|
92
111
|
db = get_db()
|
|
93
112
|
metadata = db.get_metadata(doi)
|
|
@@ -106,8 +125,8 @@ def get_many(dois: List[str]) -> List[Work]:
|
|
|
106
125
|
Returns:
|
|
107
126
|
List of Work objects (missing DOIs are skipped)
|
|
108
127
|
"""
|
|
109
|
-
if Config.get_mode() == "
|
|
110
|
-
client =
|
|
128
|
+
if Config.get_mode() == "http":
|
|
129
|
+
client = _get_http_client()
|
|
111
130
|
return client.get_many(dois)
|
|
112
131
|
db = get_db()
|
|
113
132
|
works = []
|
|
@@ -128,8 +147,8 @@ def exists(doi: str) -> bool:
|
|
|
128
147
|
Returns:
|
|
129
148
|
True if DOI exists
|
|
130
149
|
"""
|
|
131
|
-
if Config.get_mode() == "
|
|
132
|
-
client =
|
|
150
|
+
if Config.get_mode() == "http":
|
|
151
|
+
client = _get_http_client()
|
|
133
152
|
return client.exists(doi)
|
|
134
153
|
db = get_db()
|
|
135
154
|
row = db.fetchone("SELECT 1 FROM works WHERE doi = ?", (doi,))
|
|
@@ -151,29 +170,104 @@ def configure(db_path: str) -> None:
|
|
|
151
170
|
close_db() # Reset singleton to use new path
|
|
152
171
|
|
|
153
172
|
|
|
154
|
-
def
|
|
173
|
+
def configure_http(api_url: str = "http://localhost:8333") -> None:
|
|
155
174
|
"""
|
|
156
|
-
Configure for
|
|
175
|
+
Configure for HTTP API access.
|
|
157
176
|
|
|
158
177
|
Args:
|
|
159
178
|
api_url: URL of CrossRef Local API server
|
|
160
179
|
|
|
161
180
|
Example:
|
|
162
|
-
>>> from crossref_local import
|
|
163
|
-
>>>
|
|
181
|
+
>>> from crossref_local import configure_http
|
|
182
|
+
>>> configure_http("http://localhost:8333")
|
|
164
183
|
>>> # Or via SSH tunnel:
|
|
165
|
-
>>> # ssh -L
|
|
166
|
-
>>>
|
|
184
|
+
>>> # ssh -L 8333:127.0.0.1:8333 your-server
|
|
185
|
+
>>> configure_http() # Uses default localhost:8333
|
|
167
186
|
"""
|
|
168
187
|
Config.set_api_url(api_url)
|
|
169
188
|
|
|
170
189
|
|
|
190
|
+
# Backward compatibility alias
|
|
191
|
+
configure_remote = configure_http
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def enrich(
|
|
195
|
+
results: SearchResult,
|
|
196
|
+
include_citations: bool = True,
|
|
197
|
+
include_references: bool = True,
|
|
198
|
+
) -> SearchResult:
|
|
199
|
+
"""
|
|
200
|
+
Enrich search results with full metadata (citations, references).
|
|
201
|
+
|
|
202
|
+
The search() function returns basic metadata for speed. This function
|
|
203
|
+
fetches full metadata for each work, adding citation counts and references.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
results: SearchResult from search()
|
|
207
|
+
include_citations: Include citation counts
|
|
208
|
+
include_references: Include reference DOIs
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
SearchResult with enriched works
|
|
212
|
+
|
|
213
|
+
Example:
|
|
214
|
+
>>> from crossref_local import search, enrich
|
|
215
|
+
>>> results = search("machine learning", limit=10)
|
|
216
|
+
>>> enriched = enrich(results)
|
|
217
|
+
>>> for work in enriched:
|
|
218
|
+
... print(f"{work.title}: {work.citation_count} citations")
|
|
219
|
+
"""
|
|
220
|
+
enriched_works = []
|
|
221
|
+
for work in results.works:
|
|
222
|
+
full_work = get(work.doi)
|
|
223
|
+
if full_work:
|
|
224
|
+
enriched_works.append(full_work)
|
|
225
|
+
else:
|
|
226
|
+
# Keep original if full metadata not available
|
|
227
|
+
enriched_works.append(work)
|
|
228
|
+
|
|
229
|
+
return SearchResult(
|
|
230
|
+
works=enriched_works,
|
|
231
|
+
total=results.total,
|
|
232
|
+
query=results.query,
|
|
233
|
+
elapsed_ms=results.elapsed_ms,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def enrich_dois(
|
|
238
|
+
dois: List[str],
|
|
239
|
+
include_citations: bool = True,
|
|
240
|
+
include_references: bool = True,
|
|
241
|
+
) -> List[Work]:
|
|
242
|
+
"""
|
|
243
|
+
Enrich a list of DOIs with full metadata.
|
|
244
|
+
|
|
245
|
+
Fetches complete metadata for each DOI including citation counts
|
|
246
|
+
and reference lists.
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
dois: List of DOIs to enrich
|
|
250
|
+
include_citations: Include citation counts
|
|
251
|
+
include_references: Include reference DOIs
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
List of Work objects with full metadata
|
|
255
|
+
|
|
256
|
+
Example:
|
|
257
|
+
>>> from crossref_local import enrich_dois
|
|
258
|
+
>>> works = enrich_dois(["10.1038/nature12373", "10.1126/science.aax0758"])
|
|
259
|
+
>>> for w in works:
|
|
260
|
+
... print(f"{w.doi}: {w.citation_count} citations, {len(w.references)} refs")
|
|
261
|
+
"""
|
|
262
|
+
return get_many(dois)
|
|
263
|
+
|
|
264
|
+
|
|
171
265
|
def get_mode() -> str:
|
|
172
266
|
"""
|
|
173
267
|
Get current mode.
|
|
174
268
|
|
|
175
269
|
Returns:
|
|
176
|
-
"
|
|
270
|
+
"db" or "http"
|
|
177
271
|
"""
|
|
178
272
|
return Config.get_mode()
|
|
179
273
|
|
|
@@ -187,10 +281,10 @@ def info() -> dict:
|
|
|
187
281
|
"""
|
|
188
282
|
mode = Config.get_mode()
|
|
189
283
|
|
|
190
|
-
if mode == "
|
|
191
|
-
client =
|
|
192
|
-
|
|
193
|
-
return {"mode": "
|
|
284
|
+
if mode == "http":
|
|
285
|
+
client = _get_http_client()
|
|
286
|
+
http_info = client.info()
|
|
287
|
+
return {"mode": "http", **http_info}
|
|
194
288
|
|
|
195
289
|
db = get_db()
|
|
196
290
|
|
|
@@ -213,7 +307,7 @@ def info() -> dict:
|
|
|
213
307
|
citation_count = 0
|
|
214
308
|
|
|
215
309
|
return {
|
|
216
|
-
"mode": "
|
|
310
|
+
"mode": "db",
|
|
217
311
|
"db_path": str(Config.get_db_path()),
|
|
218
312
|
"works": work_count,
|
|
219
313
|
"fts_indexed": fts_count,
|
|
@@ -16,13 +16,22 @@ Usage:
|
|
|
16
16
|
network.save_html("citation_network.html")
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
from dataclasses import dataclass
|
|
20
|
-
from
|
|
21
|
-
from
|
|
19
|
+
from dataclasses import dataclass as _dataclass
|
|
20
|
+
from dataclasses import field as _field
|
|
21
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
22
22
|
|
|
23
|
-
from .db import
|
|
23
|
+
from .db import Database, get_db
|
|
24
24
|
from .models import Work
|
|
25
25
|
|
|
26
|
+
__all__ = [
|
|
27
|
+
"get_citing",
|
|
28
|
+
"get_cited",
|
|
29
|
+
"get_citation_count",
|
|
30
|
+
"CitationNode",
|
|
31
|
+
"CitationEdge",
|
|
32
|
+
"CitationNetwork",
|
|
33
|
+
]
|
|
34
|
+
|
|
26
35
|
|
|
27
36
|
def get_citing(doi: str, limit: int = 100, db: Optional[Database] = None) -> List[str]:
|
|
28
37
|
"""
|
|
@@ -46,7 +55,7 @@ def get_citing(doi: str, limit: int = 100, db: Optional[Database] = None) -> Lis
|
|
|
46
55
|
WHERE cited_doi = ?
|
|
47
56
|
LIMIT ?
|
|
48
57
|
""",
|
|
49
|
-
(doi, limit)
|
|
58
|
+
(doi, limit),
|
|
50
59
|
)
|
|
51
60
|
return [row["citing_doi"] for row in rows]
|
|
52
61
|
|
|
@@ -73,7 +82,7 @@ def get_cited(doi: str, limit: int = 100, db: Optional[Database] = None) -> List
|
|
|
73
82
|
WHERE citing_doi = ?
|
|
74
83
|
LIMIT ?
|
|
75
84
|
""",
|
|
76
|
-
(doi, limit)
|
|
85
|
+
(doi, limit),
|
|
77
86
|
)
|
|
78
87
|
return [row["cited_doi"] for row in rows]
|
|
79
88
|
|
|
@@ -93,18 +102,18 @@ def get_citation_count(doi: str, db: Optional[Database] = None) -> int:
|
|
|
93
102
|
db = get_db()
|
|
94
103
|
|
|
95
104
|
row = db.fetchone(
|
|
96
|
-
"SELECT COUNT(*) as count FROM citations WHERE cited_doi = ?",
|
|
97
|
-
(doi,)
|
|
105
|
+
"SELECT COUNT(*) as count FROM citations WHERE cited_doi = ?", (doi,)
|
|
98
106
|
)
|
|
99
107
|
return row["count"] if row else 0
|
|
100
108
|
|
|
101
109
|
|
|
102
|
-
@
|
|
110
|
+
@_dataclass
|
|
103
111
|
class CitationNode:
|
|
104
112
|
"""A node in the citation network."""
|
|
113
|
+
|
|
105
114
|
doi: str
|
|
106
115
|
title: str = ""
|
|
107
|
-
authors: List[str] =
|
|
116
|
+
authors: List[str] = _field(default_factory=list)
|
|
108
117
|
year: Optional[int] = None
|
|
109
118
|
journal: str = ""
|
|
110
119
|
citation_count: int = 0
|
|
@@ -122,9 +131,10 @@ class CitationNode:
|
|
|
122
131
|
}
|
|
123
132
|
|
|
124
133
|
|
|
125
|
-
@
|
|
134
|
+
@_dataclass
|
|
126
135
|
class CitationEdge:
|
|
127
136
|
"""An edge in the citation network (citing -> cited)."""
|
|
137
|
+
|
|
128
138
|
citing_doi: str
|
|
129
139
|
cited_doi: str
|
|
130
140
|
year: Optional[int] = None
|
|
@@ -272,6 +282,8 @@ class CitationNetwork:
|
|
|
272
282
|
Raises:
|
|
273
283
|
ImportError: If pyvis is not installed
|
|
274
284
|
"""
|
|
285
|
+
import math as _math
|
|
286
|
+
|
|
275
287
|
try:
|
|
276
288
|
from pyvis.network import Network
|
|
277
289
|
except ImportError:
|
|
@@ -284,7 +296,7 @@ class CitationNetwork:
|
|
|
284
296
|
directed=True,
|
|
285
297
|
bgcolor="#ffffff",
|
|
286
298
|
font_color="#333333",
|
|
287
|
-
**kwargs
|
|
299
|
+
**kwargs,
|
|
288
300
|
)
|
|
289
301
|
|
|
290
302
|
# Configure physics
|
|
@@ -298,15 +310,16 @@ class CitationNetwork:
|
|
|
298
310
|
# Add nodes with styling based on depth and citation count
|
|
299
311
|
for doi, node in self.nodes.items():
|
|
300
312
|
# Size based on citation count (log scale)
|
|
301
|
-
|
|
302
|
-
size = 10 + min(30, math.log1p(node.citation_count) * 5)
|
|
313
|
+
size = 10 + min(30, _math.log1p(node.citation_count) * 5)
|
|
303
314
|
|
|
304
315
|
# Color based on depth
|
|
305
316
|
colors = ["#e74c3c", "#3498db", "#2ecc71", "#9b59b6", "#f39c12"]
|
|
306
317
|
color = colors[min(node.depth, len(colors) - 1)]
|
|
307
318
|
|
|
308
319
|
# Label
|
|
309
|
-
title_short = (
|
|
320
|
+
title_short = (
|
|
321
|
+
(node.title[:50] + "...") if len(node.title) > 50 else node.title
|
|
322
|
+
)
|
|
310
323
|
label = f"{title_short}\n({node.year or 'N/A'})"
|
|
311
324
|
|
|
312
325
|
# Tooltip
|
|
@@ -316,7 +329,7 @@ class CitationNetwork:
|
|
|
316
329
|
tooltip = f"""
|
|
317
330
|
<b>{node.title}</b><br>
|
|
318
331
|
{authors_str}<br>
|
|
319
|
-
{node.journal} ({node.year or
|
|
332
|
+
{node.journal} ({node.year or "N/A"})<br>
|
|
320
333
|
Citations: {node.citation_count}<br>
|
|
321
334
|
DOI: {doi}
|
|
322
335
|
"""
|
|
@@ -340,7 +353,9 @@ class CitationNetwork:
|
|
|
340
353
|
net.save_graph(path)
|
|
341
354
|
return path
|
|
342
355
|
|
|
343
|
-
def save_png(
|
|
356
|
+
def save_png(
|
|
357
|
+
self, path: str = "citation_network.png", figsize: Tuple[int, int] = (12, 10)
|
|
358
|
+
):
|
|
344
359
|
"""
|
|
345
360
|
Save static PNG visualization using matplotlib.
|
|
346
361
|
|
|
@@ -351,6 +366,8 @@ class CitationNetwork:
|
|
|
351
366
|
Raises:
|
|
352
367
|
ImportError: If matplotlib is not installed
|
|
353
368
|
"""
|
|
369
|
+
import math as _math
|
|
370
|
+
|
|
354
371
|
try:
|
|
355
372
|
import matplotlib.pyplot as plt
|
|
356
373
|
import networkx as nx
|
|
@@ -365,24 +382,34 @@ class CitationNetwork:
|
|
|
365
382
|
pos = nx.spring_layout(G, k=2, iterations=50)
|
|
366
383
|
|
|
367
384
|
# Node sizes based on citation count
|
|
368
|
-
|
|
369
|
-
|
|
385
|
+
sizes = [
|
|
386
|
+
100 + min(500, _math.log1p(self.nodes[n].citation_count) * 50)
|
|
387
|
+
for n in G.nodes()
|
|
388
|
+
]
|
|
370
389
|
|
|
371
390
|
# Node colors based on depth
|
|
372
391
|
colors = [self.nodes[n].depth for n in G.nodes()]
|
|
373
392
|
|
|
374
393
|
# Draw
|
|
375
|
-
nx.draw_networkx_nodes(
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
394
|
+
nx.draw_networkx_nodes(
|
|
395
|
+
G,
|
|
396
|
+
pos,
|
|
397
|
+
node_size=sizes,
|
|
398
|
+
node_color=colors,
|
|
399
|
+
cmap=plt.cm.RdYlBu_r,
|
|
400
|
+
alpha=0.8,
|
|
401
|
+
ax=ax,
|
|
402
|
+
)
|
|
403
|
+
nx.draw_networkx_edges(G, pos, alpha=0.3, arrows=True, arrowsize=10, ax=ax)
|
|
379
404
|
|
|
380
405
|
# Labels for important nodes (high citation count)
|
|
381
406
|
labels = {}
|
|
382
407
|
for doi in G.nodes():
|
|
383
408
|
node = self.nodes[doi]
|
|
384
409
|
if node.citation_count > 10 or doi == self.center_doi:
|
|
385
|
-
short_title = (
|
|
410
|
+
short_title = (
|
|
411
|
+
(node.title[:30] + "...") if len(node.title) > 30 else node.title
|
|
412
|
+
)
|
|
386
413
|
labels[doi] = f"{short_title}\n({node.year or 'N/A'})"
|
|
387
414
|
|
|
388
415
|
nx.draw_networkx_labels(G, pos, labels, font_size=8, ax=ax)
|
|
@@ -402,11 +429,13 @@ class CitationNetwork:
|
|
|
402
429
|
"center_doi": self.center_doi,
|
|
403
430
|
"depth": self.depth,
|
|
404
431
|
"nodes": [n.to_dict() for n in self.nodes.values()],
|
|
405
|
-
"edges": [
|
|
432
|
+
"edges": [
|
|
433
|
+
{"citing": e.citing_doi, "cited": e.cited_doi} for e in self.edges
|
|
434
|
+
],
|
|
406
435
|
"stats": {
|
|
407
436
|
"total_nodes": len(self.nodes),
|
|
408
437
|
"total_edges": len(self.edges),
|
|
409
|
-
}
|
|
438
|
+
},
|
|
410
439
|
}
|
|
411
440
|
|
|
412
441
|
def __repr__(self):
|
|
@@ -1,32 +1,42 @@
|
|
|
1
1
|
"""Configuration for crossref_local."""
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
3
|
+
import os as _os
|
|
4
|
+
from pathlib import Path as _Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Config",
|
|
9
|
+
"get_db_path",
|
|
10
|
+
"DEFAULT_PORT",
|
|
11
|
+
"DEFAULT_API_URL",
|
|
12
|
+
]
|
|
13
|
+
|
|
7
14
|
# Default database locations (checked in order)
|
|
8
15
|
DEFAULT_DB_PATHS = [
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Path("/mnt/nas_ug/crossref_local/data/crossref.db"),
|
|
12
|
-
Path.home() / ".crossref_local" / "crossref.db",
|
|
13
|
-
Path.cwd() / "data" / "crossref.db",
|
|
16
|
+
_Path.cwd() / "data" / "crossref.db",
|
|
17
|
+
_Path.home() / ".crossref_local" / "crossref.db",
|
|
14
18
|
]
|
|
15
19
|
|
|
16
|
-
# Default
|
|
20
|
+
# Default port: SCITEX convention (3129X scheme)
|
|
21
|
+
# 31290: scitex-cloud, 31291: crossref-local, 31292: openalex-local, 31293: audio relay
|
|
22
|
+
DEFAULT_PORT = 31291
|
|
23
|
+
|
|
24
|
+
# Default remote API URLs (checked in order)
|
|
17
25
|
DEFAULT_API_URLS = [
|
|
18
|
-
"http://localhost:
|
|
26
|
+
f"http://localhost:{DEFAULT_PORT}", # SCITEX default
|
|
27
|
+
"http://localhost:8333", # Legacy port (backwards compatibility)
|
|
19
28
|
]
|
|
20
29
|
DEFAULT_API_URL = DEFAULT_API_URLS[0]
|
|
21
30
|
|
|
22
31
|
|
|
23
|
-
def get_db_path() ->
|
|
32
|
+
def get_db_path() -> _Path:
|
|
24
33
|
"""
|
|
25
34
|
Get database path from environment or auto-detect.
|
|
26
35
|
|
|
27
36
|
Priority:
|
|
28
|
-
1.
|
|
29
|
-
2.
|
|
37
|
+
1. SCITEX_SCHOLAR_CROSSREF_DB environment variable
|
|
38
|
+
2. CROSSREF_LOCAL_DB environment variable
|
|
39
|
+
3. First existing path from DEFAULT_DB_PATHS
|
|
30
40
|
|
|
31
41
|
Returns:
|
|
32
42
|
Path to the database file
|
|
@@ -34,13 +44,15 @@ def get_db_path() -> Path:
|
|
|
34
44
|
Raises:
|
|
35
45
|
FileNotFoundError: If no database found
|
|
36
46
|
"""
|
|
37
|
-
# Check environment variable first
|
|
38
|
-
env_path =
|
|
47
|
+
# Check SCITEX environment variable first (takes priority)
|
|
48
|
+
env_path = _os.environ.get("SCITEX_SCHOLAR_CROSSREF_DB")
|
|
49
|
+
if not env_path:
|
|
50
|
+
env_path = _os.environ.get("CROSSREF_LOCAL_DB")
|
|
39
51
|
if env_path:
|
|
40
|
-
path =
|
|
52
|
+
path = _Path(env_path)
|
|
41
53
|
if path.exists():
|
|
42
54
|
return path
|
|
43
|
-
raise FileNotFoundError(f"
|
|
55
|
+
raise FileNotFoundError(f"Database path not found: {env_path}")
|
|
44
56
|
|
|
45
57
|
# Auto-detect from default locations
|
|
46
58
|
for path in DEFAULT_DB_PATHS:
|
|
@@ -56,9 +68,9 @@ def get_db_path() -> Path:
|
|
|
56
68
|
class Config:
|
|
57
69
|
"""Configuration container."""
|
|
58
70
|
|
|
59
|
-
_db_path: Optional[
|
|
71
|
+
_db_path: Optional[_Path] = None
|
|
60
72
|
_api_url: Optional[str] = None
|
|
61
|
-
_mode: str = "auto" # "auto", "
|
|
73
|
+
_mode: str = "auto" # "auto", "db", or "http"
|
|
62
74
|
|
|
63
75
|
@classmethod
|
|
64
76
|
def get_mode(cls) -> str:
|
|
@@ -66,53 +78,56 @@ class Config:
|
|
|
66
78
|
Get current mode.
|
|
67
79
|
|
|
68
80
|
Returns:
|
|
69
|
-
"
|
|
70
|
-
"
|
|
81
|
+
"db" if using direct database access
|
|
82
|
+
"http" if using HTTP API
|
|
71
83
|
"""
|
|
72
84
|
if cls._mode == "auto":
|
|
73
|
-
# Check environment
|
|
74
|
-
env_mode =
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
85
|
+
# Check environment variables (SCITEX takes priority)
|
|
86
|
+
env_mode = _os.environ.get(
|
|
87
|
+
"SCITEX_SCHOLAR_CROSSREF_MODE",
|
|
88
|
+
_os.environ.get("CROSSREF_LOCAL_MODE", ""),
|
|
89
|
+
).lower()
|
|
90
|
+
if env_mode in ("http", "remote", "api"):
|
|
91
|
+
return "http"
|
|
92
|
+
if env_mode in ("db", "local"):
|
|
93
|
+
return "db"
|
|
79
94
|
|
|
80
95
|
# Check if API URL is set
|
|
81
|
-
if cls._api_url or
|
|
82
|
-
return "
|
|
96
|
+
if cls._api_url or _os.environ.get("CROSSREF_LOCAL_API_URL"):
|
|
97
|
+
return "http"
|
|
83
98
|
|
|
84
99
|
# Check if local database exists
|
|
85
100
|
try:
|
|
86
101
|
get_db_path()
|
|
87
|
-
return "
|
|
102
|
+
return "db"
|
|
88
103
|
except FileNotFoundError:
|
|
89
|
-
# No local DB, try
|
|
90
|
-
return "
|
|
104
|
+
# No local DB, try http
|
|
105
|
+
return "http"
|
|
91
106
|
|
|
92
107
|
return cls._mode
|
|
93
108
|
|
|
94
109
|
@classmethod
|
|
95
110
|
def set_mode(cls, mode: str) -> None:
|
|
96
|
-
"""Set mode explicitly: '
|
|
97
|
-
if mode not in ("auto", "
|
|
98
|
-
raise ValueError(f"Invalid mode: {mode}. Use 'auto', '
|
|
111
|
+
"""Set mode explicitly: 'db', 'http', or 'auto'."""
|
|
112
|
+
if mode not in ("auto", "db", "http"):
|
|
113
|
+
raise ValueError(f"Invalid mode: {mode}. Use 'auto', 'db', or 'http'")
|
|
99
114
|
cls._mode = mode
|
|
100
115
|
|
|
101
116
|
@classmethod
|
|
102
|
-
def get_db_path(cls) ->
|
|
117
|
+
def get_db_path(cls) -> _Path:
|
|
103
118
|
"""Get or auto-detect database path."""
|
|
104
119
|
if cls._db_path is None:
|
|
105
120
|
cls._db_path = get_db_path()
|
|
106
121
|
return cls._db_path
|
|
107
122
|
|
|
108
123
|
@classmethod
|
|
109
|
-
def set_db_path(cls, path: str |
|
|
124
|
+
def set_db_path(cls, path: str | _Path) -> None:
|
|
110
125
|
"""Set database path explicitly."""
|
|
111
|
-
path =
|
|
126
|
+
path = _Path(path)
|
|
112
127
|
if not path.exists():
|
|
113
128
|
raise FileNotFoundError(f"Database not found: {path}")
|
|
114
129
|
cls._db_path = path
|
|
115
|
-
cls._mode = "
|
|
130
|
+
cls._mode = "db"
|
|
116
131
|
|
|
117
132
|
@classmethod
|
|
118
133
|
def get_api_url(cls, auto_detect: bool = True) -> str:
|
|
@@ -128,7 +143,7 @@ class Config:
|
|
|
128
143
|
if cls._api_url:
|
|
129
144
|
return cls._api_url
|
|
130
145
|
|
|
131
|
-
env_url =
|
|
146
|
+
env_url = _os.environ.get("CROSSREF_LOCAL_API_URL")
|
|
132
147
|
if env_url:
|
|
133
148
|
return env_url
|
|
134
149
|
|
|
@@ -143,8 +158,8 @@ class Config:
|
|
|
143
158
|
@classmethod
|
|
144
159
|
def _find_working_api(cls) -> Optional[str]:
|
|
145
160
|
"""Try each default API URL and return first working one."""
|
|
146
|
-
import urllib.request
|
|
147
161
|
import urllib.error
|
|
162
|
+
import urllib.request
|
|
148
163
|
|
|
149
164
|
for url in DEFAULT_API_URLS:
|
|
150
165
|
try:
|
|
@@ -159,9 +174,9 @@ class Config:
|
|
|
159
174
|
|
|
160
175
|
@classmethod
|
|
161
176
|
def set_api_url(cls, url: str) -> None:
|
|
162
|
-
"""Set API URL for
|
|
177
|
+
"""Set API URL for http mode."""
|
|
163
178
|
cls._api_url = url.rstrip("/")
|
|
164
|
-
cls._mode = "
|
|
179
|
+
cls._mode = "http"
|
|
165
180
|
|
|
166
181
|
@classmethod
|
|
167
182
|
def reset(cls) -> None:
|