crossref-local 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +18 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +24 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- /crossref_local/{cli_main.py → _cli/main.py} +0 -0
- /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
|
@@ -16,13 +16,22 @@ Usage:
|
|
|
16
16
|
network.save_html("citation_network.html")
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
from dataclasses import dataclass
|
|
20
|
-
from
|
|
21
|
-
from
|
|
19
|
+
from dataclasses import dataclass as _dataclass
|
|
20
|
+
from dataclasses import field as _field
|
|
21
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
22
22
|
|
|
23
|
-
from .db import
|
|
23
|
+
from .db import Database, get_db
|
|
24
24
|
from .models import Work
|
|
25
25
|
|
|
26
|
+
__all__ = [
|
|
27
|
+
"get_citing",
|
|
28
|
+
"get_cited",
|
|
29
|
+
"get_citation_count",
|
|
30
|
+
"CitationNode",
|
|
31
|
+
"CitationEdge",
|
|
32
|
+
"CitationNetwork",
|
|
33
|
+
]
|
|
34
|
+
|
|
26
35
|
|
|
27
36
|
def get_citing(doi: str, limit: int = 100, db: Optional[Database] = None) -> List[str]:
|
|
28
37
|
"""
|
|
@@ -46,7 +55,7 @@ def get_citing(doi: str, limit: int = 100, db: Optional[Database] = None) -> Lis
|
|
|
46
55
|
WHERE cited_doi = ?
|
|
47
56
|
LIMIT ?
|
|
48
57
|
""",
|
|
49
|
-
(doi, limit)
|
|
58
|
+
(doi, limit),
|
|
50
59
|
)
|
|
51
60
|
return [row["citing_doi"] for row in rows]
|
|
52
61
|
|
|
@@ -73,7 +82,7 @@ def get_cited(doi: str, limit: int = 100, db: Optional[Database] = None) -> List
|
|
|
73
82
|
WHERE citing_doi = ?
|
|
74
83
|
LIMIT ?
|
|
75
84
|
""",
|
|
76
|
-
(doi, limit)
|
|
85
|
+
(doi, limit),
|
|
77
86
|
)
|
|
78
87
|
return [row["cited_doi"] for row in rows]
|
|
79
88
|
|
|
@@ -93,18 +102,18 @@ def get_citation_count(doi: str, db: Optional[Database] = None) -> int:
|
|
|
93
102
|
db = get_db()
|
|
94
103
|
|
|
95
104
|
row = db.fetchone(
|
|
96
|
-
"SELECT COUNT(*) as count FROM citations WHERE cited_doi = ?",
|
|
97
|
-
(doi,)
|
|
105
|
+
"SELECT COUNT(*) as count FROM citations WHERE cited_doi = ?", (doi,)
|
|
98
106
|
)
|
|
99
107
|
return row["count"] if row else 0
|
|
100
108
|
|
|
101
109
|
|
|
102
|
-
@
|
|
110
|
+
@_dataclass
|
|
103
111
|
class CitationNode:
|
|
104
112
|
"""A node in the citation network."""
|
|
113
|
+
|
|
105
114
|
doi: str
|
|
106
115
|
title: str = ""
|
|
107
|
-
authors: List[str] =
|
|
116
|
+
authors: List[str] = _field(default_factory=list)
|
|
108
117
|
year: Optional[int] = None
|
|
109
118
|
journal: str = ""
|
|
110
119
|
citation_count: int = 0
|
|
@@ -122,9 +131,10 @@ class CitationNode:
|
|
|
122
131
|
}
|
|
123
132
|
|
|
124
133
|
|
|
125
|
-
@
|
|
134
|
+
@_dataclass
|
|
126
135
|
class CitationEdge:
|
|
127
136
|
"""An edge in the citation network (citing -> cited)."""
|
|
137
|
+
|
|
128
138
|
citing_doi: str
|
|
129
139
|
cited_doi: str
|
|
130
140
|
year: Optional[int] = None
|
|
@@ -272,6 +282,8 @@ class CitationNetwork:
|
|
|
272
282
|
Raises:
|
|
273
283
|
ImportError: If pyvis is not installed
|
|
274
284
|
"""
|
|
285
|
+
import math as _math
|
|
286
|
+
|
|
275
287
|
try:
|
|
276
288
|
from pyvis.network import Network
|
|
277
289
|
except ImportError:
|
|
@@ -284,7 +296,7 @@ class CitationNetwork:
|
|
|
284
296
|
directed=True,
|
|
285
297
|
bgcolor="#ffffff",
|
|
286
298
|
font_color="#333333",
|
|
287
|
-
**kwargs
|
|
299
|
+
**kwargs,
|
|
288
300
|
)
|
|
289
301
|
|
|
290
302
|
# Configure physics
|
|
@@ -298,15 +310,16 @@ class CitationNetwork:
|
|
|
298
310
|
# Add nodes with styling based on depth and citation count
|
|
299
311
|
for doi, node in self.nodes.items():
|
|
300
312
|
# Size based on citation count (log scale)
|
|
301
|
-
|
|
302
|
-
size = 10 + min(30, math.log1p(node.citation_count) * 5)
|
|
313
|
+
size = 10 + min(30, _math.log1p(node.citation_count) * 5)
|
|
303
314
|
|
|
304
315
|
# Color based on depth
|
|
305
316
|
colors = ["#e74c3c", "#3498db", "#2ecc71", "#9b59b6", "#f39c12"]
|
|
306
317
|
color = colors[min(node.depth, len(colors) - 1)]
|
|
307
318
|
|
|
308
319
|
# Label
|
|
309
|
-
title_short = (
|
|
320
|
+
title_short = (
|
|
321
|
+
(node.title[:50] + "...") if len(node.title) > 50 else node.title
|
|
322
|
+
)
|
|
310
323
|
label = f"{title_short}\n({node.year or 'N/A'})"
|
|
311
324
|
|
|
312
325
|
# Tooltip
|
|
@@ -316,7 +329,7 @@ class CitationNetwork:
|
|
|
316
329
|
tooltip = f"""
|
|
317
330
|
<b>{node.title}</b><br>
|
|
318
331
|
{authors_str}<br>
|
|
319
|
-
{node.journal} ({node.year or
|
|
332
|
+
{node.journal} ({node.year or "N/A"})<br>
|
|
320
333
|
Citations: {node.citation_count}<br>
|
|
321
334
|
DOI: {doi}
|
|
322
335
|
"""
|
|
@@ -340,7 +353,9 @@ class CitationNetwork:
|
|
|
340
353
|
net.save_graph(path)
|
|
341
354
|
return path
|
|
342
355
|
|
|
343
|
-
def save_png(
|
|
356
|
+
def save_png(
|
|
357
|
+
self, path: str = "citation_network.png", figsize: Tuple[int, int] = (12, 10)
|
|
358
|
+
):
|
|
344
359
|
"""
|
|
345
360
|
Save static PNG visualization using matplotlib.
|
|
346
361
|
|
|
@@ -351,6 +366,8 @@ class CitationNetwork:
|
|
|
351
366
|
Raises:
|
|
352
367
|
ImportError: If matplotlib is not installed
|
|
353
368
|
"""
|
|
369
|
+
import math as _math
|
|
370
|
+
|
|
354
371
|
try:
|
|
355
372
|
import matplotlib.pyplot as plt
|
|
356
373
|
import networkx as nx
|
|
@@ -365,24 +382,34 @@ class CitationNetwork:
|
|
|
365
382
|
pos = nx.spring_layout(G, k=2, iterations=50)
|
|
366
383
|
|
|
367
384
|
# Node sizes based on citation count
|
|
368
|
-
|
|
369
|
-
|
|
385
|
+
sizes = [
|
|
386
|
+
100 + min(500, _math.log1p(self.nodes[n].citation_count) * 50)
|
|
387
|
+
for n in G.nodes()
|
|
388
|
+
]
|
|
370
389
|
|
|
371
390
|
# Node colors based on depth
|
|
372
391
|
colors = [self.nodes[n].depth for n in G.nodes()]
|
|
373
392
|
|
|
374
393
|
# Draw
|
|
375
|
-
nx.draw_networkx_nodes(
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
394
|
+
nx.draw_networkx_nodes(
|
|
395
|
+
G,
|
|
396
|
+
pos,
|
|
397
|
+
node_size=sizes,
|
|
398
|
+
node_color=colors,
|
|
399
|
+
cmap=plt.cm.RdYlBu_r,
|
|
400
|
+
alpha=0.8,
|
|
401
|
+
ax=ax,
|
|
402
|
+
)
|
|
403
|
+
nx.draw_networkx_edges(G, pos, alpha=0.3, arrows=True, arrowsize=10, ax=ax)
|
|
379
404
|
|
|
380
405
|
# Labels for important nodes (high citation count)
|
|
381
406
|
labels = {}
|
|
382
407
|
for doi in G.nodes():
|
|
383
408
|
node = self.nodes[doi]
|
|
384
409
|
if node.citation_count > 10 or doi == self.center_doi:
|
|
385
|
-
short_title = (
|
|
410
|
+
short_title = (
|
|
411
|
+
(node.title[:30] + "...") if len(node.title) > 30 else node.title
|
|
412
|
+
)
|
|
386
413
|
labels[doi] = f"{short_title}\n({node.year or 'N/A'})"
|
|
387
414
|
|
|
388
415
|
nx.draw_networkx_labels(G, pos, labels, font_size=8, ax=ax)
|
|
@@ -402,11 +429,13 @@ class CitationNetwork:
|
|
|
402
429
|
"center_doi": self.center_doi,
|
|
403
430
|
"depth": self.depth,
|
|
404
431
|
"nodes": [n.to_dict() for n in self.nodes.values()],
|
|
405
|
-
"edges": [
|
|
432
|
+
"edges": [
|
|
433
|
+
{"citing": e.citing_doi, "cited": e.cited_doi} for e in self.edges
|
|
434
|
+
],
|
|
406
435
|
"stats": {
|
|
407
436
|
"total_nodes": len(self.nodes),
|
|
408
437
|
"total_edges": len(self.edges),
|
|
409
|
-
}
|
|
438
|
+
},
|
|
410
439
|
}
|
|
411
440
|
|
|
412
441
|
def __repr__(self):
|
|
@@ -1,29 +1,42 @@
|
|
|
1
1
|
"""Configuration for crossref_local."""
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
3
|
+
import os as _os
|
|
4
|
+
from pathlib import Path as _Path
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Config",
|
|
9
|
+
"get_db_path",
|
|
10
|
+
"DEFAULT_PORT",
|
|
11
|
+
"DEFAULT_API_URL",
|
|
12
|
+
]
|
|
13
|
+
|
|
7
14
|
# Default database locations (checked in order)
|
|
8
15
|
DEFAULT_DB_PATHS = [
|
|
9
|
-
|
|
10
|
-
|
|
16
|
+
_Path.cwd() / "data" / "crossref.db",
|
|
17
|
+
_Path.home() / ".crossref_local" / "crossref.db",
|
|
11
18
|
]
|
|
12
19
|
|
|
13
|
-
# Default
|
|
20
|
+
# Default port: SCITEX convention (3129X scheme)
|
|
21
|
+
# 31290: scitex-cloud, 31291: crossref-local, 31292: openalex-local, 31293: audio relay
|
|
22
|
+
DEFAULT_PORT = 31291
|
|
23
|
+
|
|
24
|
+
# Default remote API URLs (checked in order)
|
|
14
25
|
DEFAULT_API_URLS = [
|
|
15
|
-
"http://localhost:
|
|
26
|
+
f"http://localhost:{DEFAULT_PORT}", # SCITEX default
|
|
27
|
+
"http://localhost:8333", # Legacy port (backwards compatibility)
|
|
16
28
|
]
|
|
17
29
|
DEFAULT_API_URL = DEFAULT_API_URLS[0]
|
|
18
30
|
|
|
19
31
|
|
|
20
|
-
def get_db_path() ->
|
|
32
|
+
def get_db_path() -> _Path:
|
|
21
33
|
"""
|
|
22
34
|
Get database path from environment or auto-detect.
|
|
23
35
|
|
|
24
36
|
Priority:
|
|
25
|
-
1.
|
|
26
|
-
2.
|
|
37
|
+
1. SCITEX_SCHOLAR_CROSSREF_DB environment variable
|
|
38
|
+
2. CROSSREF_LOCAL_DB environment variable
|
|
39
|
+
3. First existing path from DEFAULT_DB_PATHS
|
|
27
40
|
|
|
28
41
|
Returns:
|
|
29
42
|
Path to the database file
|
|
@@ -31,13 +44,15 @@ def get_db_path() -> Path:
|
|
|
31
44
|
Raises:
|
|
32
45
|
FileNotFoundError: If no database found
|
|
33
46
|
"""
|
|
34
|
-
# Check environment variable first
|
|
35
|
-
env_path =
|
|
47
|
+
# Check SCITEX environment variable first (takes priority)
|
|
48
|
+
env_path = _os.environ.get("SCITEX_SCHOLAR_CROSSREF_DB")
|
|
49
|
+
if not env_path:
|
|
50
|
+
env_path = _os.environ.get("CROSSREF_LOCAL_DB")
|
|
36
51
|
if env_path:
|
|
37
|
-
path =
|
|
52
|
+
path = _Path(env_path)
|
|
38
53
|
if path.exists():
|
|
39
54
|
return path
|
|
40
|
-
raise FileNotFoundError(f"
|
|
55
|
+
raise FileNotFoundError(f"Database path not found: {env_path}")
|
|
41
56
|
|
|
42
57
|
# Auto-detect from default locations
|
|
43
58
|
for path in DEFAULT_DB_PATHS:
|
|
@@ -53,7 +68,7 @@ def get_db_path() -> Path:
|
|
|
53
68
|
class Config:
|
|
54
69
|
"""Configuration container."""
|
|
55
70
|
|
|
56
|
-
_db_path: Optional[
|
|
71
|
+
_db_path: Optional[_Path] = None
|
|
57
72
|
_api_url: Optional[str] = None
|
|
58
73
|
_mode: str = "auto" # "auto", "db", or "http"
|
|
59
74
|
|
|
@@ -67,15 +82,18 @@ class Config:
|
|
|
67
82
|
"http" if using HTTP API
|
|
68
83
|
"""
|
|
69
84
|
if cls._mode == "auto":
|
|
70
|
-
# Check environment
|
|
71
|
-
env_mode =
|
|
85
|
+
# Check environment variables (SCITEX takes priority)
|
|
86
|
+
env_mode = _os.environ.get(
|
|
87
|
+
"SCITEX_SCHOLAR_CROSSREF_MODE",
|
|
88
|
+
_os.environ.get("CROSSREF_LOCAL_MODE", ""),
|
|
89
|
+
).lower()
|
|
72
90
|
if env_mode in ("http", "remote", "api"):
|
|
73
91
|
return "http"
|
|
74
92
|
if env_mode in ("db", "local"):
|
|
75
93
|
return "db"
|
|
76
94
|
|
|
77
95
|
# Check if API URL is set
|
|
78
|
-
if cls._api_url or
|
|
96
|
+
if cls._api_url or _os.environ.get("CROSSREF_LOCAL_API_URL"):
|
|
79
97
|
return "http"
|
|
80
98
|
|
|
81
99
|
# Check if local database exists
|
|
@@ -96,16 +114,16 @@ class Config:
|
|
|
96
114
|
cls._mode = mode
|
|
97
115
|
|
|
98
116
|
@classmethod
|
|
99
|
-
def get_db_path(cls) ->
|
|
117
|
+
def get_db_path(cls) -> _Path:
|
|
100
118
|
"""Get or auto-detect database path."""
|
|
101
119
|
if cls._db_path is None:
|
|
102
120
|
cls._db_path = get_db_path()
|
|
103
121
|
return cls._db_path
|
|
104
122
|
|
|
105
123
|
@classmethod
|
|
106
|
-
def set_db_path(cls, path: str |
|
|
124
|
+
def set_db_path(cls, path: str | _Path) -> None:
|
|
107
125
|
"""Set database path explicitly."""
|
|
108
|
-
path =
|
|
126
|
+
path = _Path(path)
|
|
109
127
|
if not path.exists():
|
|
110
128
|
raise FileNotFoundError(f"Database not found: {path}")
|
|
111
129
|
cls._db_path = path
|
|
@@ -125,7 +143,7 @@ class Config:
|
|
|
125
143
|
if cls._api_url:
|
|
126
144
|
return cls._api_url
|
|
127
145
|
|
|
128
|
-
env_url =
|
|
146
|
+
env_url = _os.environ.get("CROSSREF_LOCAL_API_URL")
|
|
129
147
|
if env_url:
|
|
130
148
|
return env_url
|
|
131
149
|
|
|
@@ -140,8 +158,8 @@ class Config:
|
|
|
140
158
|
@classmethod
|
|
141
159
|
def _find_working_api(cls) -> Optional[str]:
|
|
142
160
|
"""Try each default API URL and return first working one."""
|
|
143
|
-
import urllib.request
|
|
144
161
|
import urllib.error
|
|
162
|
+
import urllib.request
|
|
145
163
|
|
|
146
164
|
for url in DEFAULT_API_URLS:
|
|
147
165
|
try:
|
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
"""Database connection handling for crossref_local."""
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import zlib
|
|
6
|
-
from contextlib import contextmanager
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
from typing import
|
|
3
|
+
import json as _json
|
|
4
|
+
import sqlite3 as _sqlite3
|
|
5
|
+
import zlib as _zlib
|
|
6
|
+
from contextlib import contextmanager as _contextmanager
|
|
7
|
+
from pathlib import Path as _Path
|
|
8
|
+
from typing import Generator, Optional
|
|
9
9
|
|
|
10
|
-
from .config import Config
|
|
10
|
+
from .config import Config as _Config
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"Database",
|
|
14
|
+
"get_db",
|
|
15
|
+
"close_db",
|
|
16
|
+
"connection",
|
|
17
|
+
]
|
|
11
18
|
|
|
12
19
|
|
|
13
20
|
class Database:
|
|
@@ -17,7 +24,7 @@ class Database:
|
|
|
17
24
|
Supports both direct usage and context manager pattern.
|
|
18
25
|
"""
|
|
19
26
|
|
|
20
|
-
def __init__(self, db_path: Optional[str |
|
|
27
|
+
def __init__(self, db_path: Optional[str | _Path] = None):
|
|
21
28
|
"""
|
|
22
29
|
Initialize database connection.
|
|
23
30
|
|
|
@@ -25,19 +32,19 @@ class Database:
|
|
|
25
32
|
db_path: Path to database. If None, auto-detects.
|
|
26
33
|
"""
|
|
27
34
|
if db_path:
|
|
28
|
-
self.db_path =
|
|
35
|
+
self.db_path = _Path(db_path)
|
|
29
36
|
else:
|
|
30
|
-
self.db_path =
|
|
37
|
+
self.db_path = _Config.get_db_path()
|
|
31
38
|
|
|
32
|
-
self.conn: Optional[
|
|
39
|
+
self.conn: Optional[_sqlite3.Connection] = None
|
|
33
40
|
self._connect()
|
|
34
41
|
|
|
35
42
|
def _connect(self) -> None:
|
|
36
43
|
"""Establish database connection."""
|
|
37
44
|
# check_same_thread=False allows connection to be used across threads
|
|
38
45
|
# Safe for read-only operations (which is our use case)
|
|
39
|
-
self.conn =
|
|
40
|
-
self.conn.row_factory =
|
|
46
|
+
self.conn = _sqlite3.connect(self.db_path, check_same_thread=False)
|
|
47
|
+
self.conn.row_factory = _sqlite3.Row
|
|
41
48
|
|
|
42
49
|
def close(self) -> None:
|
|
43
50
|
"""Close database connection."""
|
|
@@ -51,11 +58,11 @@ class Database:
|
|
|
51
58
|
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
52
59
|
self.close()
|
|
53
60
|
|
|
54
|
-
def execute(self, query: str, params: tuple = ()) ->
|
|
61
|
+
def execute(self, query: str, params: tuple = ()) -> _sqlite3.Cursor:
|
|
55
62
|
"""Execute SQL query."""
|
|
56
63
|
return self.conn.execute(query, params)
|
|
57
64
|
|
|
58
|
-
def fetchone(self, query: str, params: tuple = ()) -> Optional[
|
|
65
|
+
def fetchone(self, query: str, params: tuple = ()) -> Optional[_sqlite3.Row]:
|
|
59
66
|
"""Execute query and fetch one result."""
|
|
60
67
|
cursor = self.execute(query, params)
|
|
61
68
|
return cursor.fetchone()
|
|
@@ -75,10 +82,7 @@ class Database:
|
|
|
75
82
|
Returns:
|
|
76
83
|
Metadata dictionary or None
|
|
77
84
|
"""
|
|
78
|
-
row = self.fetchone(
|
|
79
|
-
"SELECT metadata FROM works WHERE doi = ?",
|
|
80
|
-
(doi,)
|
|
81
|
-
)
|
|
85
|
+
row = self.fetchone("SELECT metadata FROM works WHERE doi = ?", (doi,))
|
|
82
86
|
if row and row["metadata"]:
|
|
83
87
|
return self._decompress_metadata(row["metadata"])
|
|
84
88
|
return None
|
|
@@ -87,15 +91,15 @@ class Database:
|
|
|
87
91
|
"""Decompress and parse metadata (handles both compressed and plain JSON)."""
|
|
88
92
|
# If it's already a string, parse directly
|
|
89
93
|
if isinstance(data, str):
|
|
90
|
-
return
|
|
94
|
+
return _json.loads(data)
|
|
91
95
|
|
|
92
96
|
# If bytes, try decompression
|
|
93
97
|
if isinstance(data, bytes):
|
|
94
98
|
try:
|
|
95
|
-
decompressed =
|
|
96
|
-
return
|
|
97
|
-
except
|
|
98
|
-
return
|
|
99
|
+
decompressed = _zlib.decompress(data)
|
|
100
|
+
return _json.loads(decompressed)
|
|
101
|
+
except _zlib.error:
|
|
102
|
+
return _json.loads(data.decode("utf-8"))
|
|
99
103
|
|
|
100
104
|
return data
|
|
101
105
|
|
|
@@ -120,8 +124,10 @@ def close_db() -> None:
|
|
|
120
124
|
_db = None
|
|
121
125
|
|
|
122
126
|
|
|
123
|
-
@
|
|
124
|
-
def connection(
|
|
127
|
+
@_contextmanager
|
|
128
|
+
def connection(
|
|
129
|
+
db_path: Optional[str | _Path] = None,
|
|
130
|
+
) -> Generator[Database, None, None]:
|
|
125
131
|
"""
|
|
126
132
|
Context manager for database connection.
|
|
127
133
|
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
"""Full-text search using FTS5."""
|
|
2
2
|
|
|
3
|
-
import re
|
|
4
|
-
import time
|
|
3
|
+
import re as _re
|
|
4
|
+
import time as _time
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
7
|
from .db import Database, get_db
|
|
8
|
-
from .models import
|
|
8
|
+
from .models import SearchResult, Work
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"search",
|
|
12
|
+
"count",
|
|
13
|
+
"search_dois",
|
|
14
|
+
]
|
|
9
15
|
|
|
10
16
|
|
|
11
17
|
def _sanitize_query(query: str) -> str:
|
|
@@ -24,13 +30,13 @@ def _sanitize_query(query: str) -> str:
|
|
|
24
30
|
|
|
25
31
|
# Check for problematic patterns (hyphenated words, special chars)
|
|
26
32
|
# But allow explicit FTS5 operators: AND, OR, NOT, NEAR
|
|
27
|
-
has_hyphenated_word =
|
|
28
|
-
has_special =
|
|
33
|
+
has_hyphenated_word = _re.search(r"\w+-\w+", query)
|
|
34
|
+
has_special = _re.search(r"[/\\@#$%^&]", query)
|
|
29
35
|
|
|
30
36
|
if has_hyphenated_word or has_special:
|
|
31
37
|
# Quote each word to treat as literal
|
|
32
38
|
words = query.split()
|
|
33
|
-
quoted =
|
|
39
|
+
quoted = " ".join(f'"{w}"' for w in words)
|
|
34
40
|
return quoted
|
|
35
41
|
|
|
36
42
|
return query
|
|
@@ -65,15 +71,14 @@ def search(
|
|
|
65
71
|
if db is None:
|
|
66
72
|
db = get_db()
|
|
67
73
|
|
|
68
|
-
start =
|
|
74
|
+
start = _time.perf_counter()
|
|
69
75
|
|
|
70
76
|
# Sanitize query for FTS5
|
|
71
77
|
safe_query = _sanitize_query(query)
|
|
72
78
|
|
|
73
79
|
# Get total count
|
|
74
80
|
count_row = db.fetchone(
|
|
75
|
-
"SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
|
|
76
|
-
(safe_query,)
|
|
81
|
+
"SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
|
|
77
82
|
)
|
|
78
83
|
total = count_row["total"] if count_row else 0
|
|
79
84
|
|
|
@@ -86,10 +91,10 @@ def search(
|
|
|
86
91
|
WHERE works_fts MATCH ?
|
|
87
92
|
LIMIT ? OFFSET ?
|
|
88
93
|
""",
|
|
89
|
-
(safe_query, limit, offset)
|
|
94
|
+
(safe_query, limit, offset),
|
|
90
95
|
)
|
|
91
96
|
|
|
92
|
-
elapsed_ms = (
|
|
97
|
+
elapsed_ms = (_time.perf_counter() - start) * 1000
|
|
93
98
|
|
|
94
99
|
# Convert to Work objects
|
|
95
100
|
works = []
|
|
@@ -121,8 +126,7 @@ def count(query: str, db: Optional[Database] = None) -> int:
|
|
|
121
126
|
|
|
122
127
|
safe_query = _sanitize_query(query)
|
|
123
128
|
row = db.fetchone(
|
|
124
|
-
"SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
|
|
125
|
-
(safe_query,)
|
|
129
|
+
"SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
|
|
126
130
|
)
|
|
127
131
|
return row["total"] if row else 0
|
|
128
132
|
|
|
@@ -155,7 +159,7 @@ def search_dois(
|
|
|
155
159
|
WHERE works_fts MATCH ?
|
|
156
160
|
LIMIT ?
|
|
157
161
|
""",
|
|
158
|
-
(safe_query, limit)
|
|
162
|
+
(safe_query, limit),
|
|
159
163
|
)
|
|
160
164
|
|
|
161
165
|
return [row["doi"] for row in rows]
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
"""Data models for crossref_local."""
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
3
|
+
from dataclasses import dataclass as _dataclass
|
|
4
|
+
from dataclasses import field as _field
|
|
4
5
|
from typing import List, Optional
|
|
5
|
-
import json
|
|
6
6
|
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Work",
|
|
9
|
+
"SearchResult",
|
|
10
|
+
]
|
|
7
11
|
|
|
8
|
-
|
|
12
|
+
|
|
13
|
+
@_dataclass
|
|
9
14
|
class Work:
|
|
10
15
|
"""
|
|
11
16
|
Represents a scholarly work from CrossRef.
|
|
@@ -30,7 +35,7 @@ class Work:
|
|
|
30
35
|
|
|
31
36
|
doi: str
|
|
32
37
|
title: Optional[str] = None
|
|
33
|
-
authors: List[str] =
|
|
38
|
+
authors: List[str] = _field(default_factory=list)
|
|
34
39
|
year: Optional[int] = None
|
|
35
40
|
journal: Optional[str] = None
|
|
36
41
|
issn: Optional[str] = None
|
|
@@ -42,7 +47,7 @@ class Work:
|
|
|
42
47
|
abstract: Optional[str] = None
|
|
43
48
|
url: Optional[str] = None
|
|
44
49
|
citation_count: Optional[int] = None
|
|
45
|
-
references: List[str] =
|
|
50
|
+
references: List[str] = _field(default_factory=list)
|
|
46
51
|
|
|
47
52
|
@classmethod
|
|
48
53
|
def from_metadata(cls, doi: str, metadata: dict) -> "Work":
|
|
@@ -159,7 +164,7 @@ class Work:
|
|
|
159
164
|
return ". ".join(filter(None, parts))
|
|
160
165
|
|
|
161
166
|
|
|
162
|
-
@
|
|
167
|
+
@_dataclass
|
|
163
168
|
class SearchResult:
|
|
164
169
|
"""
|
|
165
170
|
Container for search results with metadata.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Remote API client package with collection support.
|
|
2
|
+
|
|
3
|
+
Provides RemoteClient for connecting to CrossRef Local API server.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from .base import (
|
|
9
|
+
RemoteClient as _BaseClient,
|
|
10
|
+
DEFAULT_API_URL,
|
|
11
|
+
)
|
|
12
|
+
from .collections import CollectionsMixin
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RemoteClient(CollectionsMixin, _BaseClient):
|
|
16
|
+
"""Remote client with collection support.
|
|
17
|
+
|
|
18
|
+
Extends base RemoteClient with collection management methods.
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
>>> client = RemoteClient("http://localhost:31291")
|
|
22
|
+
>>> # Create a collection
|
|
23
|
+
>>> client.create_collection("epilepsy", query="epilepsy seizure")
|
|
24
|
+
>>> # Query collection
|
|
25
|
+
>>> papers = client.get_collection("epilepsy", fields=["doi", "title"])
|
|
26
|
+
>>> # Download as file
|
|
27
|
+
>>> client.download_collection("epilepsy", "papers.bib", format="bibtex")
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Module-level client singleton
|
|
34
|
+
_client: Optional[RemoteClient] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
|
|
38
|
+
"""Get or create singleton remote client with collection support."""
|
|
39
|
+
global _client
|
|
40
|
+
if _client is None or _client.base_url != base_url:
|
|
41
|
+
_client = RemoteClient(base_url)
|
|
42
|
+
return _client
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def reset_client() -> None:
|
|
46
|
+
"""Reset singleton client."""
|
|
47
|
+
global _client
|
|
48
|
+
_client = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"RemoteClient",
|
|
53
|
+
"DEFAULT_API_URL",
|
|
54
|
+
"get_client",
|
|
55
|
+
"reset_client",
|
|
56
|
+
]
|