crossref-local: crossref_local-0.3.1-py3-none-any.whl → crossref_local-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry, and is provided for informational purposes only.
- crossref_local/__init__.py +38 -16
- crossref_local/__main__.py +0 -0
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/_cache/export.py +100 -0
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cache/viz.py +296 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cache.py +179 -0
- crossref_local/_cli/cli.py +512 -0
- crossref_local/_cli/completion.py +245 -0
- crossref_local/_cli/main.py +20 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +413 -0
- crossref_local/_core/__init__.py +58 -0
- crossref_local/{api.py → _core/api.py} +130 -36
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +57 -42
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/{fts.py → _core/fts.py} +18 -14
- crossref_local/{models.py → _core/models.py} +11 -6
- crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +356 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +129 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +128 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +466 -0
- crossref_local/cli.py +5 -447
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -199
- crossref_local/remote.py +5 -261
- crossref_local/server.py +5 -349
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
- crossref_local-0.5.0.dist-info/RECORD +47 -0
- crossref_local-0.3.1.dist-info/RECORD +0 -20
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
- {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
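The line counts above telegraph the restructuring: each 0.3.1 top-level module (aio.py +30 -206, cli.py +5 -447, mcp_server.py +5 -199, remote.py +5 -261, server.py +5 -349) shrinks to a few lines, while new underscore-prefixed packages (`_aio`, `_cli`, `_core`, `_remote`, `_server`) absorb the implementations. This suggests the old import paths survive as thin re-export shims over a privatized layout. The hunks below show the same privatization inside `_core`: imports gain `as _name` aliases and each module gains an explicit `__all__`.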
crossref_local/{db.py → _core/db.py}
(an ellipsis … marks removed-side text truncated in the source rendering)

@@ -1,13 +1,20 @@
 """Database connection handling for crossref_local."""
 
-import json
-import sqlite3
-import zlib
-from contextlib import contextmanager
-from pathlib import Path
-from typing import …
+import json as _json
+import sqlite3 as _sqlite3
+import zlib as _zlib
+from contextlib import contextmanager as _contextmanager
+from pathlib import Path as _Path
+from typing import Generator, Optional
 
-from .config import Config
+from .config import Config as _Config
+
+__all__ = [
+    "Database",
+    "get_db",
+    "close_db",
+    "connection",
+]
 
 
 class Database:

@@ -17,7 +24,7 @@ class Database:
     Supports both direct usage and context manager pattern.
     """
 
-    def __init__(self, db_path: Optional[str | Path] = None):
+    def __init__(self, db_path: Optional[str | _Path] = None):
        """
        Initialize database connection.
 

@@ -25,19 +32,19 @@
            db_path: Path to database. If None, auto-detects.
        """
        if db_path:
-            self.db_path = Path(db_path)
+            self.db_path = _Path(db_path)
        else:
-            self.db_path = Config.get_db_path()
+            self.db_path = _Config.get_db_path()
 
-        self.conn: Optional[sqlite3.Connection] = None
+        self.conn: Optional[_sqlite3.Connection] = None
        self._connect()
 
    def _connect(self) -> None:
        """Establish database connection."""
        # check_same_thread=False allows connection to be used across threads
        # Safe for read-only operations (which is our use case)
-        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
-        self.conn.row_factory = sqlite3.Row
+        self.conn = _sqlite3.connect(self.db_path, check_same_thread=False)
+        self.conn.row_factory = _sqlite3.Row
 
    def close(self) -> None:
        """Close database connection."""

@@ -51,11 +58,11 @@
    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()
 
-    def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
+    def execute(self, query: str, params: tuple = ()) -> _sqlite3.Cursor:
        """Execute SQL query."""
        return self.conn.execute(query, params)
 
-    def fetchone(self, query: str, params: tuple = ()) -> Optional[sqlite3.Row]:
+    def fetchone(self, query: str, params: tuple = ()) -> Optional[_sqlite3.Row]:
        """Execute query and fetch one result."""
        cursor = self.execute(query, params)
        return cursor.fetchone()

@@ -75,10 +82,7 @@
        Returns:
            Metadata dictionary or None
        """
-        row = self.fetchone(
-            "SELECT metadata FROM works WHERE doi = ?",
-            (doi,)
-        )
+        row = self.fetchone("SELECT metadata FROM works WHERE doi = ?", (doi,))
        if row and row["metadata"]:
            return self._decompress_metadata(row["metadata"])
        return None

@@ -87,15 +91,15 @@
        """Decompress and parse metadata (handles both compressed and plain JSON)."""
        # If it's already a string, parse directly
        if isinstance(data, str):
-            return json.loads(data)
+            return _json.loads(data)
 
        # If bytes, try decompression
        if isinstance(data, bytes):
            try:
-                decompressed = zlib.decompress(data)
-                return json.loads(decompressed)
-            except zlib.error:
-                return json.loads(data.decode("utf-8"))
+                decompressed = _zlib.decompress(data)
+                return _json.loads(decompressed)
+            except _zlib.error:
+                return _json.loads(data.decode("utf-8"))
 
        return data
 

@@ -120,8 +124,10 @@ def close_db() -> None:
    _db = None
 
 
-@contextmanager
-def connection(…
+@_contextmanager
+def connection(
+    db_path: Optional[str | _Path] = None,
+) -> Generator[Database, None, None]:
    """
    Context manager for database connection.
 
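As a concrete illustration of the fallback in the `_decompress_metadata` hunk, here is a minimal, self-contained sketch of the same decode logic; `decode_metadata` is a hypothetical standalone name, not part of the package API:

```python
import json
import zlib


def decode_metadata(data):
    """Mirror of the hunk's logic: accept a str, zlib-compressed bytes, or plain JSON bytes."""
    if isinstance(data, str):
        # Already plain JSON text: parse directly.
        return json.loads(data)
    if isinstance(data, bytes):
        try:
            # Most rows store zlib-compressed JSON.
            return json.loads(zlib.decompress(data))
        except zlib.error:
            # Fall back to uncompressed UTF-8 JSON bytes.
            return json.loads(data.decode("utf-8"))
    return data


blob = zlib.compress(json.dumps({"DOI": "10.1234/example"}).encode("utf-8"))
print(decode_metadata(blob))                    # {'DOI': '10.1234/example'}
print(decode_metadata('{"DOI": "10.1234/x"}'))  # plain-string path
```

The `except zlib.error` branch is what lets a single `metadata` column hold both compressed blobs and plain JSON.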
crossref_local/{fts.py → _core/fts.py}

@@ -1,11 +1,17 @@
 """Full-text search using FTS5."""
 
-import re
-import time
+import re as _re
+import time as _time
 from typing import List, Optional
 
 from .db import Database, get_db
-from .models import …
+from .models import SearchResult, Work
+
+__all__ = [
+    "search",
+    "count",
+    "search_dois",
+]
 
 
 def _sanitize_query(query: str) -> str:

@@ -24,13 +30,13 @@ def _sanitize_query(query: str) -> str:
 
    # Check for problematic patterns (hyphenated words, special chars)
    # But allow explicit FTS5 operators: AND, OR, NOT, NEAR
-    has_hyphenated_word = re.search(r"\w+-\w+", query)
-    has_special = re.search(r"[/\\@#$%^&]", query)
+    has_hyphenated_word = _re.search(r"\w+-\w+", query)
+    has_special = _re.search(r"[/\\@#$%^&]", query)
 
    if has_hyphenated_word or has_special:
        # Quote each word to treat as literal
        words = query.split()
-        quoted = …
+        quoted = " ".join(f'"{w}"' for w in words)
        return quoted
 
    return query

@@ -65,15 +71,14 @@ def search(
    if db is None:
        db = get_db()
 
-    start = time.perf_counter()
+    start = _time.perf_counter()
 
    # Sanitize query for FTS5
    safe_query = _sanitize_query(query)
 
    # Get total count
    count_row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    total = count_row["total"] if count_row else 0
 

@@ -86,10 +91,10 @@
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """,
-        (safe_query, limit, offset)
+        (safe_query, limit, offset),
    )
 
-    elapsed_ms = (time.perf_counter() - start) * 1000
+    elapsed_ms = (_time.perf_counter() - start) * 1000
 
    # Convert to Work objects
    works = []

@@ -121,8 +126,7 @@ def count(query: str, db: Optional[Database] = None) -> int:
 
    safe_query = _sanitize_query(query)
    row = db.fetchone(
-        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (safe_query,)
+        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?", (safe_query,)
    )
    return row["total"] if row else 0
 

@@ -155,7 +159,7 @@ def search_dois(
        WHERE works_fts MATCH ?
        LIMIT ?
        """,
-        (safe_query, limit)
+        (safe_query, limit),
    )
 
    return [row["doi"] for row in rows]
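The sanitizer changed above is small enough to exercise in isolation. This sketch reproduces its visible logic under a hypothetical standalone name, `sanitize_query`:

```python
import re


def sanitize_query(query: str) -> str:
    """Quote terms when the query would trip up the FTS5 parser."""
    has_hyphenated_word = re.search(r"\w+-\w+", query)
    has_special = re.search(r"[/\\@#$%^&]", query)
    if has_hyphenated_word or has_special:
        # Quote each word so FTS5 treats it as a literal token.
        return " ".join(f'"{w}"' for w in query.split())
    return query


print(sanitize_query("machine-learning models"))  # "machine-learning" "models"
print(sanitize_query("epilepsy AND seizure"))     # unchanged, operators still work
```

Quoting every term trades FTS5 operator support for safety: an unquoted hyphen inside a token is a syntax error in an FTS5 MATCH expression, while a quoted string is matched literally.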
crossref_local/{models.py → _core/models.py}

@@ -1,11 +1,16 @@
 """Data models for crossref_local."""
 
-from dataclasses import dataclass
+from dataclasses import dataclass as _dataclass
+from dataclasses import field as _field
 from typing import List, Optional
-import json
 
+__all__ = [
+    "Work",
+    "SearchResult",
+]
 
-@dataclass
+
+@_dataclass
 class Work:
    """
    Represents a scholarly work from CrossRef.

@@ -30,7 +35,7 @@ class Work:
 
    doi: str
    title: Optional[str] = None
-    authors: List[str] = …
+    authors: List[str] = _field(default_factory=list)
    year: Optional[int] = None
    journal: Optional[str] = None
    issn: Optional[str] = None

@@ -42,7 +47,7 @@ class Work:
    abstract: Optional[str] = None
    url: Optional[str] = None
    citation_count: Optional[int] = None
-    references: List[str] = …
+    references: List[str] = _field(default_factory=list)
 
    @classmethod
    def from_metadata(cls, doi: str, metadata: dict) -> "Work":

@@ -159,7 +164,7 @@ class Work:
        return ". ".join(filter(None, parts))
 
 
-@dataclass
+@_dataclass
 class SearchResult:
    """
    Container for search results with metadata.
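The new `field` import earns its place in the changed attribute lines: dataclasses reject plain mutable defaults, so list-valued fields must go through `default_factory`. A condensed, hypothetical stand-in for `Work` (most fields omitted) shows why:

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Work:
    doi: str
    title: Optional[str] = None
    # A bare `= []` here would raise ValueError at class-definition time.
    authors: List[str] = field(default_factory=list)
    references: List[str] = field(default_factory=list)


a = Work(doi="10.1234/a")
b = Work(doi="10.1234/b")
a.authors.append("Someone")
print(b.authors)  # [] -- each instance gets its own fresh list
```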
The three {impact_factor → _impact_factor} files (__init__.py, calculator.py, journal_lookup.py) were renamed without content changes.
crossref_local/_remote/__init__.py (new file)

@@ -0,0 +1,56 @@
+"""Remote API client package with collection support.
+
+Provides RemoteClient for connecting to CrossRef Local API server.
+"""
+
+from typing import Optional
+
+from .base import (
+    RemoteClient as _BaseClient,
+    DEFAULT_API_URL,
+)
+from .collections import CollectionsMixin
+
+
+class RemoteClient(CollectionsMixin, _BaseClient):
+    """Remote client with collection support.
+
+    Extends base RemoteClient with collection management methods.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> # Create a collection
+        >>> client.create_collection("epilepsy", query="epilepsy seizure")
+        >>> # Query collection
+        >>> papers = client.get_collection("epilepsy", fields=["doi", "title"])
+        >>> # Download as file
+        >>> client.download_collection("epilepsy", "papers.bib", format="bibtex")
+    """
+
+    pass
+
+
+# Module-level client singleton
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client with collection support."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
+
+
+__all__ = [
+    "RemoteClient",
+    "DEFAULT_API_URL",
+    "get_client",
+    "reset_client",
+]
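`RemoteClient(CollectionsMixin, _BaseClient)` leans on Python's method resolution order: the mixin is listed first so its methods win, yet it can still call base methods through `self`, which is why the subclass body is just `pass`. A hypothetical miniature of the pattern:

```python
class Base:
    def get(self, key: str) -> str:
        return f"base:{key}"


class ExtrasMixin:
    def get_twice(self, key: str) -> list:
        # Mixin code reaches Base.get via self at runtime.
        return [self.get(key), self.get(key)]


class Client(ExtrasMixin, Base):
    pass


c = Client()
print([cls.__name__ for cls in Client.__mro__])  # ['Client', 'ExtrasMixin', 'Base', 'object']
print(c.get_twice("x"))                          # ['base:x', 'base:x']
```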
crossref_local/_remote/base.py (new file)

@@ -0,0 +1,356 @@
+"""Remote API client for crossref_local.
+
+Connects to a CrossRef Local API server instead of direct database access.
+Use this when the database is on a remote server accessible via HTTP.
+"""
+
+import json
+import urllib.request
+import urllib.parse
+import urllib.error
+from typing import List, Optional, Dict, Any
+
+from .._core.models import Work, SearchResult
+from .._core.config import DEFAULT_PORT
+
+# Default URL uses SCITEX port convention
+DEFAULT_API_URL = f"http://localhost:{DEFAULT_PORT}"
+
+
+class RemoteClient:
+    """
+    HTTP client for CrossRef Local API server.
+
+    Provides the same interface as the local API but connects
+    to a remote server via HTTP.
+
+    Example:
+        >>> client = RemoteClient("http://localhost:31291")
+        >>> results = client.search(title="machine learning", limit=10)
+        >>> work = client.get("10.1038/nature12373")
+    """
+
+    def __init__(self, base_url: str = DEFAULT_API_URL, timeout: int = 30):
+        """
+        Initialize remote client.
+
+        Args:
+            base_url: API server URL (default: http://localhost:3333)
+            timeout: Request timeout in seconds
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+
+    def _request(
+        self,
+        endpoint: str,
+        params: Optional[Dict[str, Any]] = None,
+        method: str = "GET",
+        data: Optional[Dict[str, Any]] = None,
+    ) -> Dict:
+        """Make HTTP request to API."""
+        url = f"{self.base_url}{endpoint}"
+        if params:
+            # Filter out None values
+            params = {k: v for k, v in params.items() if v is not None}
+            if params:
+                url = f"{url}?{urllib.parse.urlencode(params)}"
+
+        try:
+            req_data = None
+            if data is not None:
+                req_data = json.dumps(data).encode("utf-8")
+
+            req = urllib.request.Request(url, data=req_data, method=method)
+            req.add_header("Accept", "application/json")
+            if req_data:
+                req.add_header("Content-Type", "application/json")
+
+            with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                return json.loads(response.read().decode("utf-8"))
+        except urllib.error.HTTPError as e:
+            if e.code == 404:
+                return None
+            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
+        except urllib.error.URLError as e:
+            raise ConnectionError(
+                f"Cannot connect to API at {self.base_url}: {e.reason}"
+            ) from e
+
+    def health(self) -> Dict:
+        """Check API server health."""
+        return self._request("/health")
+
+    def info(self) -> Dict:
+        """Get database/API information."""
+        root = self._request("/")
+        info_data = self._request("/info")
+        return {
+            "api_url": self.base_url,
+            "api_version": root.get("version", "unknown"),
+            "status": root.get("status", "unknown"),
+            "mode": "remote",
+            "works": info_data.get("total_papers", 0) if info_data else 0,
+            "fts_indexed": info_data.get("fts_indexed", 0) if info_data else 0,
+            "citations": info_data.get("citations", 0) if info_data else 0,
+        }
+
+    def search(
+        self,
+        query: Optional[str] = None,
+        doi: Optional[str] = None,
+        title: Optional[str] = None,
+        authors: Optional[str] = None,
+        year: Optional[int] = None,
+        limit: int = 10,
+        offset: int = 0,
+    ) -> SearchResult:
+        """
+        Search for papers.
+
+        Args:
+            query: Full-text search query (searches title by default)
+            doi: Search by DOI
+            title: Search by title (explicit)
+            authors: Search by author name
+            year: Filter by publication year
+            limit: Maximum results (default: 10, max: 100)
+            offset: Skip first N results for pagination
+
+        Returns:
+            SearchResult with matching works
+        """
+        # Use new /works endpoint with FTS5 search
+        search_query = query or title
+
+        params = {
+            "q": search_query,
+            "limit": min(limit, 100),
+            "offset": offset,
+        }
+
+        data = self._request("/works", params)
+
+        if not data:
+            return SearchResult(works=[], total=0, query=query or "", elapsed_ms=0.0)
+
+        works = []
+        for item in data.get("results", []):
+            work = Work(
+                doi=item.get("doi", ""),
+                title=item.get("title", ""),
+                authors=item.get("authors", []),
+                year=item.get("year"),
+                journal=item.get("journal"),
+                volume=item.get("volume"),
+                issue=item.get("issue"),
+                page=item.get("page") or item.get("pages"),
+                abstract=item.get("abstract"),
+                citation_count=item.get("citation_count"),
+            )
+            works.append(work)
+
+        return SearchResult(
+            works=works,
+            total=data.get("total", len(works)),
+            query=query or title or doi or "",
+            elapsed_ms=data.get("elapsed_ms", 0.0),
+        )
+
+    def get(self, doi: str) -> Optional[Work]:
+        """
+        Get a work by DOI.
+
+        Args:
+            doi: Digital Object Identifier
+
+        Returns:
+            Work object or None if not found
+        """
+        # Use /works/{doi} endpoint directly
+        data = self._request(f"/works/{doi}")
+        if not data or "error" in data:
+            return None
+
+        return Work(
+            doi=data.get("doi", doi),
+            title=data.get("title", ""),
+            authors=data.get("authors", []),
+            year=data.get("year"),
+            journal=data.get("journal"),
+            volume=data.get("volume"),
+            issue=data.get("issue"),
+            page=data.get("page"),
+            abstract=data.get("abstract"),
+            citation_count=data.get("citation_count"),
+        )
+
+    def get_many(self, dois: List[str]) -> List[Work]:
+        """
+        Get multiple works by DOI using batch endpoint.
+
+        Args:
+            dois: List of DOIs
+
+        Returns:
+            List of Work objects
+        """
+        # Use batch endpoint if available
+        try:
+            data = {"dois": dois}
+            req_data = json.dumps(data).encode("utf-8")
+            req = urllib.request.Request(
+                f"{self.base_url}/works/batch", data=req_data, method="POST"
+            )
+            req.add_header("Content-Type", "application/json")
+            req.add_header("Accept", "application/json")
+
+            with urllib.request.urlopen(req, timeout=self.timeout) as response:
+                result = json.loads(response.read().decode("utf-8"))
+
+            works = []
+            for item in result.get("results", []):
+                work = Work(
+                    doi=item.get("doi", ""),
+                    title=item.get("title", ""),
+                    authors=item.get("authors", []),
+                    year=item.get("year"),
+                    journal=item.get("journal"),
+                    volume=item.get("volume"),
+                    issue=item.get("issue"),
+                    page=item.get("page"),
+                    abstract=item.get("abstract"),
+                    citation_count=item.get("citation_count"),
+                )
+                works.append(work)
+            return works
+        except Exception:
+            # Fallback to individual lookups
+            works = []
+            for doi in dois:
+                work = self.get(doi)
+                if work:
+                    works.append(work)
+            return works
+
+    def exists(self, doi: str) -> bool:
+        """Check if a DOI exists."""
+        return self.get(doi) is not None
+
+    def get_citations(self, doi: str, direction: str = "both") -> Dict:
+        """
+        Get citations for a paper (legacy endpoint).
+
+        Args:
+            doi: Paper DOI
+            direction: 'citing', 'cited_by', or 'both'
+
+        Returns:
+            Dict with citation information
+        """
+        params = {"doi": doi, "direction": direction}
+        return self._request("/api/citations/", params) or {}
+
+    def get_citing(self, doi: str, limit: int = 100) -> List[str]:
+        """
+        Get DOIs of papers that cite the given DOI.
+
+        Args:
+            doi: The DOI to find citations for
+            limit: Maximum number of citing papers to return
+
+        Returns:
+            List of DOIs that cite this paper
+        """
+        data = self._request(f"/citations/{doi}/citing", {"limit": limit})
+        if not data:
+            return []
+        return data.get("papers", [])
+
+    def get_cited(self, doi: str, limit: int = 100) -> List[str]:
+        """
+        Get DOIs of papers that the given DOI cites (references).
+
+        Args:
+            doi: The DOI to find references for
+            limit: Maximum number of referenced papers to return
+
+        Returns:
+            List of DOIs that this paper cites
+        """
+        data = self._request(f"/citations/{doi}/cited", {"limit": limit})
+        if not data:
+            return []
+        return data.get("papers", [])
+
+    def get_citation_count(self, doi: str) -> int:
+        """
+        Get the number of citations for a DOI.
+
+        Args:
+            doi: The DOI to count citations for
+
+        Returns:
+            Number of papers citing this DOI
+        """
+        data = self._request(f"/citations/{doi}/count")
+        if not data:
+            return 0
+        return data.get("citation_count", 0)
+
+    def get_citation_network(
+        self, doi: str, depth: int = 1, max_citing: int = 25, max_cited: int = 25
+    ) -> Dict:
+        """
+        Get citation network graph for a DOI.
+
+        Args:
+            doi: The DOI to build the network around
+            depth: How many levels of citations to include (1-3)
+            max_citing: Max papers citing each node to include
+            max_cited: Max papers each node cites to include
+
+        Returns:
+            Dict with nodes, edges, and stats
+        """
+        params = {
+            "depth": depth,
+            "max_citing": max_citing,
+            "max_cited": max_cited,
+        }
+        data = self._request(f"/citations/{doi}/network", params)
+        return data or {}
+
+    def get_journal(
+        self, issn: Optional[str] = None, name: Optional[str] = None
+    ) -> Dict:
+        """
+        Get journal information.
+
+        Args:
+            issn: Journal ISSN
+            name: Journal name
+
+        Returns:
+            Dict with journal information
+        """
+        params = {"issn": issn, "name": name}
+        return self._request("/api/journal/", params) or {}
+
+
+# Module-level client for convenience
+_client: Optional[RemoteClient] = None
+
+
+def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
+    """Get or create singleton remote client."""
+    global _client
+    if _client is None or _client.base_url != base_url:
+        _client = RemoteClient(base_url)
+    return _client
+
+
+def reset_client() -> None:
+    """Reset singleton client."""
+    global _client
+    _client = None
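Tying the new package together, a hedged end-to-end sketch: it assumes a CrossRef Local API server is actually listening on `DEFAULT_PORT`, and it imports from the private `_remote` package shown above (the slimmed-down `crossref_local/remote.py` in the file list presumably re-exports the same names):

```python
from crossref_local._remote import get_client

client = get_client()              # singleton; later calls reuse the instance
print(client.health())             # response shape depends on the server

result = client.search(query="epilepsy", limit=5)
print(result.total, f"{result.elapsed_ms:.1f} ms")
for work in result.works:
    print(work.doi, work.title)

doi = "10.1038/nature12373"        # illustrative DOI from the docstrings
if client.exists(doi):
    print(client.get_citation_count(doi))
    print(client.get_citing(doi, limit=10))
```

When the server is unreachable, every call raises `ConnectionError` (see `_request` above), so scripts should wrap this in a try/except rather than assume the API is up.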