openalex-local 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local/__init__.py +54 -3
- openalex_local/__main__.py +6 -0
- openalex_local/_cache/__init__.py +45 -0
- openalex_local/_cache/core.py +298 -0
- openalex_local/_cache/export.py +100 -0
- openalex_local/_cache/models.py +17 -0
- openalex_local/_cache/utils.py +85 -0
- openalex_local/_cli/__init__.py +9 -0
- openalex_local/_cli/cli.py +409 -0
- openalex_local/_cli/cli_cache.py +220 -0
- openalex_local/_cli/mcp.py +210 -0
- openalex_local/_cli/mcp_server.py +235 -0
- openalex_local/_core/__init__.py +42 -0
- openalex_local/_core/api.py +376 -0
- openalex_local/_core/config.py +120 -0
- openalex_local/_core/db.py +214 -0
- openalex_local/_core/export.py +252 -0
- openalex_local/_core/fts.py +165 -0
- openalex_local/_core/models.py +432 -0
- openalex_local/_remote/__init__.py +34 -0
- openalex_local/_remote/base.py +256 -0
- openalex_local/_server/__init__.py +117 -0
- openalex_local/_server/routes.py +175 -0
- openalex_local/aio.py +259 -0
- openalex_local/cache.py +31 -0
- openalex_local/cli.py +8 -0
- openalex_local/jobs.py +169 -0
- openalex_local/remote.py +8 -0
- openalex_local/server.py +8 -0
- openalex_local-0.3.1.dist-info/METADATA +288 -0
- openalex_local-0.3.1.dist-info/RECORD +34 -0
- {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +1 -1
- openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
- openalex_local/config.py +0 -73
- openalex_local/models.py +0 -187
- openalex_local-0.1.0.dist-info/METADATA +0 -152
- openalex_local-0.1.0.dist-info/RECORD +0 -8
- openalex_local-0.1.0.dist-info/entry_points.txt +0 -2
- {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""Main API for openalex_local.
|
|
2
|
+
|
|
3
|
+
Supports two modes:
|
|
4
|
+
- db: Direct database access (requires database file)
|
|
5
|
+
- http: HTTP API access (requires API server)
|
|
6
|
+
|
|
7
|
+
Mode is auto-detected or can be set explicitly via:
|
|
8
|
+
- OPENALEX_LOCAL_MODE environment variable ("db" or "http")
|
|
9
|
+
- OPENALEX_LOCAL_API_URL environment variable (API URL)
|
|
10
|
+
- configure() or configure_http() functions
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import List, Optional
|
|
14
|
+
|
|
15
|
+
from . import fts
|
|
16
|
+
from .config import Config
|
|
17
|
+
from .db import close_db, get_db
|
|
18
|
+
from .models import SearchResult, Work
|
|
19
|
+
|
|
20
|
+
# Public names exported by this module. ``configure_http`` is listed next to
# ``configure`` because the module docstring advertises both as the supported
# ways to select a mode.
__all__ = [
    # Core functions
    "search",
    "count",
    "get",
    "get_many",
    "exists",
    "info",
    # Enrich functions
    "enrich",
    "enrich_ids",
    # Configuration
    "configure",
    "configure_http",
    "get_mode",
    # Models (public)
    "Work",
    "SearchResult",
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _get_http_client():
    """Build a RemoteClient pointed at the currently configured API URL.

    The import is done inside the function (not at module level) to avoid a
    circular dependency between this module and ``_remote``.
    """
    from .._remote import RemoteClient

    base_url = Config.get_api_url()
    return RemoteClient(base_url)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def search(
    query: str,
    limit: int = 20,
    offset: int = 0,
) -> SearchResult:
    """
    Run a full-text search over works.

    In "http" mode the request is forwarded to the remote API client;
    otherwise it is answered by the local ``fts`` module.

    Args:
        query: Search query (supports FTS5 syntax)
        limit: Maximum number of results to return
        offset: Number of leading results to skip (for pagination)

    Returns:
        SearchResult with matching works

    Example:
        >>> from openalex_local import search
        >>> results = search("machine learning")
        >>> print(f"Found {results.total} matches")
    """
    if Config.get_mode() != "http":
        return fts.search(query, limit, offset)

    remote = _get_http_client()
    return remote.search(query=query, limit=limit, offset=offset)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def count(query: str) -> int:
    """
    Return how many works match a query, without returning the works.

    Args:
        query: FTS5 search query

    Returns:
        Number of matching works
    """
    if Config.get_mode() != "http":
        return fts.count(query)

    # In HTTP mode a one-result search is issued and only its ``total`` is used.
    remote = _get_http_client()
    return remote.search(query=query, limit=1).total
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get(id_or_doi: str) -> Optional[Work]:
    """
    Look up a single work by OpenAlex ID or DOI.

    Args:
        id_or_doi: OpenAlex ID (e.g., W2741809807) or DOI

    Returns:
        Work object, or None if nothing matched

    Example:
        >>> from openalex_local import get
        >>> work = get("W2741809807")
        >>> work = get("10.1038/nature12373")
        >>> print(work.title)
    """
    if Config.get_mode() == "http":
        return _get_http_client().get(id_or_doi)

    database = get_db()

    # Values starting with "W"/"w" are tried as an (upper-cased) OpenAlex ID
    # first; a miss falls through to the DOI lookup below.
    if id_or_doi.startswith(("W", "w")):
        record = database.get_work(id_or_doi.upper())
        if record:
            return Work.from_db_row(record)

    # DOI lookup uses the value exactly as given.
    record = database.get_work_by_doi(id_or_doi)
    return Work.from_db_row(record) if record else None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_many(ids: List[str]) -> List[Work]:
    """
    Look up several works by OpenAlex ID or DOI.

    Args:
        ids: List of OpenAlex IDs or DOIs

    Returns:
        List of Work objects in input order; identifiers that resolve to
        nothing are skipped
    """
    if Config.get_mode() == "http":
        return _get_http_client().get_many(ids)

    # Local mode: one get() call per identifier, keeping only the hits.
    return [found for found in (get(identifier) for identifier in ids) if found]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def exists(id_or_doi: str) -> bool:
    """
    Report whether a work is present.

    Args:
        id_or_doi: OpenAlex ID or DOI

    Returns:
        True if a matching work exists
    """
    if Config.get_mode() == "http":
        return _get_http_client().exists(id_or_doi)

    database = get_db()

    # Values starting with "W"/"w" are checked as an (upper-cased) OpenAlex ID
    # first; a miss falls through to the DOI check.
    if id_or_doi.startswith(("W", "w")):
        hit = database.fetchone(
            "SELECT 1 FROM works WHERE openalex_id = ?", (id_or_doi.upper(),)
        )
        if hit:
            return True

    hit = database.fetchone("SELECT 1 FROM works WHERE doi = ?", (id_or_doi,))
    return hit is not None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def configure(db_path: str) -> None:
    """
    Point the library at a local SQLite database.

    Args:
        db_path: Path to OpenAlex SQLite database

    Raises:
        FileNotFoundError: If ``db_path`` does not exist (raised by
            ``Config.set_db_path``)

    Example:
        >>> from openalex_local import configure
        >>> configure("/path/to/openalex.db")
    """
    # Validates the path and records it in Config.
    Config.set_db_path(db_path)
    # Drop any cached singleton connection so it is not reused after the
    # path change.
    close_db()
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def configure_http(api_url: str = "http://localhost:31292") -> None:
    """
    Point the library at an HTTP API server.

    Args:
        api_url: URL of OpenAlex Local API server

    Example:
        >>> from openalex_local import configure_http
        >>> configure_http("http://localhost:31292")
    """
    # Config.set_api_url stores the URL and switches the mode to "http".
    Config.set_api_url(api_url)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_mode() -> str:
    """
    Report the access mode currently in effect.

    Returns:
        "db" or "http"
    """
    current_mode = Config.get_mode()
    return current_mode
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _query_int(db, sql: str, column: str) -> int:
    """Run ``sql`` and return ``column`` of the first row as an int.

    Returns 0 when the query fails, yields no row, or yields NULL (for example
    ``MAX(rowid)`` over an empty table), or when the value is not
    int-convertible.
    """
    try:
        row = db.fetchone(sql)
        if row is None or row[column] is None:
            return 0
        return int(row[column])
    except Exception:
        # Deliberate best-effort: info() is a status probe and must not fail
        # because an optional table or metadata key is missing or malformed.
        return 0


def info() -> dict:
    """
    Get database/API information.

    In "http" mode the remote server's info is returned, merged over
    ``{"mode": "http", "status": "ok"}``. In "db" mode, counts are read from
    the local database on a best-effort basis: any count that cannot be
    determined is reported as 0 rather than raising.

    Returns:
        Dictionary with database stats and mode info. In "db" mode the keys
        are ``status``, ``mode``, ``db_path``, ``work_count``, ``fts_indexed``,
        ``has_sources`` and ``sources_count``.

    Raises:
        FileNotFoundError: If no database configured and HTTP mode unavailable
    """
    if Config.get_mode() == "http":
        http_info = _get_http_client().info()
        return {"mode": "http", "status": "ok", **http_info}

    # DB mode - will raise FileNotFoundError if no database
    db = get_db()

    # Prefer the precomputed value in _metadata (fast); otherwise approximate
    # with MAX(rowid), which is much cheaper than COUNT(*) on a large table.
    # MAX(rowid) is NULL for an empty table; _query_int maps that to 0.
    work_count = _query_int(
        db, "SELECT value FROM _metadata WHERE key = 'total_works'", "value"
    )
    if work_count == 0:
        work_count = _query_int(db, "SELECT MAX(rowid) as count FROM works", "count")

    # Same strategy for the number of FTS-indexed rows.
    fts_count = _query_int(
        db, "SELECT value FROM _metadata WHERE key = 'fts_total_indexed'", "value"
    )
    if fts_count == 0:
        fts_count = _query_int(
            db, "SELECT MAX(rowid) as count FROM works_fts", "count"
        )

    # The sources table is optional; only count it when it exists.
    try:
        has_sources = bool(db.has_sources_table())
    except Exception:
        # Best-effort, as above: treat a failed probe as "no sources table".
        has_sources = False
    sources_count = 0
    if has_sources:
        sources_count = _query_int(
            db, "SELECT COUNT(*) as count FROM sources", "count"
        )

    return {
        "status": "ok",
        "mode": "db",
        "db_path": str(Config.get_db_path()),
        "work_count": work_count,
        "fts_indexed": fts_count,
        "has_sources": has_sources,
        "sources_count": sources_count,
    }
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def enrich(
    results: SearchResult,
    include_abstract: bool = True,
    include_concepts: bool = True,
) -> SearchResult:
    """
    Re-fetch every work of a search result with its full metadata.

    Each work in ``results`` is looked up again by its ``openalex_id`` via
    ``get_many`` so that fields which may be truncated in search results
    (abstract, concepts) are populated.

    Args:
        results: SearchResult from a search query
        include_abstract: Include full abstract text (default True)
        include_concepts: Include concept/topic data (default True)

    Returns:
        SearchResult with enriched Work objects; ``total``, ``query`` and
        ``elapsed_ms`` are carried over from ``results``. An input with no
        works is returned unchanged.

    Example:
        >>> results = search("machine learning", limit=10)
        >>> enriched = enrich(results)
        >>> for work in enriched:
        ...     print(work.abstract)  # Full abstract available
    """
    if not results.works:
        return results

    refreshed = get_many([item.openalex_id for item in results.works])

    # Blank out whichever optional fields the caller opted out of.
    for item in refreshed:
        if not include_abstract:
            item.abstract = None
        if not include_concepts:
            item.concepts = []
            item.topics = []

    return SearchResult(
        works=refreshed,
        total=results.total,
        query=results.query,
        elapsed_ms=results.elapsed_ms,
    )
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def enrich_ids(
    ids: List[str],
    include_abstract: bool = True,
    include_concepts: bool = True,
) -> List[Work]:
    """
    Fetch full metadata for a list of OpenAlex IDs or DOIs.

    Args:
        ids: List of OpenAlex IDs (e.g., W2741809807) or DOIs
        include_abstract: Include full abstract text (default True)
        include_concepts: Include concept/topic data (default True)

    Returns:
        List of Work objects with full metadata

    Example:
        >>> ids = ["W2741809807", "10.1038/nature12373"]
        >>> works = enrich_ids(ids)
        >>> for work in works:
        ...     print(f"{work.title}: {work.cited_by_count} citations")
    """
    fetched = get_many(ids)

    drop_abstract = not include_abstract
    drop_concepts = not include_concepts

    # Only walk the list when the caller opted out of at least one field.
    if drop_abstract or drop_concepts:
        for entry in fetched:
            if drop_abstract:
                entry.abstract = None
            if drop_concepts:
                entry.concepts = []
                entry.topics = []

    return fetched
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: 2026-01-29
|
|
3
|
+
"""Configuration for openalex_local."""
|
|
4
|
+
|
|
5
|
+
import os as _os
|
|
6
|
+
from pathlib import Path as _Path
|
|
7
|
+
from typing import Optional as _Optional
|
|
8
|
+
|
|
9
|
+
# Default database locations (checked in order)
|
|
10
|
+
# NOTE(review): the first three entries are absolute paths that look specific
# to one development machine; on other hosts they simply will not exist.
# Path.home() and Path.cwd() are evaluated once, when this module is imported.
DEFAULT_DB_PATHS = [
    _Path("/home/ywatanabe/proj/openalex-local/data/openalex.db"),
    _Path("/home/ywatanabe/proj/openalex_local/data/openalex.db"),
    _Path("/mnt/nas_ug/openalex_local/data/openalex.db"),
    _Path.home().joinpath(".openalex_local", "openalex.db"),
    _Path.cwd().joinpath("data", "openalex.db"),
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_db_path() -> _Path:
    """Resolve the database file location.

    The ``OPENALEX_LOCAL_DB`` environment variable wins when it is set to a
    non-empty value; in that case the path must exist. Otherwise the first
    existing entry of ``DEFAULT_DB_PATHS`` is used.

    Raises:
        FileNotFoundError: If the environment path does not exist, or if no
            default location exists.
    """
    env_path = _os.environ.get("OPENALEX_LOCAL_DB")
    if env_path:
        candidate = _Path(env_path)
        if not candidate.exists():
            # An explicit setting never falls back to the default locations.
            raise FileNotFoundError(f"OPENALEX_LOCAL_DB path not found: {env_path}")
        return candidate

    first_existing = next(
        (location for location in DEFAULT_DB_PATHS if location.exists()), None
    )
    if first_existing is None:
        raise FileNotFoundError(
            "OpenAlex database not found. Set OPENALEX_LOCAL_DB environment variable."
        )
    return first_existing
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Default port; used to build the fallback API URL in Config.get_api_url().
DEFAULT_PORT = 31292
# NOTE(review): presumably the server bind address ("0.0.0.0" would listen on
# all interfaces) - its use is not visible here; confirm against the server module.
DEFAULT_HOST = "0.0.0.0"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Config:
    """Configuration container.

    All state lives on the class itself and is read and written through
    classmethods, so it is shared by every caller in the process.
    """

    # Explicit database path; None means "resolve lazily via get_db_path()".
    _db_path: _Optional[_Path] = None
    # Explicit API URL set via set_api_url(); None means "use env or default".
    _api_url: _Optional[str] = None
    _mode: str = "auto"  # "auto", "db", or "http"

    @classmethod
    def get_db_path(cls) -> _Path:
        """Return the database path, resolving and caching it on first use.

        Raises:
            FileNotFoundError: If no path was set and auto-detection fails.
        """
        if cls._db_path is None:
            cls._db_path = get_db_path()
        return cls._db_path

    @classmethod
    def set_db_path(cls, path: str) -> None:
        """Set the database path and switch to "db" mode.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        p = _Path(path)
        if not p.exists():
            raise FileNotFoundError(f"Database not found: {path}")
        cls._db_path = p
        cls._mode = "db"

    @classmethod
    def get_api_url(cls) -> str:
        """Return the API base URL without a trailing slash.

        Precedence: URL set via set_api_url(), then a non-empty
        ``OPENALEX_LOCAL_API_URL`` environment variable, then
        ``http://localhost:<DEFAULT_PORT>``.
        """
        if cls._api_url:
            return cls._api_url
        env_url = _os.environ.get("OPENALEX_LOCAL_API_URL")
        if env_url:
            # Normalize like set_api_url() so both sources give the same shape.
            return env_url.rstrip("/")
        # An empty variable counts as unset, matching the check in get_mode().
        return f"http://localhost:{DEFAULT_PORT}"

    @classmethod
    def set_api_url(cls, url: str) -> None:
        """Set the API base URL (trailing slashes removed) and switch to "http" mode."""
        cls._api_url = url.rstrip("/")
        cls._mode = "http"

    @classmethod
    def set_mode(cls, mode: str) -> None:
        """Set mode explicitly: 'db', 'http', or 'auto'.

        Raises:
            ValueError: If ``mode`` is not one of the three accepted values.
        """
        if mode not in ("auto", "db", "http"):
            raise ValueError(f"Invalid mode: {mode}. Use 'auto', 'db', or 'http'")
        cls._mode = mode

    @classmethod
    def get_mode(cls) -> str:
        """
        Get current mode.

        An explicitly set mode is returned as-is. In "auto" mode the result is
        decided on each call, in this order: the ``OPENALEX_LOCAL_MODE``
        environment variable, the presence of an API URL (set explicitly or
        via ``OPENALEX_LOCAL_API_URL``), then whether a local database can be
        located. The outcome is not cached.

        Returns:
            "db" if using direct database access
            "http" if using HTTP API
        """
        if cls._mode != "auto":
            return cls._mode

        # Check environment variable for explicit mode (a few aliases accepted)
        env_mode = _os.environ.get("OPENALEX_LOCAL_MODE", "").lower()
        if env_mode in ("http", "remote", "api"):
            return "http"
        if env_mode in ("db", "local"):
            return "db"

        # Check if API URL is set explicitly
        if cls._api_url or _os.environ.get("OPENALEX_LOCAL_API_URL"):
            return "http"

        # Check if local database exists
        try:
            get_db_path()
        except FileNotFoundError:
            # No local DB, try http
            return "http"
        return "db"

    @classmethod
    def reset(cls) -> None:
        """Clear the stored path and URL and return to "auto" mode."""
        cls._db_path = None
        cls._api_url = None
        cls._mode = "auto"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# EOF
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Database connection handling for openalex_local."""
|
|
2
|
+
|
|
3
|
+
import json as _json
|
|
4
|
+
import sqlite3 as _sqlite3
|
|
5
|
+
from contextlib import contextmanager as _contextmanager
|
|
6
|
+
from pathlib import Path as _Path
|
|
7
|
+
from typing import Any, Dict, Generator, List, Optional
|
|
8
|
+
|
|
9
|
+
from .config import Config as _Config
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Database",
|
|
13
|
+
"get_db",
|
|
14
|
+
"close_db",
|
|
15
|
+
"connection",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Database:
|
|
20
|
+
"""
|
|
21
|
+
Database connection manager.
|
|
22
|
+
|
|
23
|
+
Supports both direct usage and context manager pattern.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, db_path: Optional[str | _Path] = None):
|
|
27
|
+
"""
|
|
28
|
+
Initialize database connection.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
db_path: Path to database. If None, auto-detects.
|
|
32
|
+
"""
|
|
33
|
+
if db_path:
|
|
34
|
+
self.db_path = _Path(db_path)
|
|
35
|
+
else:
|
|
36
|
+
self.db_path = _Config.get_db_path()
|
|
37
|
+
|
|
38
|
+
self.conn: Optional[_sqlite3.Connection] = None
|
|
39
|
+
self._connect()
|
|
40
|
+
|
|
41
|
+
def _connect(self) -> None:
|
|
42
|
+
"""Establish database connection."""
|
|
43
|
+
self.conn = _sqlite3.connect(self.db_path, check_same_thread=False)
|
|
44
|
+
self.conn.row_factory = _sqlite3.Row
|
|
45
|
+
|
|
46
|
+
def close(self) -> None:
|
|
47
|
+
"""Close database connection."""
|
|
48
|
+
if self.conn:
|
|
49
|
+
self.conn.close()
|
|
50
|
+
self.conn = None
|
|
51
|
+
|
|
52
|
+
def __enter__(self) -> "Database":
|
|
53
|
+
return self
|
|
54
|
+
|
|
55
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
56
|
+
self.close()
|
|
57
|
+
|
|
58
|
+
def execute(self, query: str, params: tuple = ()) -> _sqlite3.Cursor:
|
|
59
|
+
"""Execute SQL query."""
|
|
60
|
+
return self.conn.execute(query, params)
|
|
61
|
+
|
|
62
|
+
def fetchone(self, query: str, params: tuple = ()) -> Optional[_sqlite3.Row]:
|
|
63
|
+
"""Execute query and fetch one result."""
|
|
64
|
+
cursor = self.execute(query, params)
|
|
65
|
+
return cursor.fetchone()
|
|
66
|
+
|
|
67
|
+
def fetchall(self, query: str, params: tuple = ()) -> List[_sqlite3.Row]:
|
|
68
|
+
"""Execute query and fetch all results."""
|
|
69
|
+
cursor = self.execute(query, params)
|
|
70
|
+
return cursor.fetchall()
|
|
71
|
+
|
|
72
|
+
def get_work(self, openalex_id: str) -> Optional[Dict[str, Any]]:
|
|
73
|
+
"""
|
|
74
|
+
Get work data by OpenAlex ID.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
openalex_id: OpenAlex ID (e.g., W2741809807)
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Work data dictionary or None
|
|
81
|
+
"""
|
|
82
|
+
row = self.fetchone("SELECT * FROM works WHERE openalex_id = ?", (openalex_id,))
|
|
83
|
+
if row:
|
|
84
|
+
return self._row_to_dict(row)
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
def get_work_by_doi(self, doi: str) -> Optional[Dict[str, Any]]:
|
|
88
|
+
"""
|
|
89
|
+
Get work data by DOI.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
doi: DOI string
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
Work data dictionary or None
|
|
96
|
+
"""
|
|
97
|
+
row = self.fetchone("SELECT * FROM works WHERE doi = ?", (doi,))
|
|
98
|
+
if row:
|
|
99
|
+
return self._row_to_dict(row)
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
def _row_to_dict(self, row: _sqlite3.Row) -> Dict[str, Any]:
|
|
103
|
+
"""Convert SQLite row to dictionary, parsing JSON fields."""
|
|
104
|
+
result = dict(row)
|
|
105
|
+
|
|
106
|
+
# Parse JSON fields
|
|
107
|
+
for field in ["authors_json", "concepts_json", "topics_json"]:
|
|
108
|
+
if field in result and result[field]:
|
|
109
|
+
try:
|
|
110
|
+
result[field.replace("_json", "")] = _json.loads(result[field])
|
|
111
|
+
except (TypeError, _json.JSONDecodeError):
|
|
112
|
+
result[field.replace("_json", "")] = []
|
|
113
|
+
|
|
114
|
+
# Parse raw_json if present
|
|
115
|
+
if "raw_json" in result and result["raw_json"]:
|
|
116
|
+
try:
|
|
117
|
+
result["raw"] = _json.loads(result["raw_json"])
|
|
118
|
+
except (TypeError, _json.JSONDecodeError):
|
|
119
|
+
result["raw"] = {}
|
|
120
|
+
|
|
121
|
+
return result
|
|
122
|
+
|
|
123
|
+
def get_source_metrics(self, issn: str) -> Optional[Dict[str, Any]]:
|
|
124
|
+
"""
|
|
125
|
+
Get source/journal metrics by ISSN.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
issn: Journal ISSN
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
Dictionary with impact_factor, h_index, cited_by_count or None
|
|
132
|
+
"""
|
|
133
|
+
if not issn:
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
# Try lookup via issn_lookup table first (fast)
|
|
137
|
+
row = self.fetchone(
|
|
138
|
+
"""
|
|
139
|
+
SELECT s.two_year_mean_citedness as impact_factor,
|
|
140
|
+
s.h_index as source_h_index,
|
|
141
|
+
s.cited_by_count as source_cited_by_count,
|
|
142
|
+
s.display_name as source_name
|
|
143
|
+
FROM issn_lookup l
|
|
144
|
+
JOIN sources s ON l.source_id = s.id
|
|
145
|
+
WHERE l.issn = ?
|
|
146
|
+
""",
|
|
147
|
+
(issn,),
|
|
148
|
+
)
|
|
149
|
+
if row:
|
|
150
|
+
return dict(row)
|
|
151
|
+
|
|
152
|
+
# Fallback: search in sources.issns JSON field
|
|
153
|
+
row = self.fetchone(
|
|
154
|
+
"""
|
|
155
|
+
SELECT two_year_mean_citedness as impact_factor,
|
|
156
|
+
h_index as source_h_index,
|
|
157
|
+
cited_by_count as source_cited_by_count,
|
|
158
|
+
display_name as source_name
|
|
159
|
+
FROM sources
|
|
160
|
+
WHERE issn_l = ? OR issns LIKE ?
|
|
161
|
+
""",
|
|
162
|
+
(issn, f'%"{issn}"%'),
|
|
163
|
+
)
|
|
164
|
+
if row:
|
|
165
|
+
return dict(row)
|
|
166
|
+
|
|
167
|
+
return None
|
|
168
|
+
|
|
169
|
+
def has_sources_table(self) -> bool:
|
|
170
|
+
"""Check if sources table exists."""
|
|
171
|
+
row = self.fetchone(
|
|
172
|
+
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='sources'"
|
|
173
|
+
)
|
|
174
|
+
return row is not None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# Singleton connection for convenience functions
|
|
178
|
+
_db: Optional[Database] = None
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_db() -> Database:
    """Return the module-level Database, creating it on first use."""
    global _db
    if _db is not None:
        return _db
    # No cached instance yet: Database() with no argument takes its path
    # from Config.
    _db = Database()
    return _db
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def close_db() -> None:
    """Close and forget the module-level Database, if there is one."""
    global _db
    if not _db:
        return
    _db.close()
    # Clearing the reference makes the next get_db() build a new Database.
    _db = None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@_contextmanager
def connection(
    db_path: Optional[str | _Path] = None,
) -> Generator[Database, None, None]:
    """
    Open a dedicated Database for the duration of a ``with`` block.

    This creates a new Database each time; it does not use the module-level
    singleton.

    Args:
        db_path: Path to database. If None, auto-detects.

    Yields:
        Database instance, closed when the block exits (also on error)
    """
    # Database is itself a context manager whose __exit__ closes the connection.
    with Database(db_path) as scoped_db:
        yield scoped_db
|