openalex-local 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local/__init__.py +28 -7
- openalex_local/_cache/__init__.py +45 -0
- openalex_local/_cache/core.py +298 -0
- openalex_local/_cache/export.py +100 -0
- openalex_local/_cache/models.py +17 -0
- openalex_local/_cache/utils.py +85 -0
- openalex_local/_cli/__init__.py +9 -0
- openalex_local/_cli/cli.py +409 -0
- openalex_local/_cli/cli_cache.py +220 -0
- openalex_local/_cli/mcp.py +210 -0
- openalex_local/_cli/mcp_server.py +235 -0
- openalex_local/_core/__init__.py +42 -0
- openalex_local/{api.py → _core/api.py} +137 -19
- openalex_local/_core/config.py +120 -0
- openalex_local/{db.py → _core/db.py} +53 -0
- openalex_local/_core/export.py +252 -0
- openalex_local/{models.py → _core/models.py} +201 -0
- openalex_local/_remote/__init__.py +34 -0
- openalex_local/_remote/base.py +256 -0
- openalex_local/_server/__init__.py +117 -0
- openalex_local/_server/routes.py +175 -0
- openalex_local/aio.py +259 -0
- openalex_local/cache.py +31 -0
- openalex_local/cli.py +4 -205
- openalex_local/jobs.py +169 -0
- openalex_local/remote.py +8 -0
- openalex_local/server.py +8 -0
- openalex_local-0.3.1.dist-info/METADATA +288 -0
- openalex_local-0.3.1.dist-info/RECORD +34 -0
- openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
- openalex_local/config.py +0 -182
- openalex_local-0.3.0.dist-info/METADATA +0 -152
- openalex_local-0.3.0.dist-info/RECORD +0 -13
- openalex_local-0.3.0.dist-info/entry_points.txt +0 -2
- /openalex_local/{fts.py → _core/fts.py} +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""Remote API client for openalex_local.
|
|
2
|
+
|
|
3
|
+
Connects to an OpenAlex Local API server instead of direct database access.
|
|
4
|
+
Use this when the database is on a remote server accessible via HTTP.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import urllib.request
|
|
9
|
+
import urllib.parse
|
|
10
|
+
import urllib.error
|
|
11
|
+
from typing import List, Optional, Dict, Any
|
|
12
|
+
|
|
13
|
+
from .._core.models import Work, SearchResult
|
|
14
|
+
from .._core.config import DEFAULT_PORT
|
|
15
|
+
|
|
16
|
+
# Default URL uses SCITEX port convention
|
|
17
|
+
DEFAULT_API_URL = f"http://localhost:{DEFAULT_PORT}"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RemoteClient:
    """
    HTTP client for OpenAlex Local API server.

    Provides the same interface as the local API but connects
    to a remote server via HTTP.

    Example:
        >>> client = RemoteClient("http://localhost:31292")
        >>> results = client.search(query="machine learning", limit=10)
        >>> work = client.get("W2741809807")
    """

    def __init__(self, base_url: str = DEFAULT_API_URL, timeout: int = 30):
        """
        Initialize remote client.

        Args:
            base_url: API server URL (default: http://localhost:31292).
                Trailing slashes are stripped before it is stored.
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    @staticmethod
    def _work_from_dict(item: Dict[str, Any]) -> Work:
        """Build a Work from one JSON work record returned by the server.

        Shared by search(), get() and get_many() so that every code path
        keeps the same set of fields.
        """
        return Work(
            openalex_id=item.get("openalex_id", ""),
            doi=item.get("doi"),
            title=item.get("title"),
            authors=item.get("authors", []),
            year=item.get("year"),
            source=item.get("source"),
            issn=item.get("issn"),
            volume=item.get("volume"),
            issue=item.get("issue"),
            pages=item.get("pages"),
            abstract=item.get("abstract"),
            cited_by_count=item.get("cited_by_count"),
            concepts=item.get("concepts", []),
            topics=item.get("topics", []),
            is_oa=item.get("is_oa", False),
            oa_url=item.get("oa_url"),
        )

    def _request(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        method: str = "GET",
        data: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict]:
        """Make HTTP request to API.

        Args:
            endpoint: Path appended verbatim to ``base_url`` (e.g. "/works").
            params: Query parameters; entries whose value is None are dropped.
            method: HTTP method.
            data: Optional JSON-serializable request body.

        Returns:
            The decoded JSON response, or None when the server answers 404.

        Raises:
            ConnectionError: On any other HTTP error status, or when the
                server cannot be reached.
        """
        url = f"{self.base_url}{endpoint}"
        if params:
            # Filter out None values
            params = {k: v for k, v in params.items() if v is not None}
            if params:
                url = f"{url}?{urllib.parse.urlencode(params)}"

        try:
            req_data = None
            if data is not None:
                req_data = json.dumps(data).encode("utf-8")

            req = urllib.request.Request(url, data=req_data, method=method)
            req.add_header("Accept", "application/json")
            if req_data is not None:
                req.add_header("Content-Type", "application/json")

            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # HTTPError must be handled before URLError (it is a subclass).
            if e.code == 404:
                return None
            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e.reason}"
            ) from e
        except OSError as e:
            # OSError already covers ConnectionRefusedError/ConnectionResetError.
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e}"
            ) from e

    def health(self) -> Dict:
        """Check API server health."""
        return self._request("/health")

    def info(self) -> Dict:
        """Get database/API information.

        Combines the server's "/" and "/info" responses; fields missing
        from either response fall back to "unknown" or 0.
        """
        root = self._request("/")
        info_data = self._request("/info")
        return {
            "api_url": self.base_url,
            "api_version": root.get("version", "unknown") if root else "unknown",
            "status": root.get("status", "unknown") if root else "unknown",
            "mode": "remote",
            "works": info_data.get("total_works", 0) if info_data else 0,
            "fts_indexed": info_data.get("fts_indexed", 0) if info_data else 0,
        }

    def search(
        self,
        query: str,
        limit: int = 20,
        offset: int = 0,
    ) -> SearchResult:
        """
        Search for works.

        Args:
            query: Full-text search query
            limit: Maximum results (default: 20)
            offset: Skip first N results for pagination

        Returns:
            SearchResult with matching works (empty when the server
            returns no data)
        """
        params = {
            "q": query,
            "limit": limit,
            "offset": offset,
        }

        data = self._request("/works", params)

        if not data:
            return SearchResult(works=[], total=0, query=query, elapsed_ms=0.0)

        works = [self._work_from_dict(item) for item in data.get("results", [])]

        return SearchResult(
            works=works,
            total=data.get("total", len(works)),
            query=query,
            elapsed_ms=data.get("elapsed_ms", 0.0),
        )

    def get(self, id_or_doi: str) -> Optional[Work]:
        """
        Get a work by OpenAlex ID or DOI.

        Args:
            id_or_doi: OpenAlex ID (e.g., W2741809807) or DOI, given as the
                plain identifier (not already percent-encoded)

        Returns:
            Work object or None if not found
        """
        # Percent-encode the identifier so characters such as '#', '?' or
        # spaces (which occur in some DOIs) are not read as URL syntax.
        # '/' stays literal because DOIs contain it as a path separator.
        quoted = urllib.parse.quote(id_or_doi, safe="/")
        data = self._request(f"/works/{quoted}")
        if not data or "error" in data:
            return None

        return self._work_from_dict(data)

    def get_many(self, ids: List[str]) -> List[Work]:
        """
        Get multiple works by OpenAlex ID or DOI using batch endpoint.

        Falls back to one get() call per identifier when the batch request
        fails or the batch endpoint answers 404.

        Args:
            ids: List of OpenAlex IDs or DOIs

        Returns:
            List of Work objects (identifiers that are not found are omitted)
        """
        try:
            payload = self._request("/works/batch", method="POST", data={"ids": ids})
            if payload is not None:
                return [
                    self._work_from_dict(item)
                    for item in payload.get("results", [])
                ]
        except Exception:
            # Deliberate best-effort: any batch failure falls through to the
            # per-identifier lookups below instead of aborting the call.
            payload = None

        # Fallback to individual lookups
        works = []
        for id_or_doi in ids:
            work = self.get(id_or_doi)
            if work:
                works.append(work)
        return works

    def exists(self, id_or_doi: str) -> bool:
        """Check if a work exists."""
        return self.get(id_or_doi) is not None
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# Module-level client for convenience
|
|
242
|
+
_client: Optional[RemoteClient] = None
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
    """Get or create singleton remote client.

    Args:
        base_url: API server URL. A new client replaces the cached one
            when this differs from the cached client's URL.

    Returns:
        The module-level RemoteClient instance.
    """
    global _client
    # RemoteClient stores its base_url with trailing slashes stripped, so
    # compare against the same normalized form; otherwise a URL passed with
    # a trailing "/" would never match and the client would be rebuilt on
    # every call.
    normalized_url = base_url.rstrip("/")
    if _client is None or _client.base_url != normalized_url:
        _client = RemoteClient(base_url)
    return _client
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def reset_client() -> None:
    """Reset singleton client."""
    # Rebind the module-level ``_client`` to None so the next get_client()
    # call constructs a fresh instance.
    globals()["_client"] = None
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""FastAPI server for OpenAlex Local with FTS5 search.
|
|
2
|
+
|
|
3
|
+
Provides HTTP relay server for remote database access.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
openalex-local relay # Run on default port 31292
|
|
7
|
+
openalex-local relay --port 8080 # Custom port
|
|
8
|
+
|
|
9
|
+
# Or directly:
|
|
10
|
+
uvicorn openalex_local.server:app --host 0.0.0.0 --port 31292
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
|
|
15
|
+
from fastapi import FastAPI
|
|
16
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
17
|
+
|
|
18
|
+
from .. import __version__
|
|
19
|
+
from .routes import router
|
|
20
|
+
|
|
21
|
+
# Create FastAPI app
|
|
22
|
+
app = FastAPI(
|
|
23
|
+
title="OpenAlex Local API",
|
|
24
|
+
description="Fast full-text search across 284M+ scholarly works",
|
|
25
|
+
version=__version__,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# CORS middleware
|
|
29
|
+
app.add_middleware(
|
|
30
|
+
CORSMiddleware,
|
|
31
|
+
allow_origins=["*"],
|
|
32
|
+
allow_methods=["*"],
|
|
33
|
+
allow_headers=["*"],
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Include routes
|
|
37
|
+
app.include_router(router)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.get("/")
def root():
    """API root with endpoint information."""
    # Endpoint label -> path template advertised to clients.
    endpoints = {
        "health": "/health",
        "info": "/info",
        "search": "/works?q=<query>",
        "get_by_id": "/works/{id_or_doi}",
        "batch": "/works/batch",
    }
    return dict(
        name="OpenAlex Local API",
        version=__version__,
        status="running",
        endpoints=endpoints,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.get("/health")
def health():
    """Health check endpoint."""
    from .._core.db import get_db

    # Everything that touches the database stays inside the try so that any
    # failure is reported as an "unhealthy" payload rather than raised.
    try:
        database = get_db()
        connected = database is not None
        path = str(database.db_path) if database else None
    except Exception as exc:
        return {
            "status": "unhealthy",
            "error": str(exc),
        }

    return {
        "status": "healthy",
        "database_connected": connected,
        "database_path": path,
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@app.get("/info")
def info():
    """Get database statistics."""
    from .._core.db import get_db

    database = get_db()

    # Row count of the main works table; errors here propagate to the caller.
    works_row = database.fetchone("SELECT COUNT(*) as count FROM works")
    total_works = works_row["count"] if works_row else 0

    # Row count of the FTS table; any failure is reported as zero indexed rows.
    fts_indexed = 0
    try:
        fts_row = database.fetchone("SELECT COUNT(*) as count FROM works_fts")
        if fts_row:
            fts_indexed = fts_row["count"]
    except Exception:
        fts_indexed = 0

    return dict(
        name="OpenAlex Local API",
        version=__version__,
        status="running",
        mode="local",
        total_works=total_works,
        fts_indexed=fts_indexed,
        database_path=str(database.db_path),
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Default port: SCITEX convention (3129X scheme)
|
|
104
|
+
DEFAULT_PORT = int(os.environ.get("OPENALEX_LOCAL_PORT", "31292"))
|
|
105
|
+
DEFAULT_HOST = os.environ.get("OPENALEX_LOCAL_HOST", "0.0.0.0")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def run_server(host: str = None, port: int = None):
    """Run the FastAPI server.

    Args:
        host: Interface to bind; falls back to DEFAULT_HOST when omitted
            or empty.
        port: Port to bind; falls back to DEFAULT_PORT only when omitted.
    """
    import uvicorn

    host = host or DEFAULT_HOST
    # Test for None explicitly: ``port or DEFAULT_PORT`` would silently
    # replace an explicitly requested port 0 with the default.
    if port is None:
        port = DEFAULT_PORT
    uvicorn.run(app, host=host, port=port)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
__all__ = ["app", "run_server", "DEFAULT_PORT", "DEFAULT_HOST"]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Work search and retrieval endpoints."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Optional, List
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, Query, HTTPException
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from .._core import fts
|
|
10
|
+
from .._core.db import get_db
|
|
11
|
+
from .._core.models import Work
|
|
12
|
+
|
|
13
|
+
router = APIRouter(tags=["works"])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Pydantic models for responses
|
|
17
|
+
class WorkResponse(BaseModel):
    """Work metadata response."""

    # Identifiers
    openalex_id: str
    doi: Optional[str] = None

    # Bibliographic metadata
    title: Optional[str] = None
    authors: List[str] = []
    year: Optional[int] = None
    source: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None

    # Content and citation count
    abstract: Optional[str] = None
    cited_by_count: Optional[int] = None

    # Classification entries, passed through as plain dicts
    concepts: List[dict] = []
    topics: List[dict] = []

    # Open-access flag and URL
    is_oa: bool = False
    oa_url: Optional[str] = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SearchResponse(BaseModel):
    """Search results response."""

    query: str  # the query string the client sent
    total: int  # total reported by the search backend
    returned: int  # number of entries in ``results``
    elapsed_ms: float  # search time measured by the endpoint, in milliseconds
    results: List[WorkResponse]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BatchRequest(BaseModel):
    """Batch ID lookup request."""

    # OpenAlex IDs and/or DOIs to look up
    ids: List[str]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class BatchResponse(BaseModel):
    """Batch ID lookup response."""

    requested: int  # number of identifiers in the request
    found: int  # number of identifiers that resolved to a work
    results: List[WorkResponse]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _work_to_response(work: Work) -> WorkResponse:
    """Convert Work to WorkResponse."""
    # Attributes copied one-to-one from the Work onto the response model.
    field_names = (
        "openalex_id",
        "doi",
        "title",
        "authors",
        "year",
        "source",
        "issn",
        "volume",
        "issue",
        "pages",
        "abstract",
        "cited_by_count",
        "concepts",
        "topics",
        "is_oa",
        "oa_url",
    )
    return WorkResponse(**{name: getattr(work, name) for name in field_names})
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@router.get("/works", response_model=SearchResponse)
def search_works(
    q: str = Query(..., description="Search query (FTS5 syntax supported)"),
    limit: int = Query(20, ge=1, description="Max results"),
    offset: int = Query(0, ge=0, description="Skip first N results"),
):
    """
    Full-text search across works.

    Uses FTS5 index for fast searching across titles and abstracts.
    Supports FTS5 query syntax like AND, OR, NOT, "exact phrases".

    Examples:
        /works?q=machine learning
        /works?q="neural network" AND hippocampus
        /works?q=CRISPR&limit=20
    """
    # Only the search call itself is timed.
    start = time.perf_counter()

    try:
        results = fts.search(q, limit=limit, offset=offset)
    except Exception as e:
        # Chain the cause so the original traceback is kept for server-side
        # debugging; the client still receives a 400 with the same detail.
        raise HTTPException(status_code=400, detail=f"Search error: {e}") from e

    elapsed_ms = (time.perf_counter() - start) * 1000

    return SearchResponse(
        query=q,
        total=results.total,
        returned=len(results.works),
        elapsed_ms=round(elapsed_ms, 2),
        results=[_work_to_response(w) for w in results.works],
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@router.get("/works/{id_or_doi:path}", response_model=Optional[WorkResponse])
def get_work(id_or_doi: str):
    """
    Get work metadata by OpenAlex ID or DOI.

    Examples:
        /works/W2741809807
        /works/10.1038/nature12373
    """
    db = get_db()

    # Identifiers starting with "W" (any case) are tried as OpenAlex IDs first.
    upper_id = id_or_doi.upper()
    row = db.get_work(upper_id) if upper_id.startswith("W") else None

    # Otherwise, or when the ID lookup found nothing, treat the value as a DOI.
    if not row:
        row = db.get_work_by_doi(id_or_doi)

    if not row:
        raise HTTPException(status_code=404, detail=f"Not found: {id_or_doi}")

    return _work_to_response(Work.from_db_row(row))
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@router.post("/works/batch", response_model=BatchResponse)
def get_works_batch(request: BatchRequest):
    """
    Get multiple works by OpenAlex ID or DOI.

    Request body: {"ids": ["W2741809807", "10.1038/..."]}
    """
    db = get_db()
    found = []

    for identifier in request.ids:
        # Identifiers starting with "W" (any case) are tried as OpenAlex IDs
        # first; anything unresolved is then looked up as a DOI.
        upper_id = identifier.upper()
        row = db.get_work(upper_id) if upper_id.startswith("W") else None
        row = row or db.get_work_by_doi(identifier)

        # Identifiers that resolve to nothing are left out of the results.
        if row:
            found.append(_work_to_response(Work.from_db_row(row)))

    return BatchResponse(
        requested=len(request.ids),
        found=len(found),
        results=found,
    )
|