openalex-local 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. openalex_local/__init__.py +54 -3
  2. openalex_local/__main__.py +6 -0
  3. openalex_local/_cache/__init__.py +45 -0
  4. openalex_local/_cache/core.py +298 -0
  5. openalex_local/_cache/export.py +100 -0
  6. openalex_local/_cache/models.py +17 -0
  7. openalex_local/_cache/utils.py +85 -0
  8. openalex_local/_cli/__init__.py +9 -0
  9. openalex_local/_cli/cli.py +409 -0
  10. openalex_local/_cli/cli_cache.py +220 -0
  11. openalex_local/_cli/mcp.py +210 -0
  12. openalex_local/_cli/mcp_server.py +235 -0
  13. openalex_local/_core/__init__.py +42 -0
  14. openalex_local/_core/api.py +376 -0
  15. openalex_local/_core/config.py +120 -0
  16. openalex_local/_core/db.py +214 -0
  17. openalex_local/_core/export.py +252 -0
  18. openalex_local/_core/fts.py +165 -0
  19. openalex_local/_core/models.py +432 -0
  20. openalex_local/_remote/__init__.py +34 -0
  21. openalex_local/_remote/base.py +256 -0
  22. openalex_local/_server/__init__.py +117 -0
  23. openalex_local/_server/routes.py +175 -0
  24. openalex_local/aio.py +259 -0
  25. openalex_local/cache.py +31 -0
  26. openalex_local/cli.py +8 -0
  27. openalex_local/jobs.py +169 -0
  28. openalex_local/remote.py +8 -0
  29. openalex_local/server.py +8 -0
  30. openalex_local-0.3.1.dist-info/METADATA +288 -0
  31. openalex_local-0.3.1.dist-info/RECORD +34 -0
  32. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +1 -1
  33. openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
  34. openalex_local/config.py +0 -73
  35. openalex_local/models.py +0 -187
  36. openalex_local-0.1.0.dist-info/METADATA +0 -152
  37. openalex_local-0.1.0.dist-info/RECORD +0 -8
  38. openalex_local-0.1.0.dist-info/entry_points.txt +0 -2
  39. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,256 @@
1
+ """Remote API client for openalex_local.
2
+
3
+ Connects to an OpenAlex Local API server instead of direct database access.
4
+ Use this when the database is on a remote server accessible via HTTP.
5
+ """
6
+
7
+ import json
8
+ import urllib.request
9
+ import urllib.parse
10
+ import urllib.error
11
+ from typing import List, Optional, Dict, Any
12
+
13
+ from .._core.models import Work, SearchResult
14
+ from .._core.config import DEFAULT_PORT
15
+
16
# Default URL uses SCITEX port convention
# Points at localhost on DEFAULT_PORT (imported from .._core.config).
DEFAULT_API_URL = f"http://localhost:{DEFAULT_PORT}"
18
+
19
+
20
class RemoteClient:
    """
    HTTP client for OpenAlex Local API server.

    Provides the same interface as the local API but connects
    to a remote server via HTTP.

    Example:
        >>> client = RemoteClient("http://localhost:31292")
        >>> results = client.search(query="machine learning", limit=10)
        >>> work = client.get("W2741809807")
    """

    def __init__(self, base_url: str = DEFAULT_API_URL, timeout: int = 30):
        """
        Initialize remote client.

        Args:
            base_url: API server URL (default: http://localhost:31292).
                Trailing slashes are stripped.
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        method: str = "GET",
        data: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict]:
        """
        Make HTTP request to API.

        Args:
            endpoint: Path appended to ``base_url`` (must start with "/").
            params: Query-string parameters; entries whose value is None
                are dropped before encoding.
            method: HTTP method.
            data: Optional JSON-serializable request body.

        Returns:
            Decoded JSON response, or None when the server answers 404.

        Raises:
            ConnectionError: On any other HTTP error status or when the
                server cannot be reached.
        """
        url = f"{self.base_url}{endpoint}"
        if params:
            # Filter out None values
            params = {k: v for k, v in params.items() if v is not None}
            if params:
                url = f"{url}?{urllib.parse.urlencode(params)}"

        try:
            req_data = None
            if data is not None:
                req_data = json.dumps(data).encode("utf-8")

            req = urllib.request.Request(url, data=req_data, method=method)
            req.add_header("Accept", "application/json")
            if req_data is not None:
                req.add_header("Content-Type", "application/json")

            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # 404 is reported as "no result" rather than as a failure.
            if e.code == 404:
                return None
            raise ConnectionError(f"API request failed: {e.code} {e.reason}") from e
        except urllib.error.URLError as e:
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e.reason}"
            ) from e
        except (ConnectionRefusedError, ConnectionResetError, OSError) as e:
            raise ConnectionError(
                f"Cannot connect to API at {self.base_url}: {e}"
            ) from e

    @staticmethod
    def _work_from_dict(item: Dict[str, Any]) -> Work:
        """Build a Work from one JSON object returned by the API."""
        return Work(
            openalex_id=item.get("openalex_id", ""),
            doi=item.get("doi"),
            title=item.get("title"),
            authors=item.get("authors", []),
            year=item.get("year"),
            source=item.get("source"),
            issn=item.get("issn"),
            volume=item.get("volume"),
            issue=item.get("issue"),
            pages=item.get("pages"),
            abstract=item.get("abstract"),
            cited_by_count=item.get("cited_by_count"),
            concepts=item.get("concepts", []),
            topics=item.get("topics", []),
            is_oa=item.get("is_oa", False),
            oa_url=item.get("oa_url"),
        )

    def health(self) -> Optional[Dict]:
        """Check API server health (None if the endpoint answers 404)."""
        return self._request("/health")

    def info(self) -> Dict:
        """
        Get database/API information.

        Combines the "/" and "/info" endpoints; fields that a missing
        endpoint would have supplied fall back to "unknown" or 0.
        """
        root = self._request("/")
        info_data = self._request("/info")
        return {
            "api_url": self.base_url,
            "api_version": root.get("version", "unknown") if root else "unknown",
            "status": root.get("status", "unknown") if root else "unknown",
            "mode": "remote",
            "works": info_data.get("total_works", 0) if info_data else 0,
            "fts_indexed": info_data.get("fts_indexed", 0) if info_data else 0,
        }

    def search(
        self,
        query: str,
        limit: int = 20,
        offset: int = 0,
    ) -> SearchResult:
        """
        Search for works.

        Args:
            query: Full-text search query
            limit: Maximum results (default: 20)
            offset: Skip first N results for pagination

        Returns:
            SearchResult with matching works (empty when the server
            returns no data)
        """
        params = {
            "q": query,
            "limit": limit,
            "offset": offset,
        }

        data = self._request("/works", params)

        if not data:
            return SearchResult(works=[], total=0, query=query, elapsed_ms=0.0)

        works = [self._work_from_dict(item) for item in data.get("results", [])]

        return SearchResult(
            works=works,
            total=data.get("total", len(works)),
            query=query,
            elapsed_ms=data.get("elapsed_ms", 0.0),
        )

    def get(self, id_or_doi: str) -> Optional[Work]:
        """
        Get a work by OpenAlex ID or DOI.

        Args:
            id_or_doi: OpenAlex ID (e.g., W2741809807) or DOI, given
                unencoded; it is percent-encoded here.

        Returns:
            Work object or None if not found
        """
        # DOIs may contain characters that are not valid in a URL path
        # ("#", "?", "<", spaces, ...). Encode them, but keep "/" so the
        # DOI prefix/suffix separator still reaches the server's path route.
        encoded = urllib.parse.quote(id_or_doi, safe="/")
        data = self._request(f"/works/{encoded}")
        if not data or "error" in data:
            return None

        return self._work_from_dict(data)

    def get_many(self, ids: List[str]) -> List[Work]:
        """
        Get multiple works by OpenAlex ID or DOI using batch endpoint.

        Falls back to one request per identifier when the batch endpoint
        is missing or the batch request fails.

        Args:
            ids: List of OpenAlex IDs or DOIs

        Returns:
            List of Work objects (identifiers that are not found are omitted)
        """
        if not ids:
            # Nothing to look up; avoid a pointless round trip.
            return []

        try:
            result = self._request("/works/batch", method="POST", data={"ids": ids})
        except (ConnectionError, ValueError, TypeError):
            # Best effort: a failed or malformed batch response triggers
            # the per-identifier fallback below.
            result = None

        if isinstance(result, dict):
            return [self._work_from_dict(item) for item in result.get("results", [])]

        # Fallback to individual lookups
        works = []
        for id_or_doi in ids:
            work = self.get(id_or_doi)
            if work:
                works.append(work)
        return works

    def exists(self, id_or_doi: str) -> bool:
        """Check if a work exists."""
        return self.get(id_or_doi) is not None
239
+
240
+
241
# Module-level client for convenience
# Created lazily by get_client() and cleared by reset_client().
_client: Optional[RemoteClient] = None
243
+
244
+
245
def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
    """
    Get or create singleton remote client.

    Args:
        base_url: API server URL. Trailing slashes are ignored when
            deciding whether the cached client can be reused.

    Returns:
        The cached client if it targets the same URL, otherwise a newly
        created client that replaces the cached one.
    """
    global _client
    # RemoteClient stores its URL without trailing slashes, so compare the
    # normalized form; otherwise "http://host/" would never match and a new
    # client would be built on every call.
    normalized_url = base_url.rstrip("/")
    if _client is None or _client.base_url != normalized_url:
        _client = RemoteClient(base_url)
    return _client
251
+
252
+
253
def reset_client() -> None:
    """Reset singleton client.

    Clears the module-level ``_client`` so the next ``get_client()`` call
    constructs a new ``RemoteClient``.
    """
    global _client
    _client = None
@@ -0,0 +1,117 @@
1
+ """FastAPI server for OpenAlex Local with FTS5 search.
2
+
3
+ Provides HTTP relay server for remote database access.
4
+
5
+ Usage:
6
+ openalex-local relay # Run on default port 31292
7
+ openalex-local relay --port 8080 # Custom port
8
+
9
+ # Or directly:
10
+ uvicorn openalex_local.server:app --host 0.0.0.0 --port 31292
11
+ """
12
+
13
+ import os
14
+
15
+ from fastapi import FastAPI
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+
18
+ from .. import __version__
19
+ from .routes import router
20
+
21
# Create FastAPI app
app = FastAPI(
    title="OpenAlex Local API",
    description="Fast full-text search across 284M+ scholarly works",
    version=__version__,
)

# CORS middleware
# Every origin, method and header is allowed, so browser pages served from
# any site may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routes
# The router imported from .routes is mounted without a prefix.
app.include_router(router)
38
+
39
+
40
@app.get("/")
def root():
    """Describe the API: name, version, status and the available endpoints."""
    endpoint_map = {
        "health": "/health",
        "info": "/info",
        "search": "/works?q=<query>",
        "get_by_id": "/works/{id_or_doi}",
        "batch": "/works/batch",
    }
    payload = {
        "name": "OpenAlex Local API",
        "version": __version__,
        "status": "running",
    }
    payload["endpoints"] = endpoint_map
    return payload
55
+
56
+
57
@app.get("/health")
def health():
    """Report whether the database handle can be obtained.

    Returns a "healthy" payload with the connection flag and database path,
    or an "unhealthy" payload carrying the error text if anything raises.
    """
    from .._core.db import get_db

    try:
        database = get_db()
        connected = database is not None
        path = str(database.db_path) if database else None
    except Exception as exc:
        return {
            "status": "unhealthy",
            "error": str(exc),
        }

    return {
        "status": "healthy",
        "database_connected": connected,
        "database_path": path,
    }
74
+
75
+
76
@app.get("/info")
def info():
    """Return row counts for the works table and the FTS index.

    A failure while counting ``works_fts`` is tolerated and reported as 0;
    a failure while counting ``works`` propagates.
    """
    from .._core.db import get_db

    database = get_db()

    works_row = database.fetchone("SELECT COUNT(*) as count FROM works")
    total_works = works_row["count"] if works_row else 0

    fts_indexed = 0
    try:
        fts_row = database.fetchone("SELECT COUNT(*) as count FROM works_fts")
        if fts_row:
            fts_indexed = fts_row["count"]
    except Exception:
        fts_indexed = 0

    return {
        "name": "OpenAlex Local API",
        "version": __version__,
        "status": "running",
        "mode": "local",
        "total_works": total_works,
        "fts_indexed": fts_indexed,
        "database_path": str(database.db_path),
    }
101
+
102
+
103
# Default port: SCITEX convention (3129X scheme)
# Overridable via the OPENALEX_LOCAL_PORT environment variable, read at import time.
DEFAULT_PORT = int(os.environ.get("OPENALEX_LOCAL_PORT", "31292"))
# Overridable via OPENALEX_LOCAL_HOST; the fallback "0.0.0.0" binds all interfaces.
DEFAULT_HOST = os.environ.get("OPENALEX_LOCAL_HOST", "0.0.0.0")
106
+
107
+
108
def run_server(host: str = None, port: int = None):
    """
    Run the FastAPI server.

    Args:
        host: Interface to bind; falls back to DEFAULT_HOST when None or empty.
        port: Port to bind; falls back to DEFAULT_PORT only when None.
    """
    import uvicorn

    bind_host = host or DEFAULT_HOST
    # Test against None rather than truthiness: 0 is a legitimate port value
    # (it asks the OS to pick a free port) and must not be replaced by the
    # default.
    bind_port = DEFAULT_PORT if port is None else port
    uvicorn.run(app, host=bind_host, port=bind_port)
115
+
116
+
117
+ __all__ = ["app", "run_server", "DEFAULT_PORT", "DEFAULT_HOST"]
@@ -0,0 +1,175 @@
1
+ """Work search and retrieval endpoints."""
2
+
3
+ import time
4
+ from typing import Optional, List
5
+
6
+ from fastapi import APIRouter, Query, HTTPException
7
+ from pydantic import BaseModel
8
+
9
+ from .._core import fts
10
+ from .._core.db import get_db
11
+ from .._core.models import Work
12
+
13
# Router holding the work search/retrieval endpoints defined in this module.
router = APIRouter(tags=["works"])
14
+
15
+
16
+ # Pydantic models for responses
17
class WorkResponse(BaseModel):
    """Work metadata response.

    Serialized form of a single work; only ``openalex_id`` is required,
    every other field has a default.
    """

    # Identifiers
    openalex_id: str
    doi: Optional[str] = None

    # Bibliographic metadata
    title: Optional[str] = None
    authors: List[str] = []
    year: Optional[int] = None
    source: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    abstract: Optional[str] = None

    # Citation count and classification lists
    cited_by_count: Optional[int] = None
    concepts: List[dict] = []
    topics: List[dict] = []

    # Open-access flag and URL
    is_oa: bool = False
    oa_url: Optional[str] = None
36
+
37
+
38
class SearchResponse(BaseModel):
    """Search results response.

    Envelope returned by the ``/works`` search endpoint.
    """

    # The query string as received
    query: str
    # Total reported by the search backend (may exceed ``returned``)
    total: int
    # Number of items in ``results``
    returned: int
    # Search duration in milliseconds
    elapsed_ms: float
    results: List[WorkResponse]
46
+
47
+
48
class BatchRequest(BaseModel):
    """Batch ID lookup request.

    Body of ``POST /works/batch``.
    """

    # OpenAlex IDs and/or DOIs to look up
    ids: List[str]
52
+
53
+
54
class BatchResponse(BaseModel):
    """Batch ID lookup response.

    Identifiers that could not be resolved are absent from ``results``.
    """

    # Number of identifiers in the request
    requested: int
    # Number of identifiers that resolved to a work
    found: int
    results: List[WorkResponse]
60
+
61
+
62
def _work_to_response(work: Work) -> WorkResponse:
    """Copy the serializable attributes of a Work into a WorkResponse."""
    copied_fields = (
        "openalex_id",
        "doi",
        "title",
        "authors",
        "year",
        "source",
        "issn",
        "volume",
        "issue",
        "pages",
        "abstract",
        "cited_by_count",
        "concepts",
        "topics",
        "is_oa",
        "oa_url",
    )
    payload = {name: getattr(work, name) for name in copied_fields}
    return WorkResponse(**payload)
82
+
83
+
84
@router.get("/works", response_model=SearchResponse)
def search_works(
    q: str = Query(..., description="Search query (FTS5 syntax supported)"),
    limit: int = Query(20, ge=1, description="Max results"),
    offset: int = Query(0, ge=0, description="Skip first N results"),
):
    """
    Full-text search across works.

    Uses FTS5 index for fast searching across titles and abstracts.
    Supports FTS5 query syntax like AND, OR, NOT, "exact phrases".

    Examples:
        /works?q=machine learning
        /works?q="neural network" AND hippocampus
        /works?q=CRISPR&limit=20

    Raises:
        HTTPException: 400 when the underlying search raises.
    """
    start = time.perf_counter()

    try:
        results = fts.search(q, limit=limit, offset=offset)
    except Exception as e:
        # Chain the original error so the root cause stays visible in
        # server-side tracebacks.
        raise HTTPException(status_code=400, detail=f"Search error: {e}") from e

    # Measured around the search call only; response building is excluded.
    elapsed_ms = (time.perf_counter() - start) * 1000

    return SearchResponse(
        query=q,
        total=results.total,
        returned=len(results.works),
        elapsed_ms=round(elapsed_ms, 2),
        results=[_work_to_response(w) for w in results.works],
    )
117
+
118
+
119
@router.get("/works/{id_or_doi:path}", response_model=Optional[WorkResponse])
def get_work(id_or_doi: str):
    """
    Look up a single work by OpenAlex ID or DOI.

    Identifiers starting with "W" (any case) are tried as an OpenAlex ID
    first; if that finds nothing, or for any other identifier, a DOI
    lookup is performed. Responds with 404 when neither matches.

    Examples:
        /works/W2741809807
        /works/10.1038/nature12373
    """
    db = get_db()

    upper_id = id_or_doi.upper()
    row = db.get_work(upper_id) if upper_id.startswith("W") else None

    if not row:
        # Not an OpenAlex ID, or no such ID: treat the value as a DOI.
        row = db.get_work_by_doi(id_or_doi)

    if not row:
        raise HTTPException(status_code=404, detail=f"Not found: {id_or_doi}")

    return _work_to_response(Work.from_db_row(row))
144
+
145
+
146
@router.post("/works/batch", response_model=BatchResponse)
def get_works_batch(request: BatchRequest):
    """
    Resolve several OpenAlex IDs or DOIs in one call.

    Each identifier is tried as an OpenAlex ID when it starts with "W"
    (any case), then as a DOI. Identifiers that match nothing are skipped.

    Request body: {"ids": ["W2741809807", "10.1038/..."]}
    """
    db = get_db()
    found_works = []

    for identifier in request.ids:
        upper_id = identifier.upper()
        row = db.get_work(upper_id) if upper_id.startswith("W") else None
        # Fall back to a DOI lookup when the ID lookup was skipped or empty.
        row = row or db.get_work_by_doi(identifier)
        if not row:
            continue
        found_works.append(_work_to_response(Work.from_db_row(row)))

    return BatchResponse(
        requested=len(request.ids),
        found=len(found_works),
        results=found_works,
    )