openalex-local 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local/__init__.py +28 -7
- openalex_local/_cache/__init__.py +45 -0
- openalex_local/_cache/core.py +298 -0
- openalex_local/_cache/export.py +100 -0
- openalex_local/_cache/models.py +17 -0
- openalex_local/_cache/utils.py +85 -0
- openalex_local/_cli/__init__.py +9 -0
- openalex_local/_cli/cli.py +409 -0
- openalex_local/_cli/cli_cache.py +220 -0
- openalex_local/_cli/mcp.py +210 -0
- openalex_local/_cli/mcp_server.py +235 -0
- openalex_local/_core/__init__.py +42 -0
- openalex_local/{api.py → _core/api.py} +137 -19
- openalex_local/_core/config.py +120 -0
- openalex_local/{db.py → _core/db.py} +53 -0
- openalex_local/_core/export.py +252 -0
- openalex_local/{models.py → _core/models.py} +201 -0
- openalex_local/_remote/__init__.py +34 -0
- openalex_local/_remote/base.py +256 -0
- openalex_local/_server/__init__.py +117 -0
- openalex_local/_server/routes.py +175 -0
- openalex_local/aio.py +259 -0
- openalex_local/cache.py +31 -0
- openalex_local/cli.py +4 -205
- openalex_local/jobs.py +169 -0
- openalex_local/remote.py +8 -0
- openalex_local/server.py +8 -0
- openalex_local-0.3.1.dist-info/METADATA +288 -0
- openalex_local-0.3.1.dist-info/RECORD +34 -0
- openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
- openalex_local/config.py +0 -182
- openalex_local-0.3.0.dist-info/METADATA +0 -152
- openalex_local-0.3.0.dist-info/RECORD +0 -13
- openalex_local-0.3.0.dist-info/entry_points.txt +0 -2
- /openalex_local/{fts.py → _core/fts.py} +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openalex-local
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
|
|
5
|
+
Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
|
|
6
|
+
License: AGPL-3.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
|
|
8
|
+
Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
|
|
9
|
+
Keywords: openalex,academic,research,abstracts,semantic-search
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Requires-Dist: awscli>=1.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
26
|
+
Provides-Extra: mcp
|
|
27
|
+
Requires-Dist: fastmcp>=0.4; extra == "mcp"
|
|
28
|
+
Provides-Extra: server
|
|
29
|
+
Requires-Dist: fastapi>=0.100; extra == "server"
|
|
30
|
+
Requires-Dist: uvicorn>=0.23; extra == "server"
|
|
31
|
+
Provides-Extra: docs
|
|
32
|
+
Requires-Dist: sphinx>=7.0; extra == "docs"
|
|
33
|
+
Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
|
|
34
|
+
Requires-Dist: myst-parser>=2.0; extra == "docs"
|
|
35
|
+
Requires-Dist: sphinx-copybutton>=0.5; extra == "docs"
|
|
36
|
+
Requires-Dist: sphinx-autodoc-typehints>=1.25; extra == "docs"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: openalex-local[dev,docs,mcp,server]; extra == "all"
|
|
39
|
+
|
|
40
|
+
# OpenAlex Local
|
|
41
|
+
|
|
42
|
+
Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
|
|
43
|
+
|
|
44
|
+
[PyPI version](https://badge.fury.io/py/openalex-local)
|
|
45
|
+
[Documentation](https://openalex-local.readthedocs.io/en/latest/)
|
|
46
|
+
[Tests](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml)
|
|
47
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
48
|
+
[License: AGPL-3.0](LICENSE)
|
|
49
|
+
|
|
50
|
+
<details>
|
|
51
|
+
<summary><strong>Why OpenAlex Local?</strong></summary>
|
|
52
|
+
|
|
53
|
+
**Built for the LLM era** - features that matter for AI research assistants:
|
|
54
|
+
|
|
55
|
+
| Feature | Benefit |
|
|
56
|
+
|---------|---------|
|
|
57
|
+
| **284M Works** | More coverage than CrossRef |
|
|
58
|
+
| **Abstracts** | ~45-60% availability for semantic search |
|
|
59
|
+
| **Concepts & Topics** | Built-in classification |
|
|
60
|
+
| **Author Disambiguation** | Linked to institutions |
|
|
61
|
+
| **Open Access Info** | OA status and URLs |
|
|
62
|
+
|
|
63
|
+
Perfect for: RAG systems, research assistants, literature review automation.
|
|
64
|
+
|
|
65
|
+
</details>
|
|
66
|
+
|
|
67
|
+
<details>
|
|
68
|
+
<summary><strong>Installation</strong></summary>
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install openalex-local
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
From source:
|
|
75
|
+
```bash
|
|
76
|
+
git clone https://github.com/ywatanabe1989/openalex-local
|
|
77
|
+
cd openalex-local && make install
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Database setup (~300 GB, ~1-2 days to build):
|
|
81
|
+
```bash
|
|
82
|
+
# Check system status
|
|
83
|
+
make status
|
|
84
|
+
|
|
85
|
+
# 1. Download OpenAlex Works snapshot (~300GB)
|
|
86
|
+
make download-screen # runs in background
|
|
87
|
+
|
|
88
|
+
# 2. Build SQLite database
|
|
89
|
+
make build-db
|
|
90
|
+
|
|
91
|
+
# 3. Build FTS5 index
|
|
92
|
+
make build-fts
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
</details>
|
|
96
|
+
|
|
97
|
+
<details>
|
|
98
|
+
<summary><strong>Python API</strong></summary>
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from openalex_local import search, get, count
|
|
102
|
+
|
|
103
|
+
# Full-text search (title + abstract)
|
|
104
|
+
results = search("machine learning neural networks")
|
|
105
|
+
for work in results:
|
|
106
|
+
print(f"{work.title} ({work.year})")
|
|
107
|
+
print(f" Abstract: {work.abstract[:200]}...")
|
|
108
|
+
print(f" Concepts: {[c['name'] for c in work.concepts]}")
|
|
109
|
+
|
|
110
|
+
# Get by OpenAlex ID or DOI
|
|
111
|
+
work = get("W2741809807")
|
|
112
|
+
work = get("10.1038/nature12373")
|
|
113
|
+
|
|
114
|
+
# Count matches
|
|
115
|
+
n = count("CRISPR")
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
</details>
|
|
119
|
+
|
|
120
|
+
<details>
|
|
121
|
+
<summary><strong>CLI</strong></summary>
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
openalex-local search "CRISPR genome editing" -n 5
|
|
125
|
+
openalex-local search-by-doi W2741809807
|
|
126
|
+
openalex-local search-by-doi 10.1038/nature12373
|
|
127
|
+
openalex-local status # Configuration and database stats
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
With abstracts (`-a` flag):
|
|
131
|
+
```
|
|
132
|
+
$ openalex-local search "neural network" -n 1 -a
|
|
133
|
+
|
|
134
|
+
Found 1,523,847 matches in 45.2ms
|
|
135
|
+
|
|
136
|
+
1. Deep learning for neural networks (2015)
|
|
137
|
+
OpenAlex ID: W2741809807
|
|
138
|
+
Abstract: This paper presents a comprehensive overview of deep learning
|
|
139
|
+
techniques for neural network architectures...
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
</details>
|
|
143
|
+
|
|
144
|
+
<details>
|
|
145
|
+
<summary><strong>HTTP API</strong></summary>
|
|
146
|
+
|
|
147
|
+
Start the FastAPI server:
|
|
148
|
+
```bash
|
|
149
|
+
openalex-local relay --host 0.0.0.0 --port 31292
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Endpoints:
|
|
153
|
+
```bash
|
|
154
|
+
# Search works (FTS5)
|
|
155
|
+
curl "http://localhost:31292/works?q=CRISPR&limit=10"
|
|
156
|
+
|
|
157
|
+
# Get by ID or DOI
|
|
158
|
+
curl "http://localhost:31292/works/W2741809807"
|
|
159
|
+
curl "http://localhost:31292/works/10.1038/nature12373"
|
|
160
|
+
|
|
161
|
+
# Batch lookup
|
|
162
|
+
curl -X POST "http://localhost:31292/works/batch" \
|
|
163
|
+
-H "Content-Type: application/json" \
|
|
164
|
+
-d '{"ids": ["W2741809807", "10.1038/nature12373"]}'
|
|
165
|
+
|
|
166
|
+
# Database info
|
|
167
|
+
curl "http://localhost:31292/info"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
HTTP mode (connect to running server):
|
|
171
|
+
```bash
|
|
172
|
+
# On local machine (if server is remote)
|
|
173
|
+
ssh -L 31292:127.0.0.1:31292 your-server
|
|
174
|
+
|
|
175
|
+
# Python client
|
|
176
|
+
from openalex_local import configure_http
|
|
177
|
+
configure_http("http://localhost:31292")
|
|
178
|
+
|
|
179
|
+
# Or via CLI
|
|
180
|
+
openalex-local --http search "CRISPR"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
</details>
|
|
184
|
+
|
|
185
|
+
<details>
|
|
186
|
+
<summary><strong>MCP Server</strong></summary>
|
|
187
|
+
|
|
188
|
+
Run as MCP (Model Context Protocol) server:
|
|
189
|
+
```bash
|
|
190
|
+
openalex-local mcp start
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Local MCP client configuration:
|
|
194
|
+
```json
|
|
195
|
+
{
|
|
196
|
+
"mcpServers": {
|
|
197
|
+
"openalex-local": {
|
|
198
|
+
"command": "openalex-local",
|
|
199
|
+
"args": ["mcp", "start"],
|
|
200
|
+
"env": {
|
|
201
|
+
"OPENALEX_LOCAL_DB": "/path/to/openalex.db"
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Remote MCP via HTTP:
|
|
209
|
+
```bash
|
|
210
|
+
# On server: start persistent MCP server
|
|
211
|
+
openalex-local mcp start -t http --host 0.0.0.0 --port 8083
|
|
212
|
+
```
|
|
213
|
+
```json
|
|
214
|
+
{
|
|
215
|
+
"mcpServers": {
|
|
216
|
+
"openalex-remote": {
|
|
217
|
+
"url": "http://your-server:8083/mcp"
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Diagnose setup:
|
|
224
|
+
```bash
|
|
225
|
+
openalex-local mcp doctor # Check dependencies and database
|
|
226
|
+
openalex-local mcp list-tools # Show available MCP tools
|
|
227
|
+
openalex-local mcp installation # Show client config examples
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Available tools:
|
|
231
|
+
- `search` - Full-text search across 284M+ papers
|
|
232
|
+
- `search_by_id` - Get paper by OpenAlex ID or DOI
|
|
233
|
+
- `enrich_ids` - Batch lookup with metadata
|
|
234
|
+
- `status` - Database statistics
|
|
235
|
+
|
|
236
|
+
</details>
|
|
237
|
+
|
|
238
|
+
<details>
|
|
239
|
+
<summary><strong>Related Projects</strong></summary>
|
|
240
|
+
|
|
241
|
+
**[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
|
|
242
|
+
|
|
243
|
+
| Feature | crossref-local | openalex-local |
|
|
244
|
+
|---------|----------------|----------------|
|
|
245
|
+
| Works | 167M | 284M |
|
|
246
|
+
| Abstracts | ~21% | ~45-60% |
|
|
247
|
+
| Update frequency | Real-time | Monthly |
|
|
248
|
+
| DOI authority | Yes (source) | Uses CrossRef |
|
|
249
|
+
| Citations | Raw references | Linked works |
|
|
250
|
+
| Concepts/Topics | No | Yes |
|
|
251
|
+
| Author IDs | No | Yes |
|
|
252
|
+
| Best for | DOI lookup, raw refs | Semantic search |
|
|
253
|
+
|
|
254
|
+
**When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
|
|
255
|
+
**When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
|
|
256
|
+
|
|
257
|
+
</details>
|
|
258
|
+
|
|
259
|
+
<details>
|
|
260
|
+
<summary><strong>Documentation</strong></summary>
|
|
261
|
+
|
|
262
|
+
Full documentation available at [openalex-local.readthedocs.io](https://openalex-local.readthedocs.io/en/latest/)
|
|
263
|
+
|
|
264
|
+
- [Installation Guide](https://openalex-local.readthedocs.io/en/latest/installation.html)
|
|
265
|
+
- [Quickstart](https://openalex-local.readthedocs.io/en/latest/quickstart.html)
|
|
266
|
+
- [CLI Reference](https://openalex-local.readthedocs.io/en/latest/cli_reference.html)
|
|
267
|
+
- [HTTP API Reference](https://openalex-local.readthedocs.io/en/latest/http_api.html)
|
|
268
|
+
- [Python API](https://openalex-local.readthedocs.io/en/latest/api/openalex_local.html)
|
|
269
|
+
|
|
270
|
+
</details>
|
|
271
|
+
|
|
272
|
+
<details>
|
|
273
|
+
<summary><strong>Data Source</strong></summary>
|
|
274
|
+
|
|
275
|
+
Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
|
|
276
|
+
Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
|
|
277
|
+
|
|
278
|
+
</details>
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
<p align="center">
|
|
283
|
+
<a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
|
|
284
|
+
<br>
|
|
285
|
+
AGPL-3.0 · ywatanabe@scitex.ai
|
|
286
|
+
</p>
|
|
287
|
+
|
|
288
|
+
<!-- EOF -->
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
openalex_local/__init__.py,sha256=USWssgVhl3PXKugOWe_VNJRD0j_PR-0vrXziWeaYX6k,1057
|
|
2
|
+
openalex_local/__main__.py,sha256=7zIPyOv659VptzHef3Zsw3k6m-WhGTN4MFq2-yVkdLE,111
|
|
3
|
+
openalex_local/aio.py,sha256=42qi3qOKgaK-e37rVj9afGvzLhzDhHAuBVOqtmXEh6M,6468
|
|
4
|
+
openalex_local/cache.py,sha256=-FdYvzd9XSM9qbuE2xHu8p-BUjF6pItv1qX7Jl0auck,415
|
|
5
|
+
openalex_local/cli.py,sha256=2pWJK_vnO1IIwUrMB3K8KW94ZWfkEF7TLSxMsaR-LrI,137
|
|
6
|
+
openalex_local/jobs.py,sha256=8yoG1um3g94wddPcnhx5w3XDPegnG7qpRshDtCcR2gI,4892
|
|
7
|
+
openalex_local/remote.py,sha256=PmvUq87mC76sM9BL9RczOaXtvwHoqMc_dN5PJCYT18M,239
|
|
8
|
+
openalex_local/server.py,sha256=SKoQ-cOoZjdXm24Sv1CFu3F8UclbD6QstfQb-7l2xtA,215
|
|
9
|
+
openalex_local/_cache/__init__.py,sha256=z56OFC31_zmngyD7k7N8Tt0vFqjIgYN7-crhmPSeG98,891
|
|
10
|
+
openalex_local/_cache/core.py,sha256=fi8lSVhnmhSSLJ4rEJIKMZWALTat4NlXuyZxu2awBvw,8277
|
|
11
|
+
openalex_local/_cache/export.py,sha256=uHpymppj8n-zz2KrveHH0XO9Mo1wR81eQ2B8CWsM6nw,2713
|
|
12
|
+
openalex_local/_cache/models.py,sha256=llxMa2gB8cVsmhz2Lt8YlpFG2yTtIbv6OO1O8lw-Aso,319
|
|
13
|
+
openalex_local/_cache/utils.py,sha256=xPQpUudWU1y-KuzKnNQN5Rt0R-fnEqcno7_EFrpgINY,2038
|
|
14
|
+
openalex_local/_cli/__init__.py,sha256=NS07Eo93dRAuO5vxGwGwS0ctvZxMMkF_py09bzIk3Hk,175
|
|
15
|
+
openalex_local/_cli/cli.py,sha256=S3RK3BzFBFWdwFPoiiImgZwMFhkRcNjK74RVP_Vp-vM,12874
|
|
16
|
+
openalex_local/_cli/cli_cache.py,sha256=cy1LHBtJ7S7ecLgbsCFisH9XnQ6gDm0o1vT2QAUrBOU,7147
|
|
17
|
+
openalex_local/_cli/mcp.py,sha256=IZ5r3rs601eTACkFhxojzOxJh9QJHLSxTjzUAilnlDs,6306
|
|
18
|
+
openalex_local/_cli/mcp_server.py,sha256=5qMWxb1Yeqf4GEH8JIPKRrM4lb-tOJlKHPbMjJxWGlY,6829
|
|
19
|
+
openalex_local/_core/__init__.py,sha256=aj7jUJ8Kv9cWoLC120JGF6h_kmgf2-5wwHkkTsgqwGU,615
|
|
20
|
+
openalex_local/_core/api.py,sha256=_C6hWhmvRjqpdn_LEzLhm_krbkE9YIGY3C-tep0yN20,9410
|
|
21
|
+
openalex_local/_core/config.py,sha256=VEoFIVCuFLO2zPRpJ0aeG5Mu8nqtHN2eFOQeGxFdQ7Y,3372
|
|
22
|
+
openalex_local/_core/db.py,sha256=wyCNRbNxNujfvdJwvnNjrzhJGC5aS2ZhoKaSuuheehs,5957
|
|
23
|
+
openalex_local/_core/export.py,sha256=NBt4dldvqPus7Ns88Qi9yPd4KgMX5FdggsIIZMfBWM4,6517
|
|
24
|
+
openalex_local/_core/fts.py,sha256=52TrRqabNj1zLR8gDtb9mOgIXioEBaTRJN7tz5mAcpE,3959
|
|
25
|
+
openalex_local/_core/models.py,sha256=m4ApslrAuTsCz8j_0Zfc6CXVWweBU8VCeBiOrtvH3Ng,13575
|
|
26
|
+
openalex_local/_remote/__init__.py,sha256=3NmHe2uY_VDR-MxP2XAZM8EW9PYQ57_s77hZFVGILms,792
|
|
27
|
+
openalex_local/_remote/base.py,sha256=qzyD-D_JPX9g5O9bJt01JYf2fzEC-7A7uW5mUA_qRIo,8404
|
|
28
|
+
openalex_local/_server/__init__.py,sha256=8mZawWpAuGFwNJaXekrtaFgrb-1wcXaHiIzB0pVmFug,2785
|
|
29
|
+
openalex_local/_server/routes.py,sha256=3qTcjiP2huLVrB6dHjCTrKossRGNFacHCJX3Ai6u4K4,4444
|
|
30
|
+
openalex_local-0.3.1.dist-info/METADATA,sha256=T0CwMb4_pEW3Ol8zeiynMNGMaqHCDFcDx7m2PtTY4fA,8294
|
|
31
|
+
openalex_local-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
32
|
+
openalex_local-0.3.1.dist-info/entry_points.txt,sha256=8G5Q3Nwg3vlKqJiHO0BoAgbSSi3R4lNBMqwANbs7Uz4,64
|
|
33
|
+
openalex_local-0.3.1.dist-info/top_level.txt,sha256=arEhuDR1f42p7soJ5JkJFAnAiCPSGKvuHIchaKTmqKg,15
|
|
34
|
+
openalex_local-0.3.1.dist-info/RECORD,,
|
openalex_local/config.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
"""Configuration for openalex_local."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Optional
|
|
6
|
-
|
|
7
|
-
# Default database locations (checked in order).
# NOTE(review): the first three entries are machine-specific absolute paths;
# they are kept unchanged for backward compatibility. The list is built once
# at import time, so the Path.home() and Path.cwd() entries reflect the
# environment at import, not at lookup.
DEFAULT_DB_PATHS = [
    Path("/home/ywatanabe/proj/openalex-local/data/openalex.db"),
    Path("/home/ywatanabe/proj/openalex_local/data/openalex.db"),
    Path("/mnt/nas_ug/openalex_local/data/openalex.db"),
    Path.home() / ".openalex_local" / "openalex.db",
    Path.cwd() / "data" / "openalex.db",
]


def get_db_path() -> Path:
    """
    Get database path from environment or auto-detect.

    Priority:
        1. OPENALEX_LOCAL_DB environment variable (a leading ``~`` is
           expanded to the user's home directory)
        2. First existing path from DEFAULT_DB_PATHS

    Returns:
        Path to the database file

    Raises:
        FileNotFoundError: If OPENALEX_LOCAL_DB is set but does not exist,
            or if it is unset and none of DEFAULT_DB_PATHS exists
    """
    # Check environment variable first. An empty value is treated as unset.
    env_path = os.environ.get("OPENALEX_LOCAL_DB")
    if env_path:
        # Values that never passed through a shell (e.g. an "env" mapping in
        # a JSON client config) still carry a literal "~"; expand it so the
        # existence check looks at the intended location.
        path = Path(env_path).expanduser()
        if path.exists():
            return path
        # An explicit setting never falls back to auto-detection: a wrong
        # value should fail loudly rather than open a different database.
        raise FileNotFoundError(f"OPENALEX_LOCAL_DB path not found: {env_path}")

    # Auto-detect from default locations
    for path in DEFAULT_DB_PATHS:
        if path.exists():
            return path

    raise FileNotFoundError(
        "OpenAlex database not found. Set OPENALEX_LOCAL_DB environment variable "
        f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
    )
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# Default port (SciTeX port scheme: 31292 for openalex)
|
|
51
|
-
DEFAULT_PORT = 31292
|
|
52
|
-
DEFAULT_HOST = "0.0.0.0"
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class Config:
|
|
56
|
-
"""Configuration container."""
|
|
57
|
-
|
|
58
|
-
_db_path: Optional[Path] = None
|
|
59
|
-
_api_url: Optional[str] = None
|
|
60
|
-
_mode: Optional[str] = None # "db" or "http"
|
|
61
|
-
_port: Optional[int] = None
|
|
62
|
-
_host: Optional[str] = None
|
|
63
|
-
|
|
64
|
-
@classmethod
|
|
65
|
-
def get_db_path(cls) -> Path:
|
|
66
|
-
"""Get or auto-detect database path."""
|
|
67
|
-
if cls._db_path is None:
|
|
68
|
-
cls._db_path = get_db_path()
|
|
69
|
-
return cls._db_path
|
|
70
|
-
|
|
71
|
-
@classmethod
|
|
72
|
-
def set_db_path(cls, path: str | Path) -> None:
|
|
73
|
-
"""Set database path explicitly."""
|
|
74
|
-
path = Path(path)
|
|
75
|
-
if not path.exists():
|
|
76
|
-
raise FileNotFoundError(f"Database not found: {path}")
|
|
77
|
-
cls._db_path = path
|
|
78
|
-
cls._mode = "db"
|
|
79
|
-
|
|
80
|
-
@classmethod
|
|
81
|
-
def get_api_url(cls) -> str:
|
|
82
|
-
"""Get API URL for HTTP mode."""
|
|
83
|
-
if cls._api_url:
|
|
84
|
-
return cls._api_url
|
|
85
|
-
|
|
86
|
-
# Check environment variables (scitex priority)
|
|
87
|
-
for var in [
|
|
88
|
-
"SCITEX_SCHOLAR_OPENALEX_API_URL",
|
|
89
|
-
"OPENALEX_LOCAL_API_URL",
|
|
90
|
-
]:
|
|
91
|
-
url = os.environ.get(var)
|
|
92
|
-
if url:
|
|
93
|
-
return url
|
|
94
|
-
|
|
95
|
-
return "http://localhost:31292"
|
|
96
|
-
|
|
97
|
-
@classmethod
|
|
98
|
-
def set_api_url(cls, url: str) -> None:
|
|
99
|
-
"""Set API URL explicitly."""
|
|
100
|
-
cls._api_url = url
|
|
101
|
-
cls._mode = "http"
|
|
102
|
-
|
|
103
|
-
@classmethod
|
|
104
|
-
def get_mode(cls) -> str:
|
|
105
|
-
"""
|
|
106
|
-
Get current mode.
|
|
107
|
-
|
|
108
|
-
Priority:
|
|
109
|
-
1. Explicitly set mode
|
|
110
|
-
2. OPENALEX_LOCAL_MODE environment variable
|
|
111
|
-
3. Auto-detect based on available config
|
|
112
|
-
|
|
113
|
-
Returns:
|
|
114
|
-
"db" or "http"
|
|
115
|
-
"""
|
|
116
|
-
if cls._mode:
|
|
117
|
-
return cls._mode
|
|
118
|
-
|
|
119
|
-
# Check environment variable
|
|
120
|
-
env_mode = os.environ.get("OPENALEX_LOCAL_MODE", "").lower()
|
|
121
|
-
if env_mode in ("db", "http"):
|
|
122
|
-
return env_mode
|
|
123
|
-
|
|
124
|
-
# Check if API URL is set
|
|
125
|
-
if os.environ.get("OPENALEX_LOCAL_API_URL"):
|
|
126
|
-
return "http"
|
|
127
|
-
|
|
128
|
-
# Default to db mode (will raise FileNotFoundError if no database)
|
|
129
|
-
return "db"
|
|
130
|
-
|
|
131
|
-
@classmethod
|
|
132
|
-
def get_port(cls) -> int:
|
|
133
|
-
"""Get server port."""
|
|
134
|
-
if cls._port:
|
|
135
|
-
return cls._port
|
|
136
|
-
|
|
137
|
-
# Check environment variables (scitex priority)
|
|
138
|
-
for var in [
|
|
139
|
-
"SCITEX_SCHOLAR_OPENALEX_PORT",
|
|
140
|
-
"OPENALEX_LOCAL_PORT",
|
|
141
|
-
]:
|
|
142
|
-
port = os.environ.get(var)
|
|
143
|
-
if port:
|
|
144
|
-
return int(port)
|
|
145
|
-
|
|
146
|
-
return DEFAULT_PORT
|
|
147
|
-
|
|
148
|
-
@classmethod
|
|
149
|
-
def set_port(cls, port: int) -> None:
|
|
150
|
-
"""Set server port explicitly."""
|
|
151
|
-
cls._port = port
|
|
152
|
-
|
|
153
|
-
@classmethod
|
|
154
|
-
def get_host(cls) -> str:
|
|
155
|
-
"""Get server host."""
|
|
156
|
-
if cls._host:
|
|
157
|
-
return cls._host
|
|
158
|
-
|
|
159
|
-
# Check environment variables (scitex priority)
|
|
160
|
-
for var in [
|
|
161
|
-
"SCITEX_SCHOLAR_OPENALEX_HOST",
|
|
162
|
-
"OPENALEX_LOCAL_HOST",
|
|
163
|
-
]:
|
|
164
|
-
host = os.environ.get(var)
|
|
165
|
-
if host:
|
|
166
|
-
return host
|
|
167
|
-
|
|
168
|
-
return DEFAULT_HOST
|
|
169
|
-
|
|
170
|
-
@classmethod
|
|
171
|
-
def set_host(cls, host: str) -> None:
|
|
172
|
-
"""Set server host explicitly."""
|
|
173
|
-
cls._host = host
|
|
174
|
-
|
|
175
|
-
@classmethod
|
|
176
|
-
def reset(cls) -> None:
|
|
177
|
-
"""Reset configuration (for testing)."""
|
|
178
|
-
cls._db_path = None
|
|
179
|
-
cls._api_url = None
|
|
180
|
-
cls._mode = None
|
|
181
|
-
cls._port = None
|
|
182
|
-
cls._host = None
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: openalex-local
|
|
3
|
-
Version: 0.3.0
|
|
4
|
-
Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
|
|
5
|
-
Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
|
|
6
|
-
License: AGPL-3.0
|
|
7
|
-
Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
|
|
8
|
-
Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
|
|
9
|
-
Keywords: openalex,academic,research,abstracts,semantic-search
|
|
10
|
-
Classifier: Development Status :: 3 - Alpha
|
|
11
|
-
Classifier: Intended Audience :: Science/Research
|
|
12
|
-
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: Topic :: Scientific/Engineering
|
|
18
|
-
Requires-Python: >=3.10
|
|
19
|
-
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: click>=8.0
|
|
21
|
-
Requires-Dist: awscli>=1.0
|
|
22
|
-
Provides-Extra: dev
|
|
23
|
-
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
|
-
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
25
|
-
|
|
26
|
-
# OpenAlex Local
|
|
27
|
-
|
|
28
|
-
Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
|
|
29
|
-
|
|
30
|
-
[](https://www.python.org/downloads/)
|
|
31
|
-
[](LICENSE)
|
|
32
|
-
|
|
33
|
-
<details>
|
|
34
|
-
<summary><strong>Why OpenAlex Local?</strong></summary>
|
|
35
|
-
|
|
36
|
-
**Built for the LLM era** - features that matter for AI research assistants:
|
|
37
|
-
|
|
38
|
-
| Feature | Benefit |
|
|
39
|
-
|---------|---------|
|
|
40
|
-
| 📚 **284M Works** | More coverage than CrossRef |
|
|
41
|
-
| 📝 **Abstracts** | ~45-60% availability for semantic search |
|
|
42
|
-
| 🏷️ **Concepts & Topics** | Built-in classification |
|
|
43
|
-
| 👤 **Author Disambiguation** | Linked to institutions |
|
|
44
|
-
| 🔓 **Open Access Info** | OA status and URLs |
|
|
45
|
-
|
|
46
|
-
Perfect for: RAG systems, research assistants, literature review automation.
|
|
47
|
-
|
|
48
|
-
</details>
|
|
49
|
-
|
|
50
|
-
<details>
|
|
51
|
-
<summary><strong>Installation</strong></summary>
|
|
52
|
-
|
|
53
|
-
```bash
|
|
54
|
-
pip install openalex-local
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
From source:
|
|
58
|
-
```bash
|
|
59
|
-
git clone https://github.com/ywatanabe1989/openalex-local
|
|
60
|
-
cd openalex-local && make install
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
Database setup (~300 GB, ~1-2 days to build):
|
|
64
|
-
```bash
|
|
65
|
-
# Check system status
|
|
66
|
-
make status
|
|
67
|
-
|
|
68
|
-
# 1. Download OpenAlex Works snapshot (~300GB)
|
|
69
|
-
make download-screen # runs in background
|
|
70
|
-
|
|
71
|
-
# 2. Build SQLite database
|
|
72
|
-
make build-db
|
|
73
|
-
|
|
74
|
-
# 3. Build FTS5 index
|
|
75
|
-
make build-fts
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
</details>
|
|
79
|
-
|
|
80
|
-
<details>
|
|
81
|
-
<summary><strong>Python API</strong></summary>
|
|
82
|
-
|
|
83
|
-
```python
|
|
84
|
-
from openalex_local import search, get, count
|
|
85
|
-
|
|
86
|
-
# Full-text search (title + abstract)
|
|
87
|
-
results = search("machine learning neural networks")
|
|
88
|
-
for work in results:
|
|
89
|
-
print(f"{work.title} ({work.year})")
|
|
90
|
-
print(f" Abstract: {work.abstract[:200]}...")
|
|
91
|
-
print(f" Concepts: {[c['name'] for c in work.concepts]}")
|
|
92
|
-
|
|
93
|
-
# Get by OpenAlex ID or DOI
|
|
94
|
-
work = get("W2741809807")
|
|
95
|
-
work = get("10.1038/nature12373")
|
|
96
|
-
|
|
97
|
-
# Count matches
|
|
98
|
-
n = count("CRISPR")
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
</details>
|
|
102
|
-
|
|
103
|
-
<details>
|
|
104
|
-
<summary><strong>CLI</strong></summary>
|
|
105
|
-
|
|
106
|
-
```bash
|
|
107
|
-
openalex-local search "CRISPR genome editing" -n 5
|
|
108
|
-
openalex-local get W2741809807
|
|
109
|
-
openalex-local get 10.1038/nature12373
|
|
110
|
-
openalex-local count "machine learning"
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
</details>
|
|
114
|
-
|
|
115
|
-
<details>
|
|
116
|
-
<summary><strong>Related Projects</strong></summary>
|
|
117
|
-
|
|
118
|
-
**[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
|
|
119
|
-
|
|
120
|
-
| Feature | crossref-local | openalex-local |
|
|
121
|
-
|---------|----------------|----------------|
|
|
122
|
-
| Works | 167M | 284M |
|
|
123
|
-
| Abstracts | ~21% | ~45-60% |
|
|
124
|
-
| Update frequency | Real-time | Monthly |
|
|
125
|
-
| DOI authority | ✓ (source) | Uses CrossRef |
|
|
126
|
-
| Citations | Raw references | Linked works |
|
|
127
|
-
| Concepts/Topics | ❌ | ✓ |
|
|
128
|
-
| Author IDs | ❌ | ✓ |
|
|
129
|
-
| Best for | DOI lookup, raw refs | Semantic search |
|
|
130
|
-
|
|
131
|
-
**When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
|
|
132
|
-
**When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
|
|
133
|
-
|
|
134
|
-
</details>
|
|
135
|
-
|
|
136
|
-
<details>
|
|
137
|
-
<summary><strong>Data Source</strong></summary>
|
|
138
|
-
|
|
139
|
-
Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
|
|
140
|
-
Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
|
|
141
|
-
|
|
142
|
-
</details>
|
|
143
|
-
|
|
144
|
-
---
|
|
145
|
-
|
|
146
|
-
<p align="center">
|
|
147
|
-
<a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
|
|
148
|
-
<br>
|
|
149
|
-
AGPL-3.0 · ywatanabe@scitex.ai
|
|
150
|
-
</p>
|
|
151
|
-
|
|
152
|
-
<!-- EOF -->
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
openalex_local/__init__.py,sha256=UKl7hQFZMwK1oPV3zz7V2Pw7M4ufPouUCNwmODdFZlw,759
|
|
2
|
-
openalex_local/__main__.py,sha256=7zIPyOv659VptzHef3Zsw3k6m-WhGTN4MFq2-yVkdLE,111
|
|
3
|
-
openalex_local/api.py,sha256=LRIU0JZ41c33YAK96r_MOHNP5tdweu1_Fd-ZRdZ8IT0,6036
|
|
4
|
-
openalex_local/cli.py,sha256=LLlgdAdt6qrF1LVZf5jtimsDiTmvLQ-wzw0-hCnLbr0,6464
|
|
5
|
-
openalex_local/config.py,sha256=pwvXj-CncHCWdQr4ZhtH4ItuDY7RVKWikspQYyYfcbE,4848
|
|
6
|
-
openalex_local/db.py,sha256=eNPZ4Ejqn2w2m5Gk8eCApQHT_cr2X5wt4KiOeVeU7wU,4355
|
|
7
|
-
openalex_local/fts.py,sha256=52TrRqabNj1zLR8gDtb9mOgIXioEBaTRJN7tz5mAcpE,3959
|
|
8
|
-
openalex_local/models.py,sha256=yLjQsYgDcjvjqwt_amvIAXNTpGgcy8nqEkSS70Q8_cY,7120
|
|
9
|
-
openalex_local-0.3.0.dist-info/METADATA,sha256=sL5zP_JOo9DRNcvgrX1myWczeCGnrtWhAJ78VLpuAWc,4384
|
|
10
|
-
openalex_local-0.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
11
|
-
openalex_local-0.3.0.dist-info/entry_points.txt,sha256=OYO9RohYQ52zoeb-jTjhMYR07F0PioGQXPizMnBv_Es,59
|
|
12
|
-
openalex_local-0.3.0.dist-info/top_level.txt,sha256=arEhuDR1f42p7soJ5JkJFAnAiCPSGKvuHIchaKTmqKg,15
|
|
13
|
-
openalex_local-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|