openalex-local 0.1.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local-0.3.1/PKG-INFO +288 -0
- openalex_local-0.3.1/README.md +249 -0
- {openalex_local-0.1.0 → openalex_local-0.3.1}/pyproject.toml +20 -2
- openalex_local-0.3.1/src/openalex_local/__init__.py +65 -0
- openalex_local-0.3.1/src/openalex_local/__main__.py +6 -0
- openalex_local-0.3.1/src/openalex_local/_cache/__init__.py +45 -0
- openalex_local-0.3.1/src/openalex_local/_cache/core.py +298 -0
- openalex_local-0.3.1/src/openalex_local/_cache/export.py +100 -0
- openalex_local-0.3.1/src/openalex_local/_cache/models.py +17 -0
- openalex_local-0.3.1/src/openalex_local/_cache/utils.py +85 -0
- openalex_local-0.3.1/src/openalex_local/_cli/__init__.py +9 -0
- openalex_local-0.3.1/src/openalex_local/_cli/cli.py +409 -0
- openalex_local-0.3.1/src/openalex_local/_cli/cli_cache.py +220 -0
- openalex_local-0.3.1/src/openalex_local/_cli/mcp.py +210 -0
- openalex_local-0.3.1/src/openalex_local/_cli/mcp_server.py +235 -0
- openalex_local-0.3.1/src/openalex_local/_core/__init__.py +42 -0
- openalex_local-0.3.1/src/openalex_local/_core/api.py +376 -0
- openalex_local-0.3.1/src/openalex_local/_core/config.py +120 -0
- openalex_local-0.3.1/src/openalex_local/_core/db.py +214 -0
- openalex_local-0.3.1/src/openalex_local/_core/export.py +252 -0
- openalex_local-0.3.1/src/openalex_local/_core/fts.py +165 -0
- openalex_local-0.3.1/src/openalex_local/_core/models.py +432 -0
- openalex_local-0.3.1/src/openalex_local/_remote/__init__.py +34 -0
- openalex_local-0.3.1/src/openalex_local/_remote/base.py +256 -0
- openalex_local-0.3.1/src/openalex_local/_server/__init__.py +117 -0
- openalex_local-0.3.1/src/openalex_local/_server/routes.py +175 -0
- openalex_local-0.3.1/src/openalex_local/aio.py +259 -0
- openalex_local-0.3.1/src/openalex_local/cache.py +31 -0
- openalex_local-0.3.1/src/openalex_local/cli.py +8 -0
- openalex_local-0.3.1/src/openalex_local/jobs.py +169 -0
- openalex_local-0.3.1/src/openalex_local/remote.py +8 -0
- openalex_local-0.3.1/src/openalex_local/server.py +8 -0
- openalex_local-0.3.1/src/openalex_local.egg-info/PKG-INFO +288 -0
- openalex_local-0.3.1/src/openalex_local.egg-info/SOURCES.txt +42 -0
- openalex_local-0.3.1/src/openalex_local.egg-info/entry_points.txt +2 -0
- openalex_local-0.3.1/src/openalex_local.egg-info/requires.txt +24 -0
- openalex_local-0.3.1/tests/test_api.py +200 -0
- openalex_local-0.3.1/tests/test_cli.py +99 -0
- openalex_local-0.3.1/tests/test_config.py +107 -0
- openalex_local-0.3.1/tests/test_jobs.py +132 -0
- openalex_local-0.3.1/tests/test_models.py +239 -0
- openalex_local-0.1.0/PKG-INFO +0 -152
- openalex_local-0.1.0/README.md +0 -127
- openalex_local-0.1.0/src/openalex_local/__init__.py +0 -14
- openalex_local-0.1.0/src/openalex_local/config.py +0 -73
- openalex_local-0.1.0/src/openalex_local/models.py +0 -187
- openalex_local-0.1.0/src/openalex_local.egg-info/PKG-INFO +0 -152
- openalex_local-0.1.0/src/openalex_local.egg-info/SOURCES.txt +0 -11
- openalex_local-0.1.0/src/openalex_local.egg-info/entry_points.txt +0 -2
- openalex_local-0.1.0/src/openalex_local.egg-info/requires.txt +0 -6
- {openalex_local-0.1.0 → openalex_local-0.3.1}/setup.cfg +0 -0
- {openalex_local-0.1.0 → openalex_local-0.3.1}/src/openalex_local.egg-info/dependency_links.txt +0 -0
- {openalex_local-0.1.0 → openalex_local-0.3.1}/src/openalex_local.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openalex-local
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
|
|
5
|
+
Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
|
|
6
|
+
License: AGPL-3.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
|
|
8
|
+
Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
|
|
9
|
+
Keywords: openalex,academic,research,abstracts,semantic-search
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Requires-Dist: awscli>=1.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
26
|
+
Provides-Extra: mcp
|
|
27
|
+
Requires-Dist: fastmcp>=0.4; extra == "mcp"
|
|
28
|
+
Provides-Extra: server
|
|
29
|
+
Requires-Dist: fastapi>=0.100; extra == "server"
|
|
30
|
+
Requires-Dist: uvicorn>=0.23; extra == "server"
|
|
31
|
+
Provides-Extra: docs
|
|
32
|
+
Requires-Dist: sphinx>=7.0; extra == "docs"
|
|
33
|
+
Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
|
|
34
|
+
Requires-Dist: myst-parser>=2.0; extra == "docs"
|
|
35
|
+
Requires-Dist: sphinx-copybutton>=0.5; extra == "docs"
|
|
36
|
+
Requires-Dist: sphinx-autodoc-typehints>=1.25; extra == "docs"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: openalex-local[dev,docs,mcp,server]; extra == "all"
|
|
39
|
+
|
|
40
|
+
# OpenAlex Local
|
|
41
|
+
|
|
42
|
+
Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
|
|
43
|
+
|
|
44
|
+
[PyPI version](https://badge.fury.io/py/openalex-local)
|
|
45
|
+
[Documentation](https://openalex-local.readthedocs.io/en/latest/)
|
|
46
|
+
[Tests](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml)
|
|
47
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
48
|
+
[License: AGPL-3.0](LICENSE)
|
|
49
|
+
|
|
50
|
+
<details>
|
|
51
|
+
<summary><strong>Why OpenAlex Local?</strong></summary>
|
|
52
|
+
|
|
53
|
+
**Built for the LLM era** - features that matter for AI research assistants:
|
|
54
|
+
|
|
55
|
+
| Feature | Benefit |
|
|
56
|
+
|---------|---------|
|
|
57
|
+
| **284M Works** | More coverage than CrossRef |
|
|
58
|
+
| **Abstracts** | ~45-60% availability for semantic search |
|
|
59
|
+
| **Concepts & Topics** | Built-in classification |
|
|
60
|
+
| **Author Disambiguation** | Linked to institutions |
|
|
61
|
+
| **Open Access Info** | OA status and URLs |
|
|
62
|
+
|
|
63
|
+
Perfect for: RAG systems, research assistants, literature review automation.
|
|
64
|
+
|
|
65
|
+
</details>
|
|
66
|
+
|
|
67
|
+
<details>
|
|
68
|
+
<summary><strong>Installation</strong></summary>
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install openalex-local
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
From source:
|
|
75
|
+
```bash
|
|
76
|
+
git clone https://github.com/ywatanabe1989/openalex-local
|
|
77
|
+
cd openalex-local && make install
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Database setup (~300 GB, ~1-2 days to build):
|
|
81
|
+
```bash
|
|
82
|
+
# Check system status
|
|
83
|
+
make status
|
|
84
|
+
|
|
85
|
+
# 1. Download OpenAlex Works snapshot (~300GB)
|
|
86
|
+
make download-screen # runs in background
|
|
87
|
+
|
|
88
|
+
# 2. Build SQLite database
|
|
89
|
+
make build-db
|
|
90
|
+
|
|
91
|
+
# 3. Build FTS5 index
|
|
92
|
+
make build-fts
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
</details>
|
|
96
|
+
|
|
97
|
+
<details>
|
|
98
|
+
<summary><strong>Python API</strong></summary>
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from openalex_local import search, get, count
|
|
102
|
+
|
|
103
|
+
# Full-text search (title + abstract)
|
|
104
|
+
results = search("machine learning neural networks")
|
|
105
|
+
for work in results:
|
|
106
|
+
print(f"{work.title} ({work.year})")
|
|
107
|
+
print(f" Abstract: {work.abstract[:200]}...")
|
|
108
|
+
print(f" Concepts: {[c['name'] for c in work.concepts]}")
|
|
109
|
+
|
|
110
|
+
# Get by OpenAlex ID or DOI
|
|
111
|
+
work = get("W2741809807")
|
|
112
|
+
work = get("10.1038/nature12373")
|
|
113
|
+
|
|
114
|
+
# Count matches
|
|
115
|
+
n = count("CRISPR")
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
</details>
|
|
119
|
+
|
|
120
|
+
<details>
|
|
121
|
+
<summary><strong>CLI</strong></summary>
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
openalex-local search "CRISPR genome editing" -n 5
|
|
125
|
+
openalex-local search-by-doi W2741809807
|
|
126
|
+
openalex-local search-by-doi 10.1038/nature12373
|
|
127
|
+
openalex-local status # Configuration and database stats
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
With abstracts (`-a` flag):
|
|
131
|
+
```
|
|
132
|
+
$ openalex-local search "neural network" -n 1 -a
|
|
133
|
+
|
|
134
|
+
Found 1,523,847 matches in 45.2ms
|
|
135
|
+
|
|
136
|
+
1. Deep learning for neural networks (2015)
|
|
137
|
+
OpenAlex ID: W2741809807
|
|
138
|
+
Abstract: This paper presents a comprehensive overview of deep learning
|
|
139
|
+
techniques for neural network architectures...
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
</details>
|
|
143
|
+
|
|
144
|
+
<details>
|
|
145
|
+
<summary><strong>HTTP API</strong></summary>
|
|
146
|
+
|
|
147
|
+
Start the FastAPI server:
|
|
148
|
+
```bash
|
|
149
|
+
openalex-local relay --host 0.0.0.0 --port 31292
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Endpoints:
|
|
153
|
+
```bash
|
|
154
|
+
# Search works (FTS5)
|
|
155
|
+
curl "http://localhost:31292/works?q=CRISPR&limit=10"
|
|
156
|
+
|
|
157
|
+
# Get by ID or DOI
|
|
158
|
+
curl "http://localhost:31292/works/W2741809807"
|
|
159
|
+
curl "http://localhost:31292/works/10.1038/nature12373"
|
|
160
|
+
|
|
161
|
+
# Batch lookup
|
|
162
|
+
curl -X POST "http://localhost:31292/works/batch" \
|
|
163
|
+
-H "Content-Type: application/json" \
|
|
164
|
+
-d '{"ids": ["W2741809807", "10.1038/nature12373"]}'
|
|
165
|
+
|
|
166
|
+
# Database info
|
|
167
|
+
curl "http://localhost:31292/info"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
HTTP mode (connect to running server):
|
|
171
|
+
```bash
|
|
172
|
+
# On local machine (if server is remote)
|
|
173
|
+
ssh -L 31292:127.0.0.1:31292 your-server
|
|
174
|
+
|
|
175
|
+
# Python client (the next two lines are Python code, not shell commands)
|
|
176
|
+
from openalex_local import configure_http
|
|
177
|
+
configure_http("http://localhost:31292")
|
|
178
|
+
|
|
179
|
+
# Or via CLI
|
|
180
|
+
openalex-local --http search "CRISPR"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
</details>
|
|
184
|
+
|
|
185
|
+
<details>
|
|
186
|
+
<summary><strong>MCP Server</strong></summary>
|
|
187
|
+
|
|
188
|
+
Run as MCP (Model Context Protocol) server:
|
|
189
|
+
```bash
|
|
190
|
+
openalex-local mcp start
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Local MCP client configuration:
|
|
194
|
+
```json
|
|
195
|
+
{
|
|
196
|
+
"mcpServers": {
|
|
197
|
+
"openalex-local": {
|
|
198
|
+
"command": "openalex-local",
|
|
199
|
+
"args": ["mcp", "start"],
|
|
200
|
+
"env": {
|
|
201
|
+
"OPENALEX_LOCAL_DB": "/path/to/openalex.db"
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Remote MCP via HTTP:
|
|
209
|
+
```bash
|
|
210
|
+
# On server: start persistent MCP server
|
|
211
|
+
openalex-local mcp start -t http --host 0.0.0.0 --port 8083
|
|
212
|
+
```
|
|
213
|
+
```json
|
|
214
|
+
{
|
|
215
|
+
"mcpServers": {
|
|
216
|
+
"openalex-remote": {
|
|
217
|
+
"url": "http://your-server:8083/mcp"
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Diagnose setup:
|
|
224
|
+
```bash
|
|
225
|
+
openalex-local mcp doctor # Check dependencies and database
|
|
226
|
+
openalex-local mcp list-tools # Show available MCP tools
|
|
227
|
+
openalex-local mcp installation # Show client config examples
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Available tools:
|
|
231
|
+
- `search` - Full-text search across 284M+ papers
|
|
232
|
+
- `search_by_id` - Get paper by OpenAlex ID or DOI
|
|
233
|
+
- `enrich_ids` - Batch lookup with metadata
|
|
234
|
+
- `status` - Database statistics
|
|
235
|
+
|
|
236
|
+
</details>
|
|
237
|
+
|
|
238
|
+
<details>
|
|
239
|
+
<summary><strong>Related Projects</strong></summary>
|
|
240
|
+
|
|
241
|
+
**[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
|
|
242
|
+
|
|
243
|
+
| Feature | crossref-local | openalex-local |
|
|
244
|
+
|---------|----------------|----------------|
|
|
245
|
+
| Works | 167M | 284M |
|
|
246
|
+
| Abstracts | ~21% | ~45-60% |
|
|
247
|
+
| Update frequency | Real-time | Monthly |
|
|
248
|
+
| DOI authority | Yes (source) | Uses CrossRef |
|
|
249
|
+
| Citations | Raw references | Linked works |
|
|
250
|
+
| Concepts/Topics | No | Yes |
|
|
251
|
+
| Author IDs | No | Yes |
|
|
252
|
+
| Best for | DOI lookup, raw refs | Semantic search |
|
|
253
|
+
|
|
254
|
+
**When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
|
|
255
|
+
**When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
|
|
256
|
+
|
|
257
|
+
</details>
|
|
258
|
+
|
|
259
|
+
<details>
|
|
260
|
+
<summary><strong>Documentation</strong></summary>
|
|
261
|
+
|
|
262
|
+
Full documentation available at [openalex-local.readthedocs.io](https://openalex-local.readthedocs.io/en/latest/)
|
|
263
|
+
|
|
264
|
+
- [Installation Guide](https://openalex-local.readthedocs.io/en/latest/installation.html)
|
|
265
|
+
- [Quickstart](https://openalex-local.readthedocs.io/en/latest/quickstart.html)
|
|
266
|
+
- [CLI Reference](https://openalex-local.readthedocs.io/en/latest/cli_reference.html)
|
|
267
|
+
- [HTTP API Reference](https://openalex-local.readthedocs.io/en/latest/http_api.html)
|
|
268
|
+
- [Python API](https://openalex-local.readthedocs.io/en/latest/api/openalex_local.html)
|
|
269
|
+
|
|
270
|
+
</details>
|
|
271
|
+
|
|
272
|
+
<details>
|
|
273
|
+
<summary><strong>Data Source</strong></summary>
|
|
274
|
+
|
|
275
|
+
Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
|
|
276
|
+
Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
|
|
277
|
+
|
|
278
|
+
</details>
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
<p align="center">
|
|
283
|
+
<a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
|
|
284
|
+
<br>
|
|
285
|
+
AGPL-3.0 · ywatanabe@scitex.ai
|
|
286
|
+
</p>
|
|
287
|
+
|
|
288
|
+
<!-- EOF -->
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# OpenAlex Local
|
|
2
|
+
|
|
3
|
+
Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
|
|
4
|
+
|
|
5
|
+
[PyPI version](https://badge.fury.io/py/openalex-local)
|
|
6
|
+
[Documentation](https://openalex-local.readthedocs.io/en/latest/)
|
|
7
|
+
[Tests](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml)
|
|
8
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
9
|
+
[License: AGPL-3.0](LICENSE)
|
|
10
|
+
|
|
11
|
+
<details>
|
|
12
|
+
<summary><strong>Why OpenAlex Local?</strong></summary>
|
|
13
|
+
|
|
14
|
+
**Built for the LLM era** - features that matter for AI research assistants:
|
|
15
|
+
|
|
16
|
+
| Feature | Benefit |
|
|
17
|
+
|---------|---------|
|
|
18
|
+
| **284M Works** | More coverage than CrossRef |
|
|
19
|
+
| **Abstracts** | ~45-60% availability for semantic search |
|
|
20
|
+
| **Concepts & Topics** | Built-in classification |
|
|
21
|
+
| **Author Disambiguation** | Linked to institutions |
|
|
22
|
+
| **Open Access Info** | OA status and URLs |
|
|
23
|
+
|
|
24
|
+
Perfect for: RAG systems, research assistants, literature review automation.
|
|
25
|
+
|
|
26
|
+
</details>
|
|
27
|
+
|
|
28
|
+
<details>
|
|
29
|
+
<summary><strong>Installation</strong></summary>
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install openalex-local
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
From source:
|
|
36
|
+
```bash
|
|
37
|
+
git clone https://github.com/ywatanabe1989/openalex-local
|
|
38
|
+
cd openalex-local && make install
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Database setup (~300 GB, ~1-2 days to build):
|
|
42
|
+
```bash
|
|
43
|
+
# Check system status
|
|
44
|
+
make status
|
|
45
|
+
|
|
46
|
+
# 1. Download OpenAlex Works snapshot (~300GB)
|
|
47
|
+
make download-screen # runs in background
|
|
48
|
+
|
|
49
|
+
# 2. Build SQLite database
|
|
50
|
+
make build-db
|
|
51
|
+
|
|
52
|
+
# 3. Build FTS5 index
|
|
53
|
+
make build-fts
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
</details>
|
|
57
|
+
|
|
58
|
+
<details>
|
|
59
|
+
<summary><strong>Python API</strong></summary>
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from openalex_local import search, get, count
|
|
63
|
+
|
|
64
|
+
# Full-text search (title + abstract)
|
|
65
|
+
results = search("machine learning neural networks")
|
|
66
|
+
for work in results:
|
|
67
|
+
print(f"{work.title} ({work.year})")
|
|
68
|
+
print(f" Abstract: {work.abstract[:200]}...")
|
|
69
|
+
print(f" Concepts: {[c['name'] for c in work.concepts]}")
|
|
70
|
+
|
|
71
|
+
# Get by OpenAlex ID or DOI
|
|
72
|
+
work = get("W2741809807")
|
|
73
|
+
work = get("10.1038/nature12373")
|
|
74
|
+
|
|
75
|
+
# Count matches
|
|
76
|
+
n = count("CRISPR")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
</details>
|
|
80
|
+
|
|
81
|
+
<details>
|
|
82
|
+
<summary><strong>CLI</strong></summary>
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
openalex-local search "CRISPR genome editing" -n 5
|
|
86
|
+
openalex-local search-by-doi W2741809807
|
|
87
|
+
openalex-local search-by-doi 10.1038/nature12373
|
|
88
|
+
openalex-local status # Configuration and database stats
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
With abstracts (`-a` flag):
|
|
92
|
+
```
|
|
93
|
+
$ openalex-local search "neural network" -n 1 -a
|
|
94
|
+
|
|
95
|
+
Found 1,523,847 matches in 45.2ms
|
|
96
|
+
|
|
97
|
+
1. Deep learning for neural networks (2015)
|
|
98
|
+
OpenAlex ID: W2741809807
|
|
99
|
+
Abstract: This paper presents a comprehensive overview of deep learning
|
|
100
|
+
techniques for neural network architectures...
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
</details>
|
|
104
|
+
|
|
105
|
+
<details>
|
|
106
|
+
<summary><strong>HTTP API</strong></summary>
|
|
107
|
+
|
|
108
|
+
Start the FastAPI server:
|
|
109
|
+
```bash
|
|
110
|
+
openalex-local relay --host 0.0.0.0 --port 31292
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Endpoints:
|
|
114
|
+
```bash
|
|
115
|
+
# Search works (FTS5)
|
|
116
|
+
curl "http://localhost:31292/works?q=CRISPR&limit=10"
|
|
117
|
+
|
|
118
|
+
# Get by ID or DOI
|
|
119
|
+
curl "http://localhost:31292/works/W2741809807"
|
|
120
|
+
curl "http://localhost:31292/works/10.1038/nature12373"
|
|
121
|
+
|
|
122
|
+
# Batch lookup
|
|
123
|
+
curl -X POST "http://localhost:31292/works/batch" \
|
|
124
|
+
-H "Content-Type: application/json" \
|
|
125
|
+
-d '{"ids": ["W2741809807", "10.1038/nature12373"]}'
|
|
126
|
+
|
|
127
|
+
# Database info
|
|
128
|
+
curl "http://localhost:31292/info"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
HTTP mode (connect to running server):
|
|
132
|
+
```bash
|
|
133
|
+
# On local machine (if server is remote)
|
|
134
|
+
ssh -L 31292:127.0.0.1:31292 your-server
|
|
135
|
+
|
|
136
|
+
# Python client (the next two lines are Python code, not shell commands)
|
|
137
|
+
from openalex_local import configure_http
|
|
138
|
+
configure_http("http://localhost:31292")
|
|
139
|
+
|
|
140
|
+
# Or via CLI
|
|
141
|
+
openalex-local --http search "CRISPR"
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
</details>
|
|
145
|
+
|
|
146
|
+
<details>
|
|
147
|
+
<summary><strong>MCP Server</strong></summary>
|
|
148
|
+
|
|
149
|
+
Run as MCP (Model Context Protocol) server:
|
|
150
|
+
```bash
|
|
151
|
+
openalex-local mcp start
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Local MCP client configuration:
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"mcpServers": {
|
|
158
|
+
"openalex-local": {
|
|
159
|
+
"command": "openalex-local",
|
|
160
|
+
"args": ["mcp", "start"],
|
|
161
|
+
"env": {
|
|
162
|
+
"OPENALEX_LOCAL_DB": "/path/to/openalex.db"
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Remote MCP via HTTP:
|
|
170
|
+
```bash
|
|
171
|
+
# On server: start persistent MCP server
|
|
172
|
+
openalex-local mcp start -t http --host 0.0.0.0 --port 8083
|
|
173
|
+
```
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"mcpServers": {
|
|
177
|
+
"openalex-remote": {
|
|
178
|
+
"url": "http://your-server:8083/mcp"
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Diagnose setup:
|
|
185
|
+
```bash
|
|
186
|
+
openalex-local mcp doctor # Check dependencies and database
|
|
187
|
+
openalex-local mcp list-tools # Show available MCP tools
|
|
188
|
+
openalex-local mcp installation # Show client config examples
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Available tools:
|
|
192
|
+
- `search` - Full-text search across 284M+ papers
|
|
193
|
+
- `search_by_id` - Get paper by OpenAlex ID or DOI
|
|
194
|
+
- `enrich_ids` - Batch lookup with metadata
|
|
195
|
+
- `status` - Database statistics
|
|
196
|
+
|
|
197
|
+
</details>
|
|
198
|
+
|
|
199
|
+
<details>
|
|
200
|
+
<summary><strong>Related Projects</strong></summary>
|
|
201
|
+
|
|
202
|
+
**[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
|
|
203
|
+
|
|
204
|
+
| Feature | crossref-local | openalex-local |
|
|
205
|
+
|---------|----------------|----------------|
|
|
206
|
+
| Works | 167M | 284M |
|
|
207
|
+
| Abstracts | ~21% | ~45-60% |
|
|
208
|
+
| Update frequency | Real-time | Monthly |
|
|
209
|
+
| DOI authority | Yes (source) | Uses CrossRef |
|
|
210
|
+
| Citations | Raw references | Linked works |
|
|
211
|
+
| Concepts/Topics | No | Yes |
|
|
212
|
+
| Author IDs | No | Yes |
|
|
213
|
+
| Best for | DOI lookup, raw refs | Semantic search |
|
|
214
|
+
|
|
215
|
+
**When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
|
|
216
|
+
**When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
|
|
217
|
+
|
|
218
|
+
</details>
|
|
219
|
+
|
|
220
|
+
<details>
|
|
221
|
+
<summary><strong>Documentation</strong></summary>
|
|
222
|
+
|
|
223
|
+
Full documentation available at [openalex-local.readthedocs.io](https://openalex-local.readthedocs.io/en/latest/)
|
|
224
|
+
|
|
225
|
+
- [Installation Guide](https://openalex-local.readthedocs.io/en/latest/installation.html)
|
|
226
|
+
- [Quickstart](https://openalex-local.readthedocs.io/en/latest/quickstart.html)
|
|
227
|
+
- [CLI Reference](https://openalex-local.readthedocs.io/en/latest/cli_reference.html)
|
|
228
|
+
- [HTTP API Reference](https://openalex-local.readthedocs.io/en/latest/http_api.html)
|
|
229
|
+
- [Python API](https://openalex-local.readthedocs.io/en/latest/api/openalex_local.html)
|
|
230
|
+
|
|
231
|
+
</details>
|
|
232
|
+
|
|
233
|
+
<details>
|
|
234
|
+
<summary><strong>Data Source</strong></summary>
|
|
235
|
+
|
|
236
|
+
Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
|
|
237
|
+
Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
|
|
238
|
+
|
|
239
|
+
</details>
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
<p align="center">
|
|
244
|
+
<a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
|
|
245
|
+
<br>
|
|
246
|
+
AGPL-3.0 · ywatanabe@scitex.ai
|
|
247
|
+
</p>
|
|
248
|
+
|
|
249
|
+
<!-- EOF -->
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "openalex-local"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "Local OpenAlex database with 284M+ works, abstracts, and semantic search"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "AGPL-3.0"}
|
|
@@ -32,10 +32,28 @@ dependencies = [
|
|
|
32
32
|
dev = [
|
|
33
33
|
"pytest>=7.0",
|
|
34
34
|
"pytest-cov>=4.0",
|
|
35
|
+
"pytest-asyncio>=0.21",
|
|
36
|
+
]
|
|
37
|
+
mcp = [
|
|
38
|
+
"fastmcp>=0.4",
|
|
39
|
+
]
|
|
40
|
+
server = [
|
|
41
|
+
"fastapi>=0.100",
|
|
42
|
+
"uvicorn>=0.23",
|
|
43
|
+
]
|
|
44
|
+
docs = [
|
|
45
|
+
"sphinx>=7.0",
|
|
46
|
+
"sphinx-rtd-theme>=2.0",
|
|
47
|
+
"myst-parser>=2.0",
|
|
48
|
+
"sphinx-copybutton>=0.5",
|
|
49
|
+
"sphinx-autodoc-typehints>=1.25",
|
|
50
|
+
]
|
|
51
|
+
all = [
|
|
52
|
+
"openalex-local[server,mcp,dev,docs]",
|
|
35
53
|
]
|
|
36
54
|
|
|
37
55
|
[project.scripts]
|
|
38
|
-
openalex-local = "openalex_local.cli:main"
|
|
56
|
+
openalex-local = "openalex_local._cli.cli:main"
|
|
39
57
|
|
|
40
58
|
[project.urls]
|
|
41
59
|
Homepage = "https://github.com/ywatanabe1989/openalex-local"
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAlex Local - Local OpenAlex database with 284M+ works and semantic search.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
>>> from openalex_local import search, get
|
|
6
|
+
>>> results = search("machine learning neural networks")
|
|
7
|
+
>>> work = get("W2741809807") # OpenAlex ID
|
|
8
|
+
>>> work = get("10.1038/nature12373") # or DOI
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__version__ = "0.3.1"
|
|
12
|
+
|
|
13
|
+
from ._core import (
|
|
14
|
+
SUPPORTED_FORMATS,
|
|
15
|
+
SearchResult,
|
|
16
|
+
Work,
|
|
17
|
+
configure,
|
|
18
|
+
count,
|
|
19
|
+
enrich,
|
|
20
|
+
enrich_ids,
|
|
21
|
+
exists,
|
|
22
|
+
get,
|
|
23
|
+
get_many,
|
|
24
|
+
get_mode,
|
|
25
|
+
info,
|
|
26
|
+
save,
|
|
27
|
+
search,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Jobs module (public functions only)
|
|
31
|
+
from . import jobs
|
|
32
|
+
|
|
33
|
+
# Async module
|
|
34
|
+
from . import aio
|
|
35
|
+
|
|
36
|
+
# Cache module
|
|
37
|
+
from . import cache
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
# Core functions
|
|
41
|
+
"search",
|
|
42
|
+
"count",
|
|
43
|
+
"get",
|
|
44
|
+
"get_many",
|
|
45
|
+
"exists",
|
|
46
|
+
"info",
|
|
47
|
+
# Enrich functions
|
|
48
|
+
"enrich",
|
|
49
|
+
"enrich_ids",
|
|
50
|
+
# Configuration
|
|
51
|
+
"configure",
|
|
52
|
+
"get_mode",
|
|
53
|
+
# Models
|
|
54
|
+
"Work",
|
|
55
|
+
"SearchResult",
|
|
56
|
+
# Export
|
|
57
|
+
"save",
|
|
58
|
+
"SUPPORTED_FORMATS",
|
|
59
|
+
# Jobs
|
|
60
|
+
"jobs",
|
|
61
|
+
# Async
|
|
62
|
+
"aio",
|
|
63
|
+
# Cache
|
|
64
|
+
"cache",
|
|
65
|
+
]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Cache module for openalex_local.
|
|
2
|
+
|
|
3
|
+
Provides local caching of search results and works for offline analysis.
|
|
4
|
+
|
|
5
|
+
Example:
|
|
6
|
+
>>> from openalex_local import cache
|
|
7
|
+
>>> # Create a cache from search
|
|
8
|
+
>>> info = cache.create("ml_papers", query="machine learning", limit=1000)
|
|
9
|
+
>>> print(f"Cached {info.count} papers")
|
|
10
|
+
>>>
|
|
11
|
+
>>> # Query the cache
|
|
12
|
+
>>> papers = cache.query("ml_papers", year_min=2020)
|
|
13
|
+
>>> # Get IDs for further processing
|
|
14
|
+
>>> ids = cache.query_ids("ml_papers")
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .models import CacheInfo
|
|
18
|
+
from .core import (
|
|
19
|
+
create,
|
|
20
|
+
append,
|
|
21
|
+
load,
|
|
22
|
+
query,
|
|
23
|
+
query_ids,
|
|
24
|
+
stats,
|
|
25
|
+
info,
|
|
26
|
+
exists,
|
|
27
|
+
list_caches,
|
|
28
|
+
delete,
|
|
29
|
+
)
|
|
30
|
+
from .export import export
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"CacheInfo",
|
|
34
|
+
"create",
|
|
35
|
+
"append",
|
|
36
|
+
"load",
|
|
37
|
+
"query",
|
|
38
|
+
"query_ids",
|
|
39
|
+
"stats",
|
|
40
|
+
"info",
|
|
41
|
+
"exists",
|
|
42
|
+
"list_caches",
|
|
43
|
+
"delete",
|
|
44
|
+
"export",
|
|
45
|
+
]
|