openalex-local 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. openalex_local/__init__.py +28 -7
  2. openalex_local/_cache/__init__.py +45 -0
  3. openalex_local/_cache/core.py +298 -0
  4. openalex_local/_cache/export.py +100 -0
  5. openalex_local/_cache/models.py +17 -0
  6. openalex_local/_cache/utils.py +85 -0
  7. openalex_local/_cli/__init__.py +9 -0
  8. openalex_local/_cli/cli.py +409 -0
  9. openalex_local/_cli/cli_cache.py +220 -0
  10. openalex_local/_cli/mcp.py +210 -0
  11. openalex_local/_cli/mcp_server.py +235 -0
  12. openalex_local/_core/__init__.py +42 -0
  13. openalex_local/{api.py → _core/api.py} +137 -19
  14. openalex_local/_core/config.py +120 -0
  15. openalex_local/{db.py → _core/db.py} +53 -0
  16. openalex_local/_core/export.py +252 -0
  17. openalex_local/{models.py → _core/models.py} +201 -0
  18. openalex_local/_remote/__init__.py +34 -0
  19. openalex_local/_remote/base.py +256 -0
  20. openalex_local/_server/__init__.py +117 -0
  21. openalex_local/_server/routes.py +175 -0
  22. openalex_local/aio.py +259 -0
  23. openalex_local/cache.py +31 -0
  24. openalex_local/cli.py +4 -205
  25. openalex_local/jobs.py +169 -0
  26. openalex_local/remote.py +8 -0
  27. openalex_local/server.py +8 -0
  28. openalex_local-0.3.1.dist-info/METADATA +288 -0
  29. openalex_local-0.3.1.dist-info/RECORD +34 -0
  30. openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
  31. openalex_local/config.py +0 -182
  32. openalex_local-0.3.0.dist-info/METADATA +0 -152
  33. openalex_local-0.3.0.dist-info/RECORD +0 -13
  34. openalex_local-0.3.0.dist-info/entry_points.txt +0 -2
  35. openalex_local/{fts.py → _core/fts.py} +0 -0
  36. {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +0 -0
  37. {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: openalex-local
3
+ Version: 0.3.1
4
+ Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
5
+ Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
6
+ License: AGPL-3.0
7
+ Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
8
+ Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
9
+ Keywords: openalex,academic,research,abstracts,semantic-search
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: click>=8.0
21
+ Requires-Dist: awscli>=1.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
26
+ Provides-Extra: mcp
27
+ Requires-Dist: fastmcp>=0.4; extra == "mcp"
28
+ Provides-Extra: server
29
+ Requires-Dist: fastapi>=0.100; extra == "server"
30
+ Requires-Dist: uvicorn>=0.23; extra == "server"
31
+ Provides-Extra: docs
32
+ Requires-Dist: sphinx>=7.0; extra == "docs"
33
+ Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
34
+ Requires-Dist: myst-parser>=2.0; extra == "docs"
35
+ Requires-Dist: sphinx-copybutton>=0.5; extra == "docs"
36
+ Requires-Dist: sphinx-autodoc-typehints>=1.25; extra == "docs"
37
+ Provides-Extra: all
38
+ Requires-Dist: openalex-local[dev,docs,mcp,server]; extra == "all"
39
+
40
+ # OpenAlex Local
41
+
42
+ Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
43
+
44
+ [![PyPI version](https://badge.fury.io/py/openalex-local.svg)](https://badge.fury.io/py/openalex-local)
45
+ [![Documentation](https://readthedocs.org/projects/openalex-local/badge/?version=latest)](https://openalex-local.readthedocs.io/en/latest/)
46
+ [![Tests](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml/badge.svg)](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml)
47
+ [![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
48
+ [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
49
+
50
+ <details>
51
+ <summary><strong>Why OpenAlex Local?</strong></summary>
52
+
53
+ **Built for the LLM era** - features that matter for AI research assistants:
54
+
55
+ | Feature | Benefit |
56
+ |---------|---------|
57
+ | **284M Works** | More coverage than CrossRef |
58
+ | **Abstracts** | ~45-60% availability for semantic search |
59
+ | **Concepts & Topics** | Built-in classification |
60
+ | **Author Disambiguation** | Linked to institutions |
61
+ | **Open Access Info** | OA status and URLs |
62
+
63
+ Perfect for: RAG systems, research assistants, literature review automation.
64
+
65
+ </details>
66
+
67
+ <details>
68
+ <summary><strong>Installation</strong></summary>
69
+
70
+ ```bash
71
+ pip install openalex-local
72
+ ```
73
+
74
+ From source:
75
+ ```bash
76
+ git clone https://github.com/ywatanabe1989/openalex-local
77
+ cd openalex-local && make install
78
+ ```
79
+
80
+ Database setup (~300 GB, ~1-2 days to build):
81
+ ```bash
82
+ # Check system status
83
+ make status
84
+
85
+ # 1. Download OpenAlex Works snapshot (~300GB)
86
+ make download-screen # runs in background
87
+
88
+ # 2. Build SQLite database
89
+ make build-db
90
+
91
+ # 3. Build FTS5 index
92
+ make build-fts
93
+ ```
94
+
95
+ </details>
96
+
97
+ <details>
98
+ <summary><strong>Python API</strong></summary>
99
+
100
+ ```python
101
+ from openalex_local import search, get, count
102
+
103
+ # Full-text search (title + abstract)
104
+ results = search("machine learning neural networks")
105
+ for work in results:
106
+ print(f"{work.title} ({work.year})")
107
+ print(f" Abstract: {work.abstract[:200]}...")
108
+ print(f" Concepts: {[c['name'] for c in work.concepts]}")
109
+
110
+ # Get by OpenAlex ID or DOI
111
+ work = get("W2741809807")
112
+ work = get("10.1038/nature12373")
113
+
114
+ # Count matches
115
+ n = count("CRISPR")
116
+ ```
117
+
118
+ </details>
119
+
120
+ <details>
121
+ <summary><strong>CLI</strong></summary>
122
+
123
+ ```bash
124
+ openalex-local search "CRISPR genome editing" -n 5
125
+ openalex-local search-by-doi W2741809807
126
+ openalex-local search-by-doi 10.1038/nature12373
127
+ openalex-local status # Configuration and database stats
128
+ ```
129
+
130
+ With abstracts (`-a` flag):
131
+ ```
132
+ $ openalex-local search "neural network" -n 1 -a
133
+
134
+ Found 1,523,847 matches in 45.2ms
135
+
136
+ 1. Deep learning for neural networks (2015)
137
+ OpenAlex ID: W2741809807
138
+ Abstract: This paper presents a comprehensive overview of deep learning
139
+ techniques for neural network architectures...
140
+ ```
141
+
142
+ </details>
143
+
144
+ <details>
145
+ <summary><strong>HTTP API</strong></summary>
146
+
147
+ Start the FastAPI server:
148
+ ```bash
149
+ openalex-local relay --host 0.0.0.0 --port 31292
150
+ ```
151
+
152
+ Endpoints:
153
+ ```bash
154
+ # Search works (FTS5)
155
+ curl "http://localhost:31292/works?q=CRISPR&limit=10"
156
+
157
+ # Get by ID or DOI
158
+ curl "http://localhost:31292/works/W2741809807"
159
+ curl "http://localhost:31292/works/10.1038/nature12373"
160
+
161
+ # Batch lookup
162
+ curl -X POST "http://localhost:31292/works/batch" \
163
+ -H "Content-Type: application/json" \
164
+ -d '{"ids": ["W2741809807", "10.1038/nature12373"]}'
165
+
166
+ # Database info
167
+ curl "http://localhost:31292/info"
168
+ ```
169
+
170
+ HTTP mode (connect to running server):
171
+ ```bash
172
+ # On local machine (if server is remote)
173
+ ssh -L 31292:127.0.0.1:31292 your-server
174
+
175
+ # Python client (run the next two lines in a Python session, not the shell)
176
+ from openalex_local import configure_http
177
+ configure_http("http://localhost:31292")
178
+
179
+ # Or via CLI
180
+ openalex-local --http search "CRISPR"
181
+ ```
182
+
183
+ </details>
184
+
185
+ <details>
186
+ <summary><strong>MCP Server</strong></summary>
187
+
188
+ Run as MCP (Model Context Protocol) server:
189
+ ```bash
190
+ openalex-local mcp start
191
+ ```
192
+
193
+ Local MCP client configuration:
194
+ ```json
195
+ {
196
+ "mcpServers": {
197
+ "openalex-local": {
198
+ "command": "openalex-local",
199
+ "args": ["mcp", "start"],
200
+ "env": {
201
+ "OPENALEX_LOCAL_DB": "/path/to/openalex.db"
202
+ }
203
+ }
204
+ }
205
+ }
206
+ ```
207
+
208
+ Remote MCP via HTTP:
209
+ ```bash
210
+ # On server: start persistent MCP server
211
+ openalex-local mcp start -t http --host 0.0.0.0 --port 8083
212
+ ```
213
+ ```json
214
+ {
215
+ "mcpServers": {
216
+ "openalex-remote": {
217
+ "url": "http://your-server:8083/mcp"
218
+ }
219
+ }
220
+ }
221
+ ```
222
+
223
+ Diagnose setup:
224
+ ```bash
225
+ openalex-local mcp doctor # Check dependencies and database
226
+ openalex-local mcp list-tools # Show available MCP tools
227
+ openalex-local mcp installation # Show client config examples
228
+ ```
229
+
230
+ Available tools:
231
+ - `search` - Full-text search across 284M+ papers
232
+ - `search_by_id` - Get paper by OpenAlex ID or DOI
233
+ - `enrich_ids` - Batch lookup with metadata
234
+ - `status` - Database statistics
235
+
236
+ </details>
237
+
238
+ <details>
239
+ <summary><strong>Related Projects</strong></summary>
240
+
241
+ **[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
242
+
243
+ | Feature | crossref-local | openalex-local |
244
+ |---------|----------------|----------------|
245
+ | Works | 167M | 284M |
246
+ | Abstracts | ~21% | ~45-60% |
247
+ | Update frequency | Real-time | Monthly |
248
+ | DOI authority | Yes (source) | Uses CrossRef |
249
+ | Citations | Raw references | Linked works |
250
+ | Concepts/Topics | No | Yes |
251
+ | Author IDs | No | Yes |
252
+ | Best for | DOI lookup, raw refs | Semantic search |
253
+
254
+ **When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
255
+ **When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
256
+
257
+ </details>
258
+
259
+ <details>
260
+ <summary><strong>Documentation</strong></summary>
261
+
262
+ Full documentation available at [openalex-local.readthedocs.io](https://openalex-local.readthedocs.io/en/latest/)
263
+
264
+ - [Installation Guide](https://openalex-local.readthedocs.io/en/latest/installation.html)
265
+ - [Quickstart](https://openalex-local.readthedocs.io/en/latest/quickstart.html)
266
+ - [CLI Reference](https://openalex-local.readthedocs.io/en/latest/cli_reference.html)
267
+ - [HTTP API Reference](https://openalex-local.readthedocs.io/en/latest/http_api.html)
268
+ - [Python API](https://openalex-local.readthedocs.io/en/latest/api/openalex_local.html)
269
+
270
+ </details>
271
+
272
+ <details>
273
+ <summary><strong>Data Source</strong></summary>
274
+
275
+ Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
276
+ Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
277
+
278
+ </details>
279
+
280
+ ---
281
+
282
+ <p align="center">
283
+ <a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
284
+ <br>
285
+ AGPL-3.0 · ywatanabe@scitex.ai
286
+ </p>
287
+
288
+ <!-- EOF -->
@@ -0,0 +1,34 @@
1
+ openalex_local/__init__.py,sha256=USWssgVhl3PXKugOWe_VNJRD0j_PR-0vrXziWeaYX6k,1057
2
+ openalex_local/__main__.py,sha256=7zIPyOv659VptzHef3Zsw3k6m-WhGTN4MFq2-yVkdLE,111
3
+ openalex_local/aio.py,sha256=42qi3qOKgaK-e37rVj9afGvzLhzDhHAuBVOqtmXEh6M,6468
4
+ openalex_local/cache.py,sha256=-FdYvzd9XSM9qbuE2xHu8p-BUjF6pItv1qX7Jl0auck,415
5
+ openalex_local/cli.py,sha256=2pWJK_vnO1IIwUrMB3K8KW94ZWfkEF7TLSxMsaR-LrI,137
6
+ openalex_local/jobs.py,sha256=8yoG1um3g94wddPcnhx5w3XDPegnG7qpRshDtCcR2gI,4892
7
+ openalex_local/remote.py,sha256=PmvUq87mC76sM9BL9RczOaXtvwHoqMc_dN5PJCYT18M,239
8
+ openalex_local/server.py,sha256=SKoQ-cOoZjdXm24Sv1CFu3F8UclbD6QstfQb-7l2xtA,215
9
+ openalex_local/_cache/__init__.py,sha256=z56OFC31_zmngyD7k7N8Tt0vFqjIgYN7-crhmPSeG98,891
10
+ openalex_local/_cache/core.py,sha256=fi8lSVhnmhSSLJ4rEJIKMZWALTat4NlXuyZxu2awBvw,8277
11
+ openalex_local/_cache/export.py,sha256=uHpymppj8n-zz2KrveHH0XO9Mo1wR81eQ2B8CWsM6nw,2713
12
+ openalex_local/_cache/models.py,sha256=llxMa2gB8cVsmhz2Lt8YlpFG2yTtIbv6OO1O8lw-Aso,319
13
+ openalex_local/_cache/utils.py,sha256=xPQpUudWU1y-KuzKnNQN5Rt0R-fnEqcno7_EFrpgINY,2038
14
+ openalex_local/_cli/__init__.py,sha256=NS07Eo93dRAuO5vxGwGwS0ctvZxMMkF_py09bzIk3Hk,175
15
+ openalex_local/_cli/cli.py,sha256=S3RK3BzFBFWdwFPoiiImgZwMFhkRcNjK74RVP_Vp-vM,12874
16
+ openalex_local/_cli/cli_cache.py,sha256=cy1LHBtJ7S7ecLgbsCFisH9XnQ6gDm0o1vT2QAUrBOU,7147
17
+ openalex_local/_cli/mcp.py,sha256=IZ5r3rs601eTACkFhxojzOxJh9QJHLSxTjzUAilnlDs,6306
18
+ openalex_local/_cli/mcp_server.py,sha256=5qMWxb1Yeqf4GEH8JIPKRrM4lb-tOJlKHPbMjJxWGlY,6829
19
+ openalex_local/_core/__init__.py,sha256=aj7jUJ8Kv9cWoLC120JGF6h_kmgf2-5wwHkkTsgqwGU,615
20
+ openalex_local/_core/api.py,sha256=_C6hWhmvRjqpdn_LEzLhm_krbkE9YIGY3C-tep0yN20,9410
21
+ openalex_local/_core/config.py,sha256=VEoFIVCuFLO2zPRpJ0aeG5Mu8nqtHN2eFOQeGxFdQ7Y,3372
22
+ openalex_local/_core/db.py,sha256=wyCNRbNxNujfvdJwvnNjrzhJGC5aS2ZhoKaSuuheehs,5957
23
+ openalex_local/_core/export.py,sha256=NBt4dldvqPus7Ns88Qi9yPd4KgMX5FdggsIIZMfBWM4,6517
24
+ openalex_local/_core/fts.py,sha256=52TrRqabNj1zLR8gDtb9mOgIXioEBaTRJN7tz5mAcpE,3959
25
+ openalex_local/_core/models.py,sha256=m4ApslrAuTsCz8j_0Zfc6CXVWweBU8VCeBiOrtvH3Ng,13575
26
+ openalex_local/_remote/__init__.py,sha256=3NmHe2uY_VDR-MxP2XAZM8EW9PYQ57_s77hZFVGILms,792
27
+ openalex_local/_remote/base.py,sha256=qzyD-D_JPX9g5O9bJt01JYf2fzEC-7A7uW5mUA_qRIo,8404
28
+ openalex_local/_server/__init__.py,sha256=8mZawWpAuGFwNJaXekrtaFgrb-1wcXaHiIzB0pVmFug,2785
29
+ openalex_local/_server/routes.py,sha256=3qTcjiP2huLVrB6dHjCTrKossRGNFacHCJX3Ai6u4K4,4444
30
+ openalex_local-0.3.1.dist-info/METADATA,sha256=T0CwMb4_pEW3Ol8zeiynMNGMaqHCDFcDx7m2PtTY4fA,8294
31
+ openalex_local-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
32
+ openalex_local-0.3.1.dist-info/entry_points.txt,sha256=8G5Q3Nwg3vlKqJiHO0BoAgbSSi3R4lNBMqwANbs7Uz4,64
33
+ openalex_local-0.3.1.dist-info/top_level.txt,sha256=arEhuDR1f42p7soJ5JkJFAnAiCPSGKvuHIchaKTmqKg,15
34
+ openalex_local-0.3.1.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ openalex-local = openalex_local._cli.cli:main
openalex_local/config.py DELETED
@@ -1,182 +0,0 @@
1
- """Configuration for openalex_local."""
2
-
3
- import os
4
- from pathlib import Path
5
- from typing import Optional
6
-
7
- # Default database locations (checked in order)
8
- DEFAULT_DB_PATHS = [
9
- Path("/home/ywatanabe/proj/openalex-local/data/openalex.db"),
10
- Path("/home/ywatanabe/proj/openalex_local/data/openalex.db"),
11
- Path("/mnt/nas_ug/openalex_local/data/openalex.db"),
12
- Path.home() / ".openalex_local" / "openalex.db",
13
- Path.cwd() / "data" / "openalex.db",
14
- ]
15
-
16
-
17
- def get_db_path() -> Path:
18
- """
19
- Get database path from environment or auto-detect.
20
-
21
- Priority:
22
- 1. OPENALEX_LOCAL_DB environment variable
23
- 2. First existing path from DEFAULT_DB_PATHS
24
-
25
- Returns:
26
- Path to the database file
27
-
28
- Raises:
29
- FileNotFoundError: If no database found
30
- """
31
- # Check environment variable first
32
- env_path = os.environ.get("OPENALEX_LOCAL_DB")
33
- if env_path:
34
- path = Path(env_path)
35
- if path.exists():
36
- return path
37
- raise FileNotFoundError(f"OPENALEX_LOCAL_DB path not found: {env_path}")
38
-
39
- # Auto-detect from default locations
40
- for path in DEFAULT_DB_PATHS:
41
- if path.exists():
42
- return path
43
-
44
- raise FileNotFoundError(
45
- "OpenAlex database not found. Set OPENALEX_LOCAL_DB environment variable "
46
- f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
47
- )
48
-
49
-
50
- # Default port (SciTeX port scheme: 31292 for openalex)
51
- DEFAULT_PORT = 31292
52
- DEFAULT_HOST = "0.0.0.0"
53
-
54
-
55
- class Config:
56
- """Configuration container."""
57
-
58
- _db_path: Optional[Path] = None
59
- _api_url: Optional[str] = None
60
- _mode: Optional[str] = None # "db" or "http"
61
- _port: Optional[int] = None
62
- _host: Optional[str] = None
63
-
64
- @classmethod
65
- def get_db_path(cls) -> Path:
66
- """Get or auto-detect database path."""
67
- if cls._db_path is None:
68
- cls._db_path = get_db_path()
69
- return cls._db_path
70
-
71
- @classmethod
72
- def set_db_path(cls, path: str | Path) -> None:
73
- """Set database path explicitly."""
74
- path = Path(path)
75
- if not path.exists():
76
- raise FileNotFoundError(f"Database not found: {path}")
77
- cls._db_path = path
78
- cls._mode = "db"
79
-
80
- @classmethod
81
- def get_api_url(cls) -> str:
82
- """Get API URL for HTTP mode."""
83
- if cls._api_url:
84
- return cls._api_url
85
-
86
- # Check environment variables (scitex priority)
87
- for var in [
88
- "SCITEX_SCHOLAR_OPENALEX_API_URL",
89
- "OPENALEX_LOCAL_API_URL",
90
- ]:
91
- url = os.environ.get(var)
92
- if url:
93
- return url
94
-
95
- return "http://localhost:31292"
96
-
97
- @classmethod
98
- def set_api_url(cls, url: str) -> None:
99
- """Set API URL explicitly."""
100
- cls._api_url = url
101
- cls._mode = "http"
102
-
103
- @classmethod
104
- def get_mode(cls) -> str:
105
- """
106
- Get current mode.
107
-
108
- Priority:
109
- 1. Explicitly set mode
110
- 2. OPENALEX_LOCAL_MODE environment variable
111
- 3. Auto-detect based on available config
112
-
113
- Returns:
114
- "db" or "http"
115
- """
116
- if cls._mode:
117
- return cls._mode
118
-
119
- # Check environment variable
120
- env_mode = os.environ.get("OPENALEX_LOCAL_MODE", "").lower()
121
- if env_mode in ("db", "http"):
122
- return env_mode
123
-
124
- # Check if API URL is set
125
- if os.environ.get("OPENALEX_LOCAL_API_URL"):
126
- return "http"
127
-
128
- # Default to db mode (will raise FileNotFoundError if no database)
129
- return "db"
130
-
131
- @classmethod
132
- def get_port(cls) -> int:
133
- """Get server port."""
134
- if cls._port:
135
- return cls._port
136
-
137
- # Check environment variables (scitex priority)
138
- for var in [
139
- "SCITEX_SCHOLAR_OPENALEX_PORT",
140
- "OPENALEX_LOCAL_PORT",
141
- ]:
142
- port = os.environ.get(var)
143
- if port:
144
- return int(port)
145
-
146
- return DEFAULT_PORT
147
-
148
- @classmethod
149
- def set_port(cls, port: int) -> None:
150
- """Set server port explicitly."""
151
- cls._port = port
152
-
153
- @classmethod
154
- def get_host(cls) -> str:
155
- """Get server host."""
156
- if cls._host:
157
- return cls._host
158
-
159
- # Check environment variables (scitex priority)
160
- for var in [
161
- "SCITEX_SCHOLAR_OPENALEX_HOST",
162
- "OPENALEX_LOCAL_HOST",
163
- ]:
164
- host = os.environ.get(var)
165
- if host:
166
- return host
167
-
168
- return DEFAULT_HOST
169
-
170
- @classmethod
171
- def set_host(cls, host: str) -> None:
172
- """Set server host explicitly."""
173
- cls._host = host
174
-
175
- @classmethod
176
- def reset(cls) -> None:
177
- """Reset configuration (for testing)."""
178
- cls._db_path = None
179
- cls._api_url = None
180
- cls._mode = None
181
- cls._port = None
182
- cls._host = None
@@ -1,152 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: openalex-local
3
- Version: 0.3.0
4
- Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
5
- Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
6
- License: AGPL-3.0
7
- Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
8
- Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
9
- Keywords: openalex,academic,research,abstracts,semantic-search
10
- Classifier: Development Status :: 3 - Alpha
11
- Classifier: Intended Audience :: Science/Research
12
- Classifier: License :: OSI Approved :: GNU Affero General Public License v3
13
- Classifier: Programming Language :: Python :: 3
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Topic :: Scientific/Engineering
18
- Requires-Python: >=3.10
19
- Description-Content-Type: text/markdown
20
- Requires-Dist: click>=8.0
21
- Requires-Dist: awscli>=1.0
22
- Provides-Extra: dev
23
- Requires-Dist: pytest>=7.0; extra == "dev"
24
- Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
-
26
- # OpenAlex Local
27
-
28
- Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
29
-
30
- [![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
31
- [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
32
-
33
- <details>
34
- <summary><strong>Why OpenAlex Local?</strong></summary>
35
-
36
- **Built for the LLM era** - features that matter for AI research assistants:
37
-
38
- | Feature | Benefit |
39
- |---------|---------|
40
- | 📚 **284M Works** | More coverage than CrossRef |
41
- | 📝 **Abstracts** | ~45-60% availability for semantic search |
42
- | 🏷️ **Concepts & Topics** | Built-in classification |
43
- | 👤 **Author Disambiguation** | Linked to institutions |
44
- | 🔓 **Open Access Info** | OA status and URLs |
45
-
46
- Perfect for: RAG systems, research assistants, literature review automation.
47
-
48
- </details>
49
-
50
- <details>
51
- <summary><strong>Installation</strong></summary>
52
-
53
- ```bash
54
- pip install openalex-local
55
- ```
56
-
57
- From source:
58
- ```bash
59
- git clone https://github.com/ywatanabe1989/openalex-local
60
- cd openalex-local && make install
61
- ```
62
-
63
- Database setup (~300 GB, ~1-2 days to build):
64
- ```bash
65
- # Check system status
66
- make status
67
-
68
- # 1. Download OpenAlex Works snapshot (~300GB)
69
- make download-screen # runs in background
70
-
71
- # 2. Build SQLite database
72
- make build-db
73
-
74
- # 3. Build FTS5 index
75
- make build-fts
76
- ```
77
-
78
- </details>
79
-
80
- <details>
81
- <summary><strong>Python API</strong></summary>
82
-
83
- ```python
84
- from openalex_local import search, get, count
85
-
86
- # Full-text search (title + abstract)
87
- results = search("machine learning neural networks")
88
- for work in results:
89
- print(f"{work.title} ({work.year})")
90
- print(f" Abstract: {work.abstract[:200]}...")
91
- print(f" Concepts: {[c['name'] for c in work.concepts]}")
92
-
93
- # Get by OpenAlex ID or DOI
94
- work = get("W2741809807")
95
- work = get("10.1038/nature12373")
96
-
97
- # Count matches
98
- n = count("CRISPR")
99
- ```
100
-
101
- </details>
102
-
103
- <details>
104
- <summary><strong>CLI</strong></summary>
105
-
106
- ```bash
107
- openalex-local search "CRISPR genome editing" -n 5
108
- openalex-local get W2741809807
109
- openalex-local get 10.1038/nature12373
110
- openalex-local count "machine learning"
111
- ```
112
-
113
- </details>
114
-
115
- <details>
116
- <summary><strong>Related Projects</strong></summary>
117
-
118
- **[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
119
-
120
- | Feature | crossref-local | openalex-local |
121
- |---------|----------------|----------------|
122
- | Works | 167M | 284M |
123
- | Abstracts | ~21% | ~45-60% |
124
- | Update frequency | Real-time | Monthly |
125
- | DOI authority | ✓ (source) | Uses CrossRef |
126
- | Citations | Raw references | Linked works |
127
- | Concepts/Topics | ❌ | ✓ |
128
- | Author IDs | ❌ | ✓ |
129
- | Best for | DOI lookup, raw refs | Semantic search |
130
-
131
- **When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.
132
- **When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
133
-
134
- </details>
135
-
136
- <details>
137
- <summary><strong>Data Source</strong></summary>
138
-
139
- Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
140
- Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
141
-
142
- </details>
143
-
144
- ---
145
-
146
- <p align="center">
147
- <a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
148
- <br>
149
- AGPL-3.0 · ywatanabe@scitex.ai
150
- </p>
151
-
152
- <!-- EOF -->
@@ -1,13 +0,0 @@
1
- openalex_local/__init__.py,sha256=UKl7hQFZMwK1oPV3zz7V2Pw7M4ufPouUCNwmODdFZlw,759
2
- openalex_local/__main__.py,sha256=7zIPyOv659VptzHef3Zsw3k6m-WhGTN4MFq2-yVkdLE,111
3
- openalex_local/api.py,sha256=LRIU0JZ41c33YAK96r_MOHNP5tdweu1_Fd-ZRdZ8IT0,6036
4
- openalex_local/cli.py,sha256=LLlgdAdt6qrF1LVZf5jtimsDiTmvLQ-wzw0-hCnLbr0,6464
5
- openalex_local/config.py,sha256=pwvXj-CncHCWdQr4ZhtH4ItuDY7RVKWikspQYyYfcbE,4848
6
- openalex_local/db.py,sha256=eNPZ4Ejqn2w2m5Gk8eCApQHT_cr2X5wt4KiOeVeU7wU,4355
7
- openalex_local/fts.py,sha256=52TrRqabNj1zLR8gDtb9mOgIXioEBaTRJN7tz5mAcpE,3959
8
- openalex_local/models.py,sha256=yLjQsYgDcjvjqwt_amvIAXNTpGgcy8nqEkSS70Q8_cY,7120
9
- openalex_local-0.3.0.dist-info/METADATA,sha256=sL5zP_JOo9DRNcvgrX1myWczeCGnrtWhAJ78VLpuAWc,4384
10
- openalex_local-0.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
- openalex_local-0.3.0.dist-info/entry_points.txt,sha256=OYO9RohYQ52zoeb-jTjhMYR07F0PioGQXPizMnBv_Es,59
12
- openalex_local-0.3.0.dist-info/top_level.txt,sha256=arEhuDR1f42p7soJ5JkJFAnAiCPSGKvuHIchaKTmqKg,15
13
- openalex_local-0.3.0.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- openalex-local = openalex_local.cli:main
File without changes