openalex-local 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. openalex_local/__init__.py +54 -3
  2. openalex_local/__main__.py +6 -0
  3. openalex_local/_cache/__init__.py +45 -0
  4. openalex_local/_cache/core.py +298 -0
  5. openalex_local/_cache/export.py +100 -0
  6. openalex_local/_cache/models.py +17 -0
  7. openalex_local/_cache/utils.py +85 -0
  8. openalex_local/_cli/__init__.py +9 -0
  9. openalex_local/_cli/cli.py +409 -0
  10. openalex_local/_cli/cli_cache.py +220 -0
  11. openalex_local/_cli/mcp.py +210 -0
  12. openalex_local/_cli/mcp_server.py +235 -0
  13. openalex_local/_core/__init__.py +42 -0
  14. openalex_local/_core/api.py +376 -0
  15. openalex_local/_core/config.py +120 -0
  16. openalex_local/_core/db.py +214 -0
  17. openalex_local/_core/export.py +252 -0
  18. openalex_local/_core/fts.py +165 -0
  19. openalex_local/_core/models.py +432 -0
  20. openalex_local/_remote/__init__.py +34 -0
  21. openalex_local/_remote/base.py +256 -0
  22. openalex_local/_server/__init__.py +117 -0
  23. openalex_local/_server/routes.py +175 -0
  24. openalex_local/aio.py +259 -0
  25. openalex_local/cache.py +31 -0
  26. openalex_local/cli.py +8 -0
  27. openalex_local/jobs.py +169 -0
  28. openalex_local/remote.py +8 -0
  29. openalex_local/server.py +8 -0
  30. openalex_local-0.3.1.dist-info/METADATA +288 -0
  31. openalex_local-0.3.1.dist-info/RECORD +34 -0
  32. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +1 -1
  33. openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
  34. openalex_local/config.py +0 -73
  35. openalex_local/models.py +0 -187
  36. openalex_local-0.1.0.dist-info/METADATA +0 -152
  37. openalex_local-0.1.0.dist-info/RECORD +0 -8
  38. openalex_local-0.1.0.dist-info/entry_points.txt +0 -2
  39. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: openalex-local
3
+ Version: 0.3.1
4
+ Summary: Local OpenAlex database with 284M+ works, abstracts, and semantic search
5
+ Author-email: Yusuke Watanabe <ywatanabe@alumni.u-tokyo.ac.jp>
6
+ License: AGPL-3.0
7
+ Project-URL: Homepage, https://github.com/ywatanabe1989/openalex-local
8
+ Project-URL: Repository, https://github.com/ywatanabe1989/openalex-local
9
+ Keywords: openalex,academic,research,abstracts,semantic-search
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: click>=8.0
21
+ Requires-Dist: awscli>=1.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
26
+ Provides-Extra: mcp
27
+ Requires-Dist: fastmcp>=0.4; extra == "mcp"
28
+ Provides-Extra: server
29
+ Requires-Dist: fastapi>=0.100; extra == "server"
30
+ Requires-Dist: uvicorn>=0.23; extra == "server"
31
+ Provides-Extra: docs
32
+ Requires-Dist: sphinx>=7.0; extra == "docs"
33
+ Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
34
+ Requires-Dist: myst-parser>=2.0; extra == "docs"
35
+ Requires-Dist: sphinx-copybutton>=0.5; extra == "docs"
36
+ Requires-Dist: sphinx-autodoc-typehints>=1.25; extra == "docs"
37
+ Provides-Extra: all
38
+ Requires-Dist: openalex-local[dev,docs,mcp,server]; extra == "all"
39
+
40
+ # OpenAlex Local
41
+
42
+ Local OpenAlex database with 284M+ scholarly works, abstracts, and semantic search.
43
+
44
+ [![PyPI version](https://badge.fury.io/py/openalex-local.svg)](https://badge.fury.io/py/openalex-local)
45
+ [![Documentation](https://readthedocs.org/projects/openalex-local/badge/?version=latest)](https://openalex-local.readthedocs.io/en/latest/)
46
+ [![Tests](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml/badge.svg)](https://github.com/ywatanabe1989/openalex-local/actions/workflows/test.yml)
47
+ [![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
48
+ [![License](https://img.shields.io/badge/license-AGPL--3.0-blue.svg)](LICENSE)
49
+
50
+ <details>
51
+ <summary><strong>Why OpenAlex Local?</strong></summary>
52
+
53
+ **Built for the LLM era** - features that matter for AI research assistants:
54
+
55
+ | Feature | Benefit |
56
+ |---------|---------|
57
+ | **284M Works** | More coverage than CrossRef |
58
+ | **Abstracts** | ~45-60% availability for semantic search |
59
+ | **Concepts & Topics** | Built-in classification |
60
+ | **Author Disambiguation** | Linked to institutions |
61
+ | **Open Access Info** | OA status and URLs |
62
+
63
+ Perfect for: RAG systems, research assistants, literature review automation.
64
+
65
+ </details>
66
+
67
+ <details>
68
+ <summary><strong>Installation</strong></summary>
69
+
70
+ ```bash
71
+ pip install openalex-local
72
+ ```
73
+
74
+ From source:
75
+ ```bash
76
+ git clone https://github.com/ywatanabe1989/openalex-local
77
+ cd openalex-local && make install
78
+ ```
79
+
80
+ Database setup (~300 GB, ~1-2 days to build):
81
+ ```bash
82
+ # Check system status
83
+ make status
84
+
85
+ # 1. Download OpenAlex Works snapshot (~300GB)
86
+ make download-screen # runs in background
87
+
88
+ # 2. Build SQLite database
89
+ make build-db
90
+
91
+ # 3. Build FTS5 index
92
+ make build-fts
93
+ ```
94
+
95
+ </details>
96
+
97
+ <details>
98
+ <summary><strong>Python API</strong></summary>
99
+
100
+ ```python
101
+ from openalex_local import search, get, count
102
+
103
+ # Full-text search (title + abstract)
104
+ results = search("machine learning neural networks")
105
+ for work in results:
106
+ print(f"{work.title} ({work.year})")
107
+ print(f" Abstract: {work.abstract[:200]}...")
108
+ print(f" Concepts: {[c['name'] for c in work.concepts]}")
109
+
110
+ # Get by OpenAlex ID or DOI
111
+ work = get("W2741809807")
112
+ work = get("10.1038/nature12373")
113
+
114
+ # Count matches
115
+ n = count("CRISPR")
116
+ ```
117
+
118
+ </details>
119
+
120
+ <details>
121
+ <summary><strong>CLI</strong></summary>
122
+
123
+ ```bash
124
+ openalex-local search "CRISPR genome editing" -n 5
125
+ openalex-local search-by-doi W2741809807
126
+ openalex-local search-by-doi 10.1038/nature12373
127
+ openalex-local status # Configuration and database stats
128
+ ```
129
+
130
+ With abstracts (`-a` flag):
131
+ ```
132
+ $ openalex-local search "neural network" -n 1 -a
133
+
134
+ Found 1,523,847 matches in 45.2ms
135
+
136
+ 1. Deep learning for neural networks (2015)
137
+ OpenAlex ID: W2741809807
138
+ Abstract: This paper presents a comprehensive overview of deep learning
139
+ techniques for neural network architectures...
140
+ ```
141
+
142
+ </details>
143
+
144
+ <details>
145
+ <summary><strong>HTTP API</strong></summary>
146
+
147
+ Start the FastAPI server:
148
+ ```bash
149
+ openalex-local relay --host 0.0.0.0 --port 31292
150
+ ```
151
+
152
+ Endpoints:
153
+ ```bash
154
+ # Search works (FTS5)
155
+ curl "http://localhost:31292/works?q=CRISPR&limit=10"
156
+
157
+ # Get by ID or DOI
158
+ curl "http://localhost:31292/works/W2741809807"
159
+ curl "http://localhost:31292/works/10.1038/nature12373"
160
+
161
+ # Batch lookup
162
+ curl -X POST "http://localhost:31292/works/batch" \
163
+ -H "Content-Type: application/json" \
164
+ -d '{"ids": ["W2741809807", "10.1038/nature12373"]}'
165
+
166
+ # Database info
167
+ curl "http://localhost:31292/info"
168
+ ```
169
+
170
+ HTTP mode (connect to a running server; the snippet below combines shell commands with a Python client example):
171
+ ```bash
172
+ # On local machine (if server is remote)
173
+ ssh -L 31292:127.0.0.1:31292 your-server
174
+
175
+ # Python client
176
+ from openalex_local import configure_http
177
+ configure_http("http://localhost:31292")
178
+
179
+ # Or via CLI
180
+ openalex-local --http search "CRISPR"
181
+ ```
182
+
183
+ </details>
184
+
185
+ <details>
186
+ <summary><strong>MCP Server</strong></summary>
187
+
188
+ Run as MCP (Model Context Protocol) server:
189
+ ```bash
190
+ openalex-local mcp start
191
+ ```
192
+
193
+ Local MCP client configuration:
194
+ ```json
195
+ {
196
+ "mcpServers": {
197
+ "openalex-local": {
198
+ "command": "openalex-local",
199
+ "args": ["mcp", "start"],
200
+ "env": {
201
+ "OPENALEX_LOCAL_DB": "/path/to/openalex.db"
202
+ }
203
+ }
204
+ }
205
+ }
206
+ ```
207
+
208
+ Remote MCP via HTTP:
209
+ ```bash
210
+ # On server: start persistent MCP server
211
+ openalex-local mcp start -t http --host 0.0.0.0 --port 8083
212
+ ```
213
+ ```json
214
+ {
215
+ "mcpServers": {
216
+ "openalex-remote": {
217
+ "url": "http://your-server:8083/mcp"
218
+ }
219
+ }
220
+ }
221
+ ```
222
+
223
+ Diagnose setup:
224
+ ```bash
225
+ openalex-local mcp doctor # Check dependencies and database
226
+ openalex-local mcp list-tools # Show available MCP tools
227
+ openalex-local mcp installation # Show client config examples
228
+ ```
229
+
230
+ Available tools:
231
+ - `search` - Full-text search across 284M+ papers
232
+ - `search_by_id` - Get paper by OpenAlex ID or DOI
233
+ - `enrich_ids` - Batch lookup with metadata
234
+ - `status` - Database statistics
235
+
236
+ </details>
237
+
238
+ <details>
239
+ <summary><strong>Related Projects</strong></summary>
240
+
241
+ **[crossref-local](https://github.com/ywatanabe1989/crossref-local)** - Sister project with CrossRef data:
242
+
243
+ | Feature | crossref-local | openalex-local |
244
+ |---------|----------------|----------------|
245
+ | Works | 167M | 284M |
246
+ | Abstracts | ~21% | ~45-60% |
247
+ | Update frequency | Real-time | Monthly |
248
+ | DOI authority | Yes (source) | Uses CrossRef |
249
+ | Citations | Raw references | Linked works |
250
+ | Concepts/Topics | No | Yes |
251
+ | Author IDs | No | Yes |
252
+ | Best for | DOI lookup, raw refs | Semantic search |
253
+
254
+ **When to use CrossRef**: Real-time DOI updates, raw reference parsing, authoritative metadata.<br>
255
+ **When to use OpenAlex**: Semantic search, citation analysis, topic discovery.
256
+
257
+ </details>
258
+
259
+ <details>
260
+ <summary><strong>Documentation</strong></summary>
261
+
262
+ Full documentation is available at [openalex-local.readthedocs.io](https://openalex-local.readthedocs.io/en/latest/).
263
+
264
+ - [Installation Guide](https://openalex-local.readthedocs.io/en/latest/installation.html)
265
+ - [Quickstart](https://openalex-local.readthedocs.io/en/latest/quickstart.html)
266
+ - [CLI Reference](https://openalex-local.readthedocs.io/en/latest/cli_reference.html)
267
+ - [HTTP API Reference](https://openalex-local.readthedocs.io/en/latest/http_api.html)
268
+ - [Python API](https://openalex-local.readthedocs.io/en/latest/api/openalex_local.html)
269
+
270
+ </details>
271
+
272
+ <details>
273
+ <summary><strong>Data Source</strong></summary>
274
+
275
+ Data from [OpenAlex](https://openalex.org/), an open catalog of scholarly works.
276
+ Updated monthly from their [snapshot](https://docs.openalex.org/download-all-data/openalex-snapshot).
277
+
278
+ </details>
279
+
280
+ ---
281
+
282
+ <p align="center">
283
+ <a href="https://scitex.ai"><img src="docs/scitex-icon-navy-inverted.png" alt="SciTeX" width="40"/></a>
284
+ <br>
285
+ AGPL-3.0 · ywatanabe@scitex.ai
286
+ </p>
287
+
288
+ <!-- EOF -->
@@ -0,0 +1,34 @@
1
+ openalex_local/__init__.py,sha256=USWssgVhl3PXKugOWe_VNJRD0j_PR-0vrXziWeaYX6k,1057
2
+ openalex_local/__main__.py,sha256=7zIPyOv659VptzHef3Zsw3k6m-WhGTN4MFq2-yVkdLE,111
3
+ openalex_local/aio.py,sha256=42qi3qOKgaK-e37rVj9afGvzLhzDhHAuBVOqtmXEh6M,6468
4
+ openalex_local/cache.py,sha256=-FdYvzd9XSM9qbuE2xHu8p-BUjF6pItv1qX7Jl0auck,415
5
+ openalex_local/cli.py,sha256=2pWJK_vnO1IIwUrMB3K8KW94ZWfkEF7TLSxMsaR-LrI,137
6
+ openalex_local/jobs.py,sha256=8yoG1um3g94wddPcnhx5w3XDPegnG7qpRshDtCcR2gI,4892
7
+ openalex_local/remote.py,sha256=PmvUq87mC76sM9BL9RczOaXtvwHoqMc_dN5PJCYT18M,239
8
+ openalex_local/server.py,sha256=SKoQ-cOoZjdXm24Sv1CFu3F8UclbD6QstfQb-7l2xtA,215
9
+ openalex_local/_cache/__init__.py,sha256=z56OFC31_zmngyD7k7N8Tt0vFqjIgYN7-crhmPSeG98,891
10
+ openalex_local/_cache/core.py,sha256=fi8lSVhnmhSSLJ4rEJIKMZWALTat4NlXuyZxu2awBvw,8277
11
+ openalex_local/_cache/export.py,sha256=uHpymppj8n-zz2KrveHH0XO9Mo1wR81eQ2B8CWsM6nw,2713
12
+ openalex_local/_cache/models.py,sha256=llxMa2gB8cVsmhz2Lt8YlpFG2yTtIbv6OO1O8lw-Aso,319
13
+ openalex_local/_cache/utils.py,sha256=xPQpUudWU1y-KuzKnNQN5Rt0R-fnEqcno7_EFrpgINY,2038
14
+ openalex_local/_cli/__init__.py,sha256=NS07Eo93dRAuO5vxGwGwS0ctvZxMMkF_py09bzIk3Hk,175
15
+ openalex_local/_cli/cli.py,sha256=S3RK3BzFBFWdwFPoiiImgZwMFhkRcNjK74RVP_Vp-vM,12874
16
+ openalex_local/_cli/cli_cache.py,sha256=cy1LHBtJ7S7ecLgbsCFisH9XnQ6gDm0o1vT2QAUrBOU,7147
17
+ openalex_local/_cli/mcp.py,sha256=IZ5r3rs601eTACkFhxojzOxJh9QJHLSxTjzUAilnlDs,6306
18
+ openalex_local/_cli/mcp_server.py,sha256=5qMWxb1Yeqf4GEH8JIPKRrM4lb-tOJlKHPbMjJxWGlY,6829
19
+ openalex_local/_core/__init__.py,sha256=aj7jUJ8Kv9cWoLC120JGF6h_kmgf2-5wwHkkTsgqwGU,615
20
+ openalex_local/_core/api.py,sha256=_C6hWhmvRjqpdn_LEzLhm_krbkE9YIGY3C-tep0yN20,9410
21
+ openalex_local/_core/config.py,sha256=VEoFIVCuFLO2zPRpJ0aeG5Mu8nqtHN2eFOQeGxFdQ7Y,3372
22
+ openalex_local/_core/db.py,sha256=wyCNRbNxNujfvdJwvnNjrzhJGC5aS2ZhoKaSuuheehs,5957
23
+ openalex_local/_core/export.py,sha256=NBt4dldvqPus7Ns88Qi9yPd4KgMX5FdggsIIZMfBWM4,6517
24
+ openalex_local/_core/fts.py,sha256=52TrRqabNj1zLR8gDtb9mOgIXioEBaTRJN7tz5mAcpE,3959
25
+ openalex_local/_core/models.py,sha256=m4ApslrAuTsCz8j_0Zfc6CXVWweBU8VCeBiOrtvH3Ng,13575
26
+ openalex_local/_remote/__init__.py,sha256=3NmHe2uY_VDR-MxP2XAZM8EW9PYQ57_s77hZFVGILms,792
27
+ openalex_local/_remote/base.py,sha256=qzyD-D_JPX9g5O9bJt01JYf2fzEC-7A7uW5mUA_qRIo,8404
28
+ openalex_local/_server/__init__.py,sha256=8mZawWpAuGFwNJaXekrtaFgrb-1wcXaHiIzB0pVmFug,2785
29
+ openalex_local/_server/routes.py,sha256=3qTcjiP2huLVrB6dHjCTrKossRGNFacHCJX3Ai6u4K4,4444
30
+ openalex_local-0.3.1.dist-info/METADATA,sha256=T0CwMb4_pEW3Ol8zeiynMNGMaqHCDFcDx7m2PtTY4fA,8294
31
+ openalex_local-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
32
+ openalex_local-0.3.1.dist-info/entry_points.txt,sha256=8G5Q3Nwg3vlKqJiHO0BoAgbSSi3R4lNBMqwANbs7Uz4,64
33
+ openalex_local-0.3.1.dist-info/top_level.txt,sha256=arEhuDR1f42p7soJ5JkJFAnAiCPSGKvuHIchaKTmqKg,15
34
+ openalex_local-0.3.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ openalex-local = openalex_local._cli.cli:main
openalex_local/config.py DELETED
@@ -1,73 +0,0 @@
1
- """Configuration for openalex_local."""
2
-
3
- import os
4
- from pathlib import Path
5
- from typing import Optional
6
-
7
- # Default database locations (checked in order)
8
- DEFAULT_DB_PATHS = [
9
- Path("/home/ywatanabe/proj/openalex-local/data/openalex.db"),
10
- Path("/home/ywatanabe/proj/openalex_local/data/openalex.db"),
11
- Path("/mnt/nas_ug/openalex_local/data/openalex.db"),
12
- Path.home() / ".openalex_local" / "openalex.db",
13
- Path.cwd() / "data" / "openalex.db",
14
- ]
15
-
16
-
17
- def get_db_path() -> Path:
18
- """
19
- Get database path from environment or auto-detect.
20
-
21
- Priority:
22
- 1. OPENALEX_LOCAL_DB environment variable
23
- 2. First existing path from DEFAULT_DB_PATHS
24
-
25
- Returns:
26
- Path to the database file
27
-
28
- Raises:
29
- FileNotFoundError: If no database found
30
- """
31
- # Check environment variable first
32
- env_path = os.environ.get("OPENALEX_LOCAL_DB")
33
- if env_path:
34
- path = Path(env_path)
35
- if path.exists():
36
- return path
37
- raise FileNotFoundError(f"OPENALEX_LOCAL_DB path not found: {env_path}")
38
-
39
- # Auto-detect from default locations
40
- for path in DEFAULT_DB_PATHS:
41
- if path.exists():
42
- return path
43
-
44
- raise FileNotFoundError(
45
- "OpenAlex database not found. Set OPENALEX_LOCAL_DB environment variable "
46
- f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
47
- )
48
-
49
-
50
- class Config:
51
- """Configuration container."""
52
-
53
- _db_path: Optional[Path] = None
54
-
55
- @classmethod
56
- def get_db_path(cls) -> Path:
57
- """Get or auto-detect database path."""
58
- if cls._db_path is None:
59
- cls._db_path = get_db_path()
60
- return cls._db_path
61
-
62
- @classmethod
63
- def set_db_path(cls, path: str | Path) -> None:
64
- """Set database path explicitly."""
65
- path = Path(path)
66
- if not path.exists():
67
- raise FileNotFoundError(f"Database not found: {path}")
68
- cls._db_path = path
69
-
70
- @classmethod
71
- def reset(cls) -> None:
72
- """Reset configuration (for testing)."""
73
- cls._db_path = None
openalex_local/models.py DELETED
@@ -1,187 +0,0 @@
1
- """Data models for openalex_local."""
2
-
3
- from dataclasses import dataclass, field
4
- from typing import List, Optional, Dict, Any
5
-
6
-
7
- @dataclass
8
- class Work:
9
- """
10
- Represents a scholarly work from OpenAlex.
11
-
12
- Attributes:
13
- openalex_id: OpenAlex ID (e.g., W2741809807)
14
- doi: Digital Object Identifier
15
- title: Work title
16
- abstract: Abstract text (reconstructed from inverted index)
17
- authors: List of author names
18
- year: Publication year
19
- source: Journal/venue name
20
- issn: Journal ISSN
21
- volume: Volume number
22
- issue: Issue number
23
- pages: Page range
24
- publisher: Publisher name
25
- type: Work type (journal-article, book-chapter, etc.)
26
- concepts: List of OpenAlex concepts
27
- topics: List of OpenAlex topics
28
- cited_by_count: Number of citations
29
- referenced_works: List of referenced OpenAlex IDs
30
- is_oa: Is open access
31
- oa_url: Open access URL
32
- """
33
-
34
- openalex_id: str
35
- doi: Optional[str] = None
36
- title: Optional[str] = None
37
- abstract: Optional[str] = None
38
- authors: List[str] = field(default_factory=list)
39
- year: Optional[int] = None
40
- source: Optional[str] = None
41
- issn: Optional[str] = None
42
- volume: Optional[str] = None
43
- issue: Optional[str] = None
44
- pages: Optional[str] = None
45
- publisher: Optional[str] = None
46
- type: Optional[str] = None
47
- concepts: List[Dict[str, Any]] = field(default_factory=list)
48
- topics: List[Dict[str, Any]] = field(default_factory=list)
49
- cited_by_count: Optional[int] = None
50
- referenced_works: List[str] = field(default_factory=list)
51
- is_oa: bool = False
52
- oa_url: Optional[str] = None
53
-
54
- @classmethod
55
- def from_openalex(cls, data: dict) -> "Work":
56
- """
57
- Create Work from OpenAlex API/snapshot JSON.
58
-
59
- Args:
60
- data: OpenAlex work dictionary
61
-
62
- Returns:
63
- Work instance
64
- """
65
- # Extract OpenAlex ID
66
- openalex_id = data.get("id", "").replace("https://openalex.org/", "")
67
-
68
- # Extract DOI
69
- doi = data.get("doi", "").replace("https://doi.org/", "") if data.get("doi") else None
70
-
71
- # Extract authors
72
- authors = []
73
- for authorship in data.get("authorships", []):
74
- author = authorship.get("author", {})
75
- name = author.get("display_name")
76
- if name:
77
- authors.append(name)
78
-
79
- # Reconstruct abstract from inverted index
80
- abstract = None
81
- inv_index = data.get("abstract_inverted_index")
82
- if inv_index:
83
- words = sorted(
84
- [(pos, word) for word, positions in inv_index.items() for pos in positions]
85
- )
86
- abstract = " ".join(word for _, word in words)
87
-
88
- # Extract source info
89
- primary_location = data.get("primary_location") or {}
90
- source_info = primary_location.get("source") or {}
91
- source = source_info.get("display_name")
92
- issns = source_info.get("issn") or []
93
- issn = issns[0] if issns else None
94
-
95
- # Extract biblio
96
- biblio = data.get("biblio") or {}
97
-
98
- # Extract concepts (top 5)
99
- concepts = [
100
- {"name": c.get("display_name"), "score": c.get("score")}
101
- for c in (data.get("concepts") or [])[:5]
102
- ]
103
-
104
- # Extract topics (top 3)
105
- topics = [
106
- {"name": t.get("display_name"), "subfield": t.get("subfield", {}).get("display_name")}
107
- for t in (data.get("topics") or [])[:3]
108
- ]
109
-
110
- # Extract OA info
111
- oa_info = data.get("open_access") or {}
112
-
113
- return cls(
114
- openalex_id=openalex_id,
115
- doi=doi,
116
- title=data.get("title") or data.get("display_name"),
117
- abstract=abstract,
118
- authors=authors,
119
- year=data.get("publication_year"),
120
- source=source,
121
- issn=issn,
122
- volume=biblio.get("volume"),
123
- issue=biblio.get("issue"),
124
- pages=biblio.get("first_page"),
125
- publisher=source_info.get("host_organization_name"),
126
- type=data.get("type"),
127
- concepts=concepts,
128
- topics=topics,
129
- cited_by_count=data.get("cited_by_count"),
130
- referenced_works=[
131
- r.replace("https://openalex.org/", "")
132
- for r in (data.get("referenced_works") or [])
133
- ],
134
- is_oa=oa_info.get("is_oa", False),
135
- oa_url=oa_info.get("oa_url"),
136
- )
137
-
138
- def to_dict(self) -> dict:
139
- """Convert to dictionary."""
140
- return {
141
- "openalex_id": self.openalex_id,
142
- "doi": self.doi,
143
- "title": self.title,
144
- "abstract": self.abstract,
145
- "authors": self.authors,
146
- "year": self.year,
147
- "source": self.source,
148
- "issn": self.issn,
149
- "volume": self.volume,
150
- "issue": self.issue,
151
- "pages": self.pages,
152
- "publisher": self.publisher,
153
- "type": self.type,
154
- "concepts": self.concepts,
155
- "topics": self.topics,
156
- "cited_by_count": self.cited_by_count,
157
- "referenced_works": self.referenced_works,
158
- "is_oa": self.is_oa,
159
- "oa_url": self.oa_url,
160
- }
161
-
162
-
163
- @dataclass
164
- class SearchResult:
165
- """
166
- Container for search results with metadata.
167
-
168
- Attributes:
169
- works: List of Work objects
170
- total: Total number of matches
171
- query: Original search query
172
- elapsed_ms: Search time in milliseconds
173
- """
174
-
175
- works: List[Work]
176
- total: int
177
- query: str
178
- elapsed_ms: float
179
-
180
- def __len__(self) -> int:
181
- return len(self.works)
182
-
183
- def __iter__(self):
184
- return iter(self.works)
185
-
186
- def __getitem__(self, idx):
187
- return self.works[idx]