pyopenalex 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyopenalex-0.1.0/.gitignore +10 -0
- pyopenalex-0.1.0/LICENSE +21 -0
- pyopenalex-0.1.0/PKG-INFO +392 -0
- pyopenalex-0.1.0/README.md +369 -0
- pyopenalex-0.1.0/pyopenalex/__init__.py +33 -0
- pyopenalex-0.1.0/pyopenalex/_http.py +52 -0
- pyopenalex-0.1.0/pyopenalex/client.py +46 -0
- pyopenalex-0.1.0/pyopenalex/config.py +13 -0
- pyopenalex-0.1.0/pyopenalex/endpoints.py +102 -0
- pyopenalex-0.1.0/pyopenalex/exceptions.py +19 -0
- pyopenalex-0.1.0/pyopenalex/expressions.py +85 -0
- pyopenalex-0.1.0/pyopenalex/models/__init__.py +19 -0
- pyopenalex-0.1.0/pyopenalex/models/authors.py +35 -0
- pyopenalex-0.1.0/pyopenalex/models/autocomplete.py +13 -0
- pyopenalex-0.1.0/pyopenalex/models/base.py +93 -0
- pyopenalex-0.1.0/pyopenalex/models/funders.py +30 -0
- pyopenalex-0.1.0/pyopenalex/models/institutions.py +45 -0
- pyopenalex-0.1.0/pyopenalex/models/keywords.py +13 -0
- pyopenalex-0.1.0/pyopenalex/models/publishers.py +32 -0
- pyopenalex-0.1.0/pyopenalex/models/sources.py +53 -0
- pyopenalex-0.1.0/pyopenalex/models/topics.py +22 -0
- pyopenalex-0.1.0/pyopenalex/models/works.py +133 -0
- pyopenalex-0.1.0/pyopenalex/query.py +192 -0
- pyopenalex-0.1.0/pyproject.toml +39 -0
pyopenalex-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicolai B. Thomsen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
pyopenalex-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyopenalex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Pydantic-powered Python client for the OpenAlex API
|
|
5
|
+
Project-URL: Homepage, https://github.com/nicolaibthomsen/pyopenalex
|
|
6
|
+
Project-URL: Repository, https://github.com/nicolaibthomsen/pyopenalex
|
|
7
|
+
Project-URL: Issues, https://github.com/nicolaibthomsen/pyopenalex/issues
|
|
8
|
+
Author: Nicolai B. Thomsen
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: academic,api,openalex,pydantic,research,scholarly
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.13
|
|
19
|
+
Requires-Dist: httpx>=0.27
|
|
20
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
21
|
+
Requires-Dist: pydantic>=2.0
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="https://raw.githubusercontent.com/nicolaibthomsen/pyopenalex/main/logo.png" alt="PyOpenAlex" width="320">
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
<p align="center">
|
|
29
|
+
A Pydantic-powered Python client for the <a href="https://openalex.org">OpenAlex API</a>.
|
|
30
|
+
</p>
|
|
31
|
+
|
|
32
|
+
OpenAlex is an open catalog of the global research system: 270M+ scholarly works, 90M+ authors, and 100K+ sources. PyOpenAlex gives you typed access to all of it with an API that follows the patterns of FastAPI and Pydantic.
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from pyopenalex import OpenAlex, gt
|
|
36
|
+
|
|
37
|
+
with OpenAlex() as client:
|
|
38
|
+
for work in client.works.filter(cited_by_count=gt(1000), publication_year=2024).limit(10):
|
|
39
|
+
print(f"{work.title} ({work.cited_by_count} citations)")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install pyopenalex
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Requires Python 3.13+.
|
|
49
|
+
|
|
50
|
+
## Quick Start
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from pyopenalex import OpenAlex
|
|
54
|
+
|
|
55
|
+
client = OpenAlex(api_key="your-key") # or set OPENALEX_API_KEY env var
|
|
56
|
+
|
|
57
|
+
# Get a single work by ID
|
|
58
|
+
work = client.works.get("W2741809807")
|
|
59
|
+
print(work.title)
|
|
60
|
+
print(work.doi)
|
|
61
|
+
print(work.abstract) # reconstructed from inverted index
|
|
62
|
+
|
|
63
|
+
# Search for authors
|
|
64
|
+
results = client.authors.search("Einstein").per_page(5).get()
|
|
65
|
+
for author in results.results:
|
|
66
|
+
print(f"{author.display_name}: {author.works_count} works")
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Entities
|
|
70
|
+
|
|
71
|
+
PyOpenAlex supports all core OpenAlex entity types:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
client.works # Scholarly documents (articles, books, datasets)
|
|
75
|
+
client.authors # Researcher profiles
|
|
76
|
+
client.sources # Journals, repositories, conferences
|
|
77
|
+
client.institutions # Universities, research organizations
|
|
78
|
+
client.topics # Subject classifications
|
|
79
|
+
client.keywords # Extracted keywords
|
|
80
|
+
client.publishers # Publishing organizations
|
|
81
|
+
client.funders # Funding agencies
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Every entity is a Pydantic model with fully typed fields:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
work = client.works.get("W2741809807")
|
|
88
|
+
|
|
89
|
+
work.title # str | None
|
|
90
|
+
work.publication_year # int | None
|
|
91
|
+
work.cited_by_count # int | None
|
|
92
|
+
work.open_access.is_oa # bool
|
|
93
|
+
work.open_access.oa_status # str (gold, green, hybrid, bronze, diamond, closed)
|
|
94
|
+
work.authorships[0].author.display_name # str | None
|
|
95
|
+
work.authorships[0].institutions # list[DehydratedInstitution]
|
|
96
|
+
work.primary_location.source # DehydratedSource | None
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Looking Up Entities
|
|
100
|
+
|
|
101
|
+
### By OpenAlex ID
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
work = client.works.get("W2741809807")
|
|
105
|
+
author = client.authors.get("A5023888391")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### By External ID
|
|
109
|
+
|
|
110
|
+
Works accept DOIs, authors accept ORCIDs, institutions accept ROR IDs:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
work = client.works.get("https://doi.org/10.7717/peerj.4375")
|
|
114
|
+
author = client.authors.get("https://orcid.org/0000-0001-6187-6610")
|
|
115
|
+
institution = client.institutions.get("https://ror.org/0161xgx34")
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Batch Lookup
|
|
119
|
+
|
|
120
|
+
Fetch up to 100 entities at once by passing a list of IDs to `.get()`:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
works = client.works.get(["W2741809807", "W2100837269", "W1775749144"])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Random Entity
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
work = client.works.random()
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Filtering
|
|
133
|
+
|
|
134
|
+
Use `.filter()` to narrow results. Multiple filters, whether passed as keyword arguments to a single call or spread across chained `.filter()` calls, combine with AND:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
results = (
|
|
138
|
+
client.works
|
|
139
|
+
.filter(publication_year=2024, is_oa=True)
|
|
140
|
+
.sort("cited_by_count", desc=True)
|
|
141
|
+
.per_page(100)
|
|
142
|
+
.get()
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Filter Expressions
|
|
147
|
+
|
|
148
|
+
PyOpenAlex provides expression functions for building filters, similar to how FastAPI uses `Query()`, `Path()`, and `Body()`:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from pyopenalex import gt, lt, ne, or_, between
|
|
152
|
+
|
|
153
|
+
# Greater than / less than
|
|
154
|
+
client.works.filter(cited_by_count=gt(100))
|
|
155
|
+
client.works.filter(publication_year=lt(2020))
|
|
156
|
+
|
|
157
|
+
# Not equal
|
|
158
|
+
client.works.filter(type=ne("paratext"))
|
|
159
|
+
|
|
160
|
+
# OR (up to 100 values)
|
|
161
|
+
client.works.filter(doi=or_(
|
|
162
|
+
"https://doi.org/10.7717/peerj.4375",
|
|
163
|
+
"https://doi.org/10.1038/nature12373",
|
|
164
|
+
))
|
|
165
|
+
|
|
166
|
+
# Range
|
|
167
|
+
client.works.filter(publication_year=between(2020, 2024))
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Nested Filters
|
|
171
|
+
|
|
172
|
+
Use dicts for dot-notation filter paths. PyOpenAlex flattens them automatically:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# These are equivalent:
|
|
176
|
+
client.works.filter(authorships={"institutions": {"id": "I136199984"}})
|
|
177
|
+
client.works.filter_raw("authorships.institutions.id:I136199984")
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Raw Filters
|
|
181
|
+
|
|
182
|
+
For full control, pass the filter string directly:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
client.works.filter_raw("publication_year:2024,is_oa:true,cited_by_count:>100")
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Searching
|
|
189
|
+
|
|
190
|
+
### Full-Text Search
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
results = client.works.search("machine learning").get()
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Field-Specific Search
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
results = client.works.search_filter(title="neural networks").get()
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Search and filters can be combined:
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
results = (
|
|
206
|
+
client.works
|
|
207
|
+
.search("CRISPR")
|
|
208
|
+
.filter(publication_year=2024, is_oa=True)
|
|
209
|
+
.sort("cited_by_count", desc=True)
|
|
210
|
+
.get()
|
|
211
|
+
)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Sorting
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
# Ascending (default)
|
|
218
|
+
client.works.sort("publication_date")
|
|
219
|
+
|
|
220
|
+
# Descending
|
|
221
|
+
client.works.sort("cited_by_count", desc=True)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Field Selection
|
|
225
|
+
|
|
226
|
+
Request only the fields you need to reduce response size:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
results = client.works.select("id", "title", "doi", "cited_by_count").get()
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Pagination
|
|
233
|
+
|
|
234
|
+
### Page-Based
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
page1 = client.works.filter(publication_year=2024).page(1).per_page(100).get()
|
|
238
|
+
page2 = client.works.filter(publication_year=2024).page(2).per_page(100).get()
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Cursor-Based (Automatic)
|
|
242
|
+
|
|
243
|
+
Iterate over any query, and PyOpenAlex handles cursor pagination automatically:
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
for work in client.works.filter(publication_year=2024, is_oa=True):
|
|
247
|
+
print(work.title)
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Use `.limit()` to cap the total number of results:
|
|
251
|
+
|
|
252
|
+
```python
|
|
253
|
+
for work in client.works.filter(publication_year=2024).limit(500):
|
|
254
|
+
process(work)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Counting
|
|
258
|
+
|
|
259
|
+
Get the total number of matching results without fetching them:
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
count = client.works.filter(publication_year=2024, is_oa=True).count()
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Grouping
|
|
266
|
+
|
|
267
|
+
Aggregate results by a field:
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
response = client.works.filter(publication_year=2024).group_by("type").get()
|
|
271
|
+
for group in response.group_by:
|
|
272
|
+
print(f"{group.key_display_name}: {group.count}")
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## Sampling
|
|
276
|
+
|
|
277
|
+
Get a random sample of results:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
results = client.works.sample(100, seed=42).get()
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Autocomplete
|
|
284
|
+
|
|
285
|
+
Fast typeahead search returning up to 10 results:
|
|
286
|
+
|
|
287
|
+
```python
|
|
288
|
+
results = client.institutions.autocomplete("harvard")
|
|
289
|
+
for r in results:
|
|
290
|
+
print(f"{r.display_name} ({r.works_count} works)")
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
## Query Reuse
|
|
294
|
+
|
|
295
|
+
The query builder is immutable. Each method returns a new instance, so you can safely branch from a base query:
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
base = client.works.filter(publication_year=2024, is_oa=True)
|
|
299
|
+
|
|
300
|
+
most_cited = base.sort("cited_by_count", desc=True).per_page(10).get()
|
|
301
|
+
recent = base.sort("publication_date", desc=True).per_page(10).get()
|
|
302
|
+
count = base.count()
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## Response Objects
|
|
306
|
+
|
|
307
|
+
List queries return a `ListResponse` with three parts:
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
response = client.works.search("CRISPR").get()
|
|
311
|
+
|
|
312
|
+
response.meta # Meta: count, page, per_page, cost_usd, ...
|
|
313
|
+
response.results # list[Work]: the entities
|
|
314
|
+
response.group_by # list[GroupByResult]: populated when using group_by
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
## Configuration
|
|
318
|
+
|
|
319
|
+
### API Key
|
|
320
|
+
|
|
321
|
+
Set your API key in any of these ways (in order of precedence):
|
|
322
|
+
|
|
323
|
+
```python
|
|
324
|
+
# 1. Constructor argument
|
|
325
|
+
client = OpenAlex(api_key="your-key")
|
|
326
|
+
|
|
327
|
+
# 2. Environment variable
|
|
328
|
+
# export OPENALEX_API_KEY=your-key
|
|
329
|
+
client = OpenAlex()
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
Get a free API key at [openalex.org/settings/api](https://openalex.org/settings/api).
|
|
333
|
+
|
|
334
|
+
### Other Settings
|
|
335
|
+
|
|
336
|
+
```python
|
|
337
|
+
client = OpenAlex(
|
|
338
|
+
api_key="your-key",
|
|
339
|
+
base_url="https://api.openalex.org", # default
|
|
340
|
+
timeout=30.0, # request timeout in seconds
|
|
341
|
+
max_retries=3, # retries on 429/5xx errors
|
|
342
|
+
)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
Every setting can also be configured through an environment variable with the `OPENALEX_` prefix:
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
export OPENALEX_API_KEY=your-key
|
|
349
|
+
export OPENALEX_TIMEOUT=60
|
|
350
|
+
export OPENALEX_MAX_RETRIES=5
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
### Context Manager
|
|
354
|
+
|
|
355
|
+
The client can be used as a context manager to ensure the HTTP connection is closed:
|
|
356
|
+
|
|
357
|
+
```python
|
|
358
|
+
with OpenAlex() as client:
|
|
359
|
+
work = client.works.get("W2741809807")
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
## Error Handling
|
|
363
|
+
|
|
364
|
+
PyOpenAlex raises typed exceptions:
|
|
365
|
+
|
|
366
|
+
```python
|
|
367
|
+
from pyopenalex.exceptions import NotFoundError, RateLimitError, APIError
|
|
368
|
+
|
|
369
|
+
try:
|
|
370
|
+
work = client.works.get("W0000000000")
|
|
371
|
+
except NotFoundError:
|
|
372
|
+
print("Work not found")
|
|
373
|
+
except RateLimitError:
|
|
374
|
+
print("Daily rate limit exceeded")
|
|
375
|
+
except APIError as e:
|
|
376
|
+
print(f"HTTP {e.status_code}: {e}")
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
Retries with exponential backoff are automatic for 429 (rate limit) and 5xx (server error) responses.
|
|
380
|
+
|
|
381
|
+
## Abstract Reconstruction
|
|
382
|
+
|
|
383
|
+
OpenAlex stores abstracts as inverted indexes (each word mapped to the positions where it appears) rather than as plain text. PyOpenAlex reconstructs the text for you:
|
|
384
|
+
|
|
385
|
+
```python
|
|
386
|
+
work = client.works.get("W2741809807")
|
|
387
|
+
print(work.abstract) # full abstract text, or None if unavailable
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
## License
|
|
391
|
+
|
|
392
|
+
MIT
|