pyalex 0.19__tar.gz → 0.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyalex-0.19 → pyalex-0.20}/.gitignore +2 -0
- {pyalex-0.19 → pyalex-0.20}/.pre-commit-config.yaml +1 -1
- {pyalex-0.19 → pyalex-0.20}/PKG-INFO +105 -28
- {pyalex-0.19 → pyalex-0.20}/README.md +104 -27
- {pyalex-0.19 → pyalex-0.20}/pyalex/__init__.py +8 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex/_version.py +3 -3
- {pyalex-0.19 → pyalex-0.20}/pyalex/api.py +125 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex.egg-info/PKG-INFO +105 -28
- {pyalex-0.19 → pyalex-0.20}/tests/test_paging.py +44 -0
- {pyalex-0.19 → pyalex-0.20}/tests/test_pyalex.py +136 -0
- {pyalex-0.19 → pyalex-0.20}/.github/workflows/python-lint.yml +0 -0
- {pyalex-0.19 → pyalex-0.20}/.github/workflows/python-package.yml +0 -0
- {pyalex-0.19 → pyalex-0.20}/.github/workflows/python-publish.yml +0 -0
- {pyalex-0.19 → pyalex-0.20}/CITATION.cff +0 -0
- {pyalex-0.19 → pyalex-0.20}/LICENSE +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex.egg-info/SOURCES.txt +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex.egg-info/dependency_links.txt +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex.egg-info/requires.txt +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex.egg-info/top_level.txt +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex_repocard.png +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyalex_repocard.svg +0 -0
- {pyalex-0.19 → pyalex-0.20}/pyproject.toml +0 -0
- {pyalex-0.19 → pyalex-0.20}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.19
|
|
3
|
+
Version: 0.20
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -39,7 +39,18 @@ institutions, and more. OpenAlex offers a robust, open, and free [REST API](http
|
|
|
39
39
|
PyAlex is a lightweight and thin Python interface to this API. PyAlex tries to
|
|
40
40
|
stay as close as possible to the design of the original service.
|
|
41
41
|
|
|
42
|
-
The following features of OpenAlex are currently supported by PyAlex:
|
|
42
|
+
The following entities of OpenAlex are currently supported by PyAlex:
|
|
43
|
+
|
|
44
|
+
- [x] Work
|
|
45
|
+
- [x] Author
|
|
46
|
+
- [x] Source
|
|
47
|
+
- [x] Institution
|
|
48
|
+
- [x] Concept
|
|
49
|
+
- [x] Topic
|
|
50
|
+
- [x] Publisher
|
|
51
|
+
- [x] Funder
|
|
52
|
+
|
|
53
|
+
Including the following functionality:
|
|
43
54
|
|
|
44
55
|
- [x] Get single entities
|
|
45
56
|
- [x] Filter entities
|
|
@@ -50,7 +61,7 @@ The following features of OpenAlex are currently supported by PyAlex:
|
|
|
50
61
|
- [x] Sample
|
|
51
62
|
- [x] Pagination
|
|
52
63
|
- [x] Autocomplete endpoint
|
|
53
|
-
- [x] N-grams
|
|
64
|
+
- [x] N-grams [Deprecated by OpenAlex]
|
|
54
65
|
- [x] Authentication
|
|
55
66
|
|
|
56
67
|
We aim to cover the entire API, and we are looking for help. We are welcoming Pull Requests.
|
|
@@ -59,6 +70,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
|
|
|
59
70
|
|
|
60
71
|
- **Pipe operations** - PyAlex can handle multiple operations in a sequence. This allows the developer to write understandable queries. For examples, see [code snippets](#code-snippets).
|
|
61
72
|
- **Plaintext abstracts** - OpenAlex [doesn't include plaintext abstracts](https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index) due to legal constraints. PyAlex can convert the inverted abstracts into [plaintext abstracts on the fly](#get-abstract).
|
|
73
|
+
- **Fetch content in PDF and TEI format** - Retrieve full-text content from OpenAlex in PDF or TEI XML formats. See [fetching content](#fetch-content-in-pdf-and-tei-format).
|
|
62
74
|
- **Permissive license** - OpenAlex data is CC0 licensed :raised_hands:. PyAlex is published under the MIT license.
|
|
63
75
|
|
|
64
76
|
## Installation
|
|
@@ -74,40 +86,51 @@ pip install pyalex
|
|
|
74
86
|
PyAlex offers support for all [Entity Objects](https://docs.openalex.org/api-entities/entities-overview): [Works](https://docs.openalex.org/api-entities/works), [Authors](https://docs.openalex.org/api-entities/authors), [Sources](https://docs.openalex.org/api-entities/sources), [Institutions](https://docs.openalex.org/api-entities/institutions), [Topics](https://docs.openalex.org/api-entities/topics), [Publishers](https://docs.openalex.org/api-entities/publishers), and [Funders](https://docs.openalex.org/api-entities/funders).
|
|
75
87
|
|
|
76
88
|
```python
|
|
77
|
-
from pyalex import
|
|
89
|
+
from pyalex import (
|
|
90
|
+
Works,
|
|
91
|
+
Authors,
|
|
92
|
+
Sources,
|
|
93
|
+
Institutions,
|
|
94
|
+
Topics,
|
|
95
|
+
Keywords,
|
|
96
|
+
Publishers,
|
|
97
|
+
Funders,
|
|
98
|
+
Awards,
|
|
99
|
+
Concepts,
|
|
100
|
+
)
|
|
78
101
|
```
|
|
79
102
|
|
|
80
|
-
###
|
|
103
|
+
### Rate limits and authentication [Changed!]
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
faster and more consistent response times. To get into the polite pool, you
|
|
84
|
-
set your email:
|
|
105
|
+
**⚠️ API Key Required**: Starting February 13, 2026, an API key is **required** to use the OpenAlex API. API keys are free!
|
|
85
106
|
|
|
86
|
-
|
|
87
|
-
import pyalex
|
|
107
|
+
The OpenAlex API uses a credit-based rate limiting system. Different endpoint types consume different amounts of credits per request:
|
|
88
108
|
|
|
89
|
-
|
|
90
|
-
|
|
109
|
+
- **Without API key**: 100 credits per day (testing/demos only)
|
|
110
|
+
- **With free API key**: 100,000 credits per day
|
|
111
|
+
- **Singleton requests** (e.g., `/works/W123`): Free (0 credits)
|
|
112
|
+
- **List requests** (e.g., `/works?filter=...`): 1 credit each
|
|
91
113
|
|
|
92
|
-
|
|
114
|
+
All users are limited to a maximum of 100 requests per second.
|
|
93
115
|
|
|
94
|
-
|
|
116
|
+
#### Get an API Key
|
|
117
|
+
|
|
118
|
+
1. Create a free account at [openalex.org](https://openalex.org/)
|
|
119
|
+
2. Go to [openalex.org/settings/api](https://openalex.org/settings/api) to get your API key
|
|
120
|
+
3. Configure PyAlex with your key:
|
|
95
121
|
|
|
96
122
|
```python
|
|
97
|
-
|
|
123
|
+
import pyalex
|
|
98
124
|
|
|
99
|
-
config.
|
|
100
|
-
config.retry_backoff_factor = 0.1
|
|
101
|
-
config.retry_http_codes = [429, 500, 503]
|
|
125
|
+
pyalex.config.api_key = "<YOUR_API_KEY>"
|
|
102
126
|
```
|
|
103
127
|
|
|
104
|
-
|
|
128
|
+
For more information, see the [OpenAlex Rate limits and authentication documentation](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication).
|
|
105
129
|
|
|
106
|
-
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
130
|
|
|
108
131
|
### Get single entity
|
|
109
132
|
|
|
110
|
-
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or
|
|
133
|
+
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher, Funder or Award from OpenAlex by the
|
|
111
134
|
OpenAlex ID, or by DOI or ROR.
|
|
112
135
|
|
|
113
136
|
```python
|
|
@@ -172,6 +195,55 @@ w["abstract"]
|
|
|
172
195
|
|
|
173
196
|
Please respect the legal constraints when using this feature.
|
|
174
197
|
|
|
198
|
+
#### Fetch content in PDF and TEI format
|
|
199
|
+
|
|
200
|
+
OpenAlex reference: [Get content](https://docs.openalex.org/how-to-use-the-api/get-content)
|
|
201
|
+
|
|
202
|
+
Only for Works. Retrieve the full-text content of a work in PDF or TEI (Text Encoding Initiative) XML format, if available.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from pyalex import Works
|
|
206
|
+
|
|
207
|
+
# Get a work
|
|
208
|
+
w = Works()["W4412002745"]
|
|
209
|
+
|
|
210
|
+
# Access the PDF content
|
|
211
|
+
pdf_content = w.pdf.get()
|
|
212
|
+
|
|
213
|
+
# Or access the TEI content
|
|
214
|
+
tei_content = w.tei.get()
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
You can also download the content directly to a file:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
from pyalex import Works
|
|
221
|
+
|
|
222
|
+
w = Works()["W4412002745"]
|
|
223
|
+
|
|
224
|
+
# Download PDF to a file
|
|
225
|
+
w.pdf.download("document.pdf")
|
|
226
|
+
|
|
227
|
+
# Download TEI to a file
|
|
228
|
+
w.tei.download("document.xml")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
You can also get the URL of the content without downloading it:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from pyalex import Works
|
|
235
|
+
|
|
236
|
+
w = Works()["W4412002745"]
|
|
237
|
+
|
|
238
|
+
# Get the URL of the PDF
|
|
239
|
+
pdf_url = w.pdf.url
|
|
240
|
+
|
|
241
|
+
# Get the URL of the TEI
|
|
242
|
+
tei_url = w.tei.url
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Note: Content availability depends on the publisher's open access policies and licensing agreements.
|
|
246
|
+
|
|
175
247
|
### Get lists of entities
|
|
176
248
|
|
|
177
249
|
```python
|
|
@@ -420,6 +492,10 @@ with open(Path("works.json")) as f:
|
|
|
420
492
|
works = [Work(w) for w in json.load(f)]
|
|
421
493
|
```
|
|
422
494
|
|
|
495
|
+
## Standards
|
|
496
|
+
|
|
497
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
498
|
+
|
|
423
499
|
## Code snippets
|
|
424
500
|
|
|
425
501
|
A list of awesome use cases of the OpenAlex dataset.
|
|
@@ -498,20 +574,21 @@ Works() \
|
|
|
498
574
|
|
|
499
575
|
```
|
|
500
576
|
|
|
501
|
-
## Experimental
|
|
502
577
|
|
|
503
|
-
|
|
578
|
+
## Troubleshooting
|
|
504
579
|
|
|
505
|
-
|
|
580
|
+
### Max retries
|
|
581
|
+
|
|
582
|
+
By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` is related to the delay between two retries, and `retry_http_codes` are the HTTP error codes that should trigger a retry.
|
|
506
583
|
|
|
507
584
|
```python
|
|
508
|
-
import
|
|
585
|
+
from pyalex import config
|
|
509
586
|
|
|
510
|
-
|
|
587
|
+
config.max_retries = 0
|
|
588
|
+
config.retry_backoff_factor = 0.1
|
|
589
|
+
config.retry_http_codes = [429, 500, 503]
|
|
511
590
|
```
|
|
512
591
|
|
|
513
|
-
If you configure an invalid API key all requests to OpenAlex will fail.
|
|
514
|
-
|
|
515
592
|
## Alternatives
|
|
516
593
|
|
|
517
594
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -13,7 +13,18 @@ institutions, and more. OpenAlex offers a robust, open, and free [REST API](http
|
|
|
13
13
|
PyAlex is a lightweight and thin Python interface to this API. PyAlex tries to
|
|
14
14
|
stay as close as possible to the design of the original service.
|
|
15
15
|
|
|
16
|
-
The following features of OpenAlex are currently supported by PyAlex:
|
|
16
|
+
The following entities of OpenAlex are currently supported by PyAlex:
|
|
17
|
+
|
|
18
|
+
- [x] Work
|
|
19
|
+
- [x] Author
|
|
20
|
+
- [x] Source
|
|
21
|
+
- [x] Institution
|
|
22
|
+
- [x] Concept
|
|
23
|
+
- [x] Topic
|
|
24
|
+
- [x] Publisher
|
|
25
|
+
- [x] Funder
|
|
26
|
+
|
|
27
|
+
Including the following functionality:
|
|
17
28
|
|
|
18
29
|
- [x] Get single entities
|
|
19
30
|
- [x] Filter entities
|
|
@@ -24,7 +35,7 @@ The following features of OpenAlex are currently supported by PyAlex:
|
|
|
24
35
|
- [x] Sample
|
|
25
36
|
- [x] Pagination
|
|
26
37
|
- [x] Autocomplete endpoint
|
|
27
|
-
- [x] N-grams
|
|
38
|
+
- [x] N-grams [Deprecated by OpenAlex]
|
|
28
39
|
- [x] Authentication
|
|
29
40
|
|
|
30
41
|
We aim to cover the entire API, and we are looking for help. We are welcoming Pull Requests.
|
|
@@ -33,6 +44,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
|
|
|
33
44
|
|
|
34
45
|
- **Pipe operations** - PyAlex can handle multiple operations in a sequence. This allows the developer to write understandable queries. For examples, see [code snippets](#code-snippets).
|
|
35
46
|
- **Plaintext abstracts** - OpenAlex [doesn't include plaintext abstracts](https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index) due to legal constraints. PyAlex can convert the inverted abstracts into [plaintext abstracts on the fly](#get-abstract).
|
|
47
|
+
- **Fetch content in PDF and TEI format** - Retrieve full-text content from OpenAlex in PDF or TEI XML formats. See [fetching content](#fetch-content-in-pdf-and-tei-format).
|
|
36
48
|
- **Permissive license** - OpenAlex data is CC0 licensed :raised_hands:. PyAlex is published under the MIT license.
|
|
37
49
|
|
|
38
50
|
## Installation
|
|
@@ -48,40 +60,51 @@ pip install pyalex
|
|
|
48
60
|
PyAlex offers support for all [Entity Objects](https://docs.openalex.org/api-entities/entities-overview): [Works](https://docs.openalex.org/api-entities/works), [Authors](https://docs.openalex.org/api-entities/authors), [Sources](https://docs.openalex.org/api-entities/sources), [Institutions](https://docs.openalex.org/api-entities/institutions), [Topics](https://docs.openalex.org/api-entities/topics), [Publishers](https://docs.openalex.org/api-entities/publishers), and [Funders](https://docs.openalex.org/api-entities/funders).
|
|
49
61
|
|
|
50
62
|
```python
|
|
51
|
-
from pyalex import
|
|
63
|
+
from pyalex import (
|
|
64
|
+
Works,
|
|
65
|
+
Authors,
|
|
66
|
+
Sources,
|
|
67
|
+
Institutions,
|
|
68
|
+
Topics,
|
|
69
|
+
Keywords,
|
|
70
|
+
Publishers,
|
|
71
|
+
Funders,
|
|
72
|
+
Awards,
|
|
73
|
+
Concepts,
|
|
74
|
+
)
|
|
52
75
|
```
|
|
53
76
|
|
|
54
|
-
###
|
|
77
|
+
### Rate limits and authentication [Changed!]
|
|
55
78
|
|
|
56
|
-
|
|
57
|
-
faster and more consistent response times. To get into the polite pool, you
|
|
58
|
-
set your email:
|
|
79
|
+
**⚠️ API Key Required**: Starting February 13, 2026, an API key is **required** to use the OpenAlex API. API keys are free!
|
|
59
80
|
|
|
60
|
-
|
|
61
|
-
import pyalex
|
|
81
|
+
The OpenAlex API uses a credit-based rate limiting system. Different endpoint types consume different amounts of credits per request:
|
|
62
82
|
|
|
63
|
-
|
|
64
|
-
|
|
83
|
+
- **Without API key**: 100 credits per day (testing/demos only)
|
|
84
|
+
- **With free API key**: 100,000 credits per day
|
|
85
|
+
- **Singleton requests** (e.g., `/works/W123`): Free (0 credits)
|
|
86
|
+
- **List requests** (e.g., `/works?filter=...`): 1 credit each
|
|
65
87
|
|
|
66
|
-
|
|
88
|
+
All users are limited to a maximum of 100 requests per second.
|
|
67
89
|
|
|
68
|
-
|
|
90
|
+
#### Get an API Key
|
|
91
|
+
|
|
92
|
+
1. Create a free account at [openalex.org](https://openalex.org/)
|
|
93
|
+
2. Go to [openalex.org/settings/api](https://openalex.org/settings/api) to get your API key
|
|
94
|
+
3. Configure PyAlex with your key:
|
|
69
95
|
|
|
70
96
|
```python
|
|
71
|
-
|
|
97
|
+
import pyalex
|
|
72
98
|
|
|
73
|
-
config.
|
|
74
|
-
config.retry_backoff_factor = 0.1
|
|
75
|
-
config.retry_http_codes = [429, 500, 503]
|
|
99
|
+
pyalex.config.api_key = "<YOUR_API_KEY>"
|
|
76
100
|
```
|
|
77
101
|
|
|
78
|
-
|
|
102
|
+
For more information, see the [OpenAlex Rate limits and authentication documentation](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication).
|
|
79
103
|
|
|
80
|
-
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
81
104
|
|
|
82
105
|
### Get single entity
|
|
83
106
|
|
|
84
|
-
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or
|
|
107
|
+
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher, Funder or Award from OpenAlex by the
|
|
85
108
|
OpenAlex ID, or by DOI or ROR.
|
|
86
109
|
|
|
87
110
|
```python
|
|
@@ -146,6 +169,55 @@ w["abstract"]
|
|
|
146
169
|
|
|
147
170
|
Please respect the legal constraints when using this feature.
|
|
148
171
|
|
|
172
|
+
#### Fetch content in PDF and TEI format
|
|
173
|
+
|
|
174
|
+
OpenAlex reference: [Get content](https://docs.openalex.org/how-to-use-the-api/get-content)
|
|
175
|
+
|
|
176
|
+
Only for Works. Retrieve the full-text content of a work in PDF or TEI (Text Encoding Initiative) XML format, if available.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from pyalex import Works
|
|
180
|
+
|
|
181
|
+
# Get a work
|
|
182
|
+
w = Works()["W4412002745"]
|
|
183
|
+
|
|
184
|
+
# Access the PDF content
|
|
185
|
+
pdf_content = w.pdf.get()
|
|
186
|
+
|
|
187
|
+
# Or access the TEI content
|
|
188
|
+
tei_content = w.tei.get()
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
You can also download the content directly to a file:
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
from pyalex import Works
|
|
195
|
+
|
|
196
|
+
w = Works()["W4412002745"]
|
|
197
|
+
|
|
198
|
+
# Download PDF to a file
|
|
199
|
+
w.pdf.download("document.pdf")
|
|
200
|
+
|
|
201
|
+
# Download TEI to a file
|
|
202
|
+
w.tei.download("document.xml")
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
You can also get the URL of the content without downloading it:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from pyalex import Works
|
|
209
|
+
|
|
210
|
+
w = Works()["W4412002745"]
|
|
211
|
+
|
|
212
|
+
# Get the URL of the PDF
|
|
213
|
+
pdf_url = w.pdf.url
|
|
214
|
+
|
|
215
|
+
# Get the URL of the TEI
|
|
216
|
+
tei_url = w.tei.url
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Note: Content availability depends on the publisher's open access policies and licensing agreements.
|
|
220
|
+
|
|
149
221
|
### Get lists of entities
|
|
150
222
|
|
|
151
223
|
```python
|
|
@@ -394,6 +466,10 @@ with open(Path("works.json")) as f:
|
|
|
394
466
|
works = [Work(w) for w in json.load(f)]
|
|
395
467
|
```
|
|
396
468
|
|
|
469
|
+
## Standards
|
|
470
|
+
|
|
471
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
472
|
+
|
|
397
473
|
## Code snippets
|
|
398
474
|
|
|
399
475
|
A list of awesome use cases of the OpenAlex dataset.
|
|
@@ -472,20 +548,21 @@ Works() \
|
|
|
472
548
|
|
|
473
549
|
```
|
|
474
550
|
|
|
475
|
-
## Experimental
|
|
476
551
|
|
|
477
|
-
|
|
552
|
+
## Troubleshooting
|
|
478
553
|
|
|
479
|
-
|
|
554
|
+
### Max retries
|
|
555
|
+
|
|
556
|
+
By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` is related to the delay between two retries, and `retry_http_codes` are the HTTP error codes that should trigger a retry.
|
|
480
557
|
|
|
481
558
|
```python
|
|
482
|
-
import
|
|
559
|
+
from pyalex import config
|
|
483
560
|
|
|
484
|
-
|
|
561
|
+
config.max_retries = 0
|
|
562
|
+
config.retry_backoff_factor = 0.1
|
|
563
|
+
config.retry_http_codes = [429, 500, 503]
|
|
485
564
|
```
|
|
486
565
|
|
|
487
|
-
If you configure an invalid API key all requests to OpenAlex will fail.
|
|
488
|
-
|
|
489
566
|
## Alternatives
|
|
490
567
|
|
|
491
568
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -7,6 +7,8 @@ except ImportError:
|
|
|
7
7
|
|
|
8
8
|
from pyalex.api import Author
|
|
9
9
|
from pyalex.api import Authors
|
|
10
|
+
from pyalex.api import Award
|
|
11
|
+
from pyalex.api import Awards
|
|
10
12
|
from pyalex.api import Concept
|
|
11
13
|
from pyalex.api import Concepts
|
|
12
14
|
from pyalex.api import Domain
|
|
@@ -18,6 +20,8 @@ from pyalex.api import Funders
|
|
|
18
20
|
from pyalex.api import Institution
|
|
19
21
|
from pyalex.api import Institutions
|
|
20
22
|
from pyalex.api import Journals
|
|
23
|
+
from pyalex.api import Keyword
|
|
24
|
+
from pyalex.api import Keywords
|
|
21
25
|
from pyalex.api import OpenAlexResponseList
|
|
22
26
|
from pyalex.api import People
|
|
23
27
|
from pyalex.api import Publisher
|
|
@@ -35,6 +39,8 @@ from pyalex.api import config
|
|
|
35
39
|
from pyalex.api import invert_abstract
|
|
36
40
|
|
|
37
41
|
__all__ = [
|
|
42
|
+
"Award",
|
|
43
|
+
"Awards",
|
|
38
44
|
"Works",
|
|
39
45
|
"Work",
|
|
40
46
|
"Authors",
|
|
@@ -57,6 +63,8 @@ __all__ = [
|
|
|
57
63
|
"Subfield",
|
|
58
64
|
"Topics",
|
|
59
65
|
"Topic",
|
|
66
|
+
"Keywords",
|
|
67
|
+
"Keyword",
|
|
60
68
|
"People",
|
|
61
69
|
"Journals",
|
|
62
70
|
"autocomplete",
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.19'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 19)
|
|
31
|
+
__version__ = version = '0.20'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 20)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'ga78ef7e47'
|
|
@@ -874,9 +874,88 @@ class BaseOpenAlex:
|
|
|
874
874
|
return resp_list
|
|
875
875
|
|
|
876
876
|
|
|
877
|
+
class BaseContent:
|
|
878
|
+
"""Class representing content in OpenAlex."""
|
|
879
|
+
|
|
880
|
+
def __init__(self, key):
|
|
881
|
+
self.key = key
|
|
882
|
+
|
|
883
|
+
def __repr__(self):
|
|
884
|
+
return f"Content(key='{self.key}')"
|
|
885
|
+
|
|
886
|
+
@property
|
|
887
|
+
def url(self):
|
|
888
|
+
"""Get the URL for the content.
|
|
889
|
+
|
|
890
|
+
Returns
|
|
891
|
+
-------
|
|
892
|
+
str
|
|
893
|
+
URL for the content.
|
|
894
|
+
"""
|
|
895
|
+
return f"https://content.openalex.org/works/{self.key}"
|
|
896
|
+
|
|
897
|
+
def get(self):
|
|
898
|
+
"""Get the content
|
|
899
|
+
|
|
900
|
+
Returns
|
|
901
|
+
-------
|
|
902
|
+
bytes
|
|
903
|
+
Content of the request.
|
|
904
|
+
"""
|
|
905
|
+
content_url = f"https://content.openalex.org/works/{self.key}"
|
|
906
|
+
|
|
907
|
+
res = _get_requests_session().get(
|
|
908
|
+
content_url, auth=OpenAlexAuth(config), allow_redirects=True
|
|
909
|
+
)
|
|
910
|
+
res.raise_for_status()
|
|
911
|
+
return res.content
|
|
912
|
+
|
|
913
|
+
def download(self, filepath):
|
|
914
|
+
"""Download the content to a file.
|
|
915
|
+
|
|
916
|
+
Parameters
|
|
917
|
+
----------
|
|
918
|
+
filepath : str
|
|
919
|
+
Path to save the content.
|
|
920
|
+
"""
|
|
921
|
+
|
|
922
|
+
with open(filepath, "wb") as f:
|
|
923
|
+
f.write(self.get())
|
|
924
|
+
|
|
925
|
+
|
|
877
926
|
# The API
|
|
878
927
|
|
|
879
928
|
|
|
929
|
+
class PDF(BaseContent):
|
|
930
|
+
"""Class representing a PDF content in OpenAlex."""
|
|
931
|
+
|
|
932
|
+
@property
|
|
933
|
+
def url(self):
|
|
934
|
+
"""Get the URL for the content.
|
|
935
|
+
|
|
936
|
+
Returns
|
|
937
|
+
-------
|
|
938
|
+
str
|
|
939
|
+
URL for the content.
|
|
940
|
+
"""
|
|
941
|
+
return f"https://content.openalex.org/works/{self.key}.pdf"
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
class TEI(BaseContent):
|
|
945
|
+
"""Class representing a TEI content in OpenAlex."""
|
|
946
|
+
|
|
947
|
+
@property
|
|
948
|
+
def url(self):
|
|
949
|
+
"""Get the URL for the content.
|
|
950
|
+
|
|
951
|
+
Returns
|
|
952
|
+
-------
|
|
953
|
+
str
|
|
954
|
+
URL for the content.
|
|
955
|
+
"""
|
|
956
|
+
return f"https://content.openalex.org/works/{self.key}.grobid-xml"
|
|
957
|
+
|
|
958
|
+
|
|
880
959
|
class Work(OpenAlexEntity):
|
|
881
960
|
"""Class representing a work entity in OpenAlex."""
|
|
882
961
|
|
|
@@ -918,6 +997,28 @@ class Work(OpenAlexEntity):
|
|
|
918
997
|
else:
|
|
919
998
|
return resp_list
|
|
920
999
|
|
|
1000
|
+
@property
|
|
1001
|
+
def pdf(self):
|
|
1002
|
+
"""Get the PDF content for the work.
|
|
1003
|
+
|
|
1004
|
+
Returns
|
|
1005
|
+
-------
|
|
1006
|
+
PDF
|
|
1007
|
+
PDF content object.
|
|
1008
|
+
"""
|
|
1009
|
+
return PDF(self["id"].split("/")[-1])
|
|
1010
|
+
|
|
1011
|
+
@property
|
|
1012
|
+
def tei(self):
|
|
1013
|
+
"""Get the TEI content for the work.
|
|
1014
|
+
|
|
1015
|
+
Returns
|
|
1016
|
+
-------
|
|
1017
|
+
TEI
|
|
1018
|
+
TEI content object.
|
|
1019
|
+
"""
|
|
1020
|
+
return TEI(self["id"].split("/")[-1])
|
|
1021
|
+
|
|
921
1022
|
|
|
922
1023
|
class Works(BaseOpenAlex):
|
|
923
1024
|
"""Class representing a collection of work entities in OpenAlex."""
|
|
@@ -1033,6 +1134,30 @@ class Funders(BaseOpenAlex):
|
|
|
1033
1134
|
resource_class = Funder
|
|
1034
1135
|
|
|
1035
1136
|
|
|
1137
|
+
class Award(OpenAlexEntity):
|
|
1138
|
+
"""Class representing an award entity in OpenAlex."""
|
|
1139
|
+
|
|
1140
|
+
pass
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
class Awards(BaseOpenAlex):
|
|
1144
|
+
"""Class representing a collection of award entities in OpenAlex."""
|
|
1145
|
+
|
|
1146
|
+
resource_class = Award
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
class Keyword(OpenAlexEntity):
|
|
1150
|
+
"""Class representing a keyword entity in OpenAlex."""
|
|
1151
|
+
|
|
1152
|
+
pass
|
|
1153
|
+
|
|
1154
|
+
|
|
1155
|
+
class Keywords(BaseOpenAlex):
|
|
1156
|
+
"""Class representing a collection of keyword entities in OpenAlex."""
|
|
1157
|
+
|
|
1158
|
+
resource_class = Keyword
|
|
1159
|
+
|
|
1160
|
+
|
|
1036
1161
|
class Autocomplete(OpenAlexEntity):
|
|
1037
1162
|
"""Class representing an autocomplete entity in OpenAlex."""
|
|
1038
1163
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.19
|
|
3
|
+
Version: 0.20
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -39,7 +39,18 @@ institutions, and more. OpenAlex offers a robust, open, and free [REST API](http
|
|
|
39
39
|
PyAlex is a lightweight and thin Python interface to this API. PyAlex tries to
|
|
40
40
|
stay as close as possible to the design of the original service.
|
|
41
41
|
|
|
42
|
-
The following features of OpenAlex are currently supported by PyAlex:
|
|
42
|
+
The following entities of OpenAlex are currently supported by PyAlex:
|
|
43
|
+
|
|
44
|
+
- [x] Work
|
|
45
|
+
- [x] Author
|
|
46
|
+
- [x] Source
|
|
47
|
+
- [x] Institution
|
|
48
|
+
- [x] Concept
|
|
49
|
+
- [x] Topic
|
|
50
|
+
- [x] Publisher
|
|
51
|
+
- [x] Funder
|
|
52
|
+
|
|
53
|
+
Including the following functionality:
|
|
43
54
|
|
|
44
55
|
- [x] Get single entities
|
|
45
56
|
- [x] Filter entities
|
|
@@ -50,7 +61,7 @@ The following features of OpenAlex are currently supported by PyAlex:
|
|
|
50
61
|
- [x] Sample
|
|
51
62
|
- [x] Pagination
|
|
52
63
|
- [x] Autocomplete endpoint
|
|
53
|
-
- [x] N-grams
|
|
64
|
+
- [x] N-grams [Deprecated by OpenAlex]
|
|
54
65
|
- [x] Authentication
|
|
55
66
|
|
|
56
67
|
We aim to cover the entire API, and we are looking for help. We are welcoming Pull Requests.
|
|
@@ -59,6 +70,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
|
|
|
59
70
|
|
|
60
71
|
- **Pipe operations** - PyAlex can handle multiple operations in a sequence. This allows the developer to write understandable queries. For examples, see [code snippets](#code-snippets).
|
|
61
72
|
- **Plaintext abstracts** - OpenAlex [doesn't include plaintext abstracts](https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index) due to legal constraints. PyAlex can convert the inverted abstracts into [plaintext abstracts on the fly](#get-abstract).
|
|
73
|
+
- **Fetch content in PDF and TEI format** - Retrieve full-text content from OpenAlex in PDF or TEI XML formats. See [fetching content](#fetch-content-in-pdf-and-tei-format).
|
|
62
74
|
- **Permissive license** - OpenAlex data is CC0 licensed :raised_hands:. PyAlex is published under the MIT license.
|
|
63
75
|
|
|
64
76
|
## Installation
|
|
@@ -74,40 +86,51 @@ pip install pyalex
|
|
|
74
86
|
PyAlex offers support for all [Entity Objects](https://docs.openalex.org/api-entities/entities-overview): [Works](https://docs.openalex.org/api-entities/works), [Authors](https://docs.openalex.org/api-entities/authors), [Sources](https://docs.openalex.org/api-entities/sources), [Institutions](https://docs.openalex.org/api-entities/institutions), [Topics](https://docs.openalex.org/api-entities/topics), [Publishers](https://docs.openalex.org/api-entities/publishers), and [Funders](https://docs.openalex.org/api-entities/funders).
|
|
75
87
|
|
|
76
88
|
```python
|
|
77
|
-
from pyalex import
|
|
89
|
+
from pyalex import (
|
|
90
|
+
Works,
|
|
91
|
+
Authors,
|
|
92
|
+
Sources,
|
|
93
|
+
Institutions,
|
|
94
|
+
Topics,
|
|
95
|
+
Keywords,
|
|
96
|
+
Publishers,
|
|
97
|
+
Funders,
|
|
98
|
+
Awards,
|
|
99
|
+
Concepts,
|
|
100
|
+
)
|
|
78
101
|
```
|
|
79
102
|
|
|
80
|
-
###
|
|
103
|
+
### Rate limits and authentication [Changed!]
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
faster and more consistent response times. To get into the polite pool, you
|
|
84
|
-
set your email:
|
|
105
|
+
**⚠️ API Key Required**: Starting February 13, 2026, an API key is **required** to use the OpenAlex API. API keys are free!
|
|
85
106
|
|
|
86
|
-
|
|
87
|
-
import pyalex
|
|
107
|
+
The OpenAlex API uses a credit-based rate limiting system. Different endpoint types consume different amounts of credits per request:
|
|
88
108
|
|
|
89
|
-
|
|
90
|
-
|
|
109
|
+
- **Without API key**: 100 credits per day (testing/demos only)
|
|
110
|
+
- **With free API key**: 100,000 credits per day
|
|
111
|
+
- **Singleton requests** (e.g., `/works/W123`): Free (0 credits)
|
|
112
|
+
- **List requests** (e.g., `/works?filter=...`): 1 credit each
|
|
91
113
|
|
|
92
|
-
|
|
114
|
+
All users are limited to a maximum of 100 requests per second.
|
|
93
115
|
|
|
94
|
-
|
|
116
|
+
#### Get an API Key
|
|
117
|
+
|
|
118
|
+
1. Create a free account at [openalex.org](https://openalex.org/)
|
|
119
|
+
2. Go to [openalex.org/settings/api](https://openalex.org/settings/api) to get your API key
|
|
120
|
+
3. Configure PyAlex with your key:
|
|
95
121
|
|
|
96
122
|
```python
|
|
97
|
-
|
|
123
|
+
import pyalex
|
|
98
124
|
|
|
99
|
-
config.
|
|
100
|
-
config.retry_backoff_factor = 0.1
|
|
101
|
-
config.retry_http_codes = [429, 500, 503]
|
|
125
|
+
pyalex.config.api_key = "<YOUR_API_KEY>"
|
|
102
126
|
```
|
|
103
127
|
|
|
104
|
-
|
|
128
|
+
For more information, see the [OpenAlex Rate limits and authentication documentation](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication).
|
|
105
129
|
|
|
106
|
-
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
130
|
|
|
108
131
|
### Get single entity
|
|
109
132
|
|
|
110
|
-
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or
|
|
133
|
+
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher, Funder or Award from OpenAlex by the
|
|
111
134
|
OpenAlex ID, or by DOI or ROR.
|
|
112
135
|
|
|
113
136
|
```python
|
|
@@ -172,6 +195,55 @@ w["abstract"]
|
|
|
172
195
|
|
|
173
196
|
Please respect the legal constraints when using this feature.
|
|
174
197
|
|
|
198
|
+
#### Fetch content in PDF and TEI format
|
|
199
|
+
|
|
200
|
+
OpenAlex reference: [Get content](https://docs.openalex.org/how-to-use-the-api/get-content)
|
|
201
|
+
|
|
202
|
+
Only for Works. Retrieve the full-text content of a work in PDF or TEI (Text Encoding Initiative) XML format, if available.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from pyalex import Works
|
|
206
|
+
|
|
207
|
+
# Get a work
|
|
208
|
+
w = Works()["W4412002745"]
|
|
209
|
+
|
|
210
|
+
# Access the PDF content
|
|
211
|
+
pdf_content = w.pdf.get()
|
|
212
|
+
|
|
213
|
+
# Or access the TEI content
|
|
214
|
+
tei_content = w.tei.get()
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
You can also download the content directly to a file:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
from pyalex import Works
|
|
221
|
+
|
|
222
|
+
w = Works()["W4412002745"]
|
|
223
|
+
|
|
224
|
+
# Download PDF to a file
|
|
225
|
+
w.pdf.download("document.pdf")
|
|
226
|
+
|
|
227
|
+
# Download TEI to a file
|
|
228
|
+
w.tei.download("document.xml")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
You can also get the URL of the content without downloading it:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from pyalex import Works
|
|
235
|
+
|
|
236
|
+
w = Works()["W4412002745"]
|
|
237
|
+
|
|
238
|
+
# Get the URL of the PDF
|
|
239
|
+
pdf_url = w.pdf.url
|
|
240
|
+
|
|
241
|
+
# Get the URL of the TEI
|
|
242
|
+
tei_url = w.tei.url
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Note: Content availability depends on the publisher's open access policies and licensing agreements.
|
|
246
|
+
|
|
175
247
|
### Get lists of entities
|
|
176
248
|
|
|
177
249
|
```python
|
|
@@ -420,6 +492,10 @@ with open(Path("works.json")) as f:
|
|
|
420
492
|
works = [Work(w) for w in json.load(f)]
|
|
421
493
|
```
|
|
422
494
|
|
|
495
|
+
## Standards
|
|
496
|
+
|
|
497
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
498
|
+
|
|
423
499
|
## Code snippets
|
|
424
500
|
|
|
425
501
|
A list of awesome use cases of the OpenAlex dataset.
|
|
@@ -498,20 +574,21 @@ Works() \
|
|
|
498
574
|
|
|
499
575
|
```
|
|
500
576
|
|
|
501
|
-
## Experimental
|
|
502
577
|
|
|
503
|
-
|
|
578
|
+
## Troubleshooting
|
|
504
579
|
|
|
505
|
-
|
|
580
|
+
### Max retries
|
|
581
|
+
|
|
582
|
+
By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` controls the delay between consecutive retries, and `retry_http_codes` lists the HTTP status codes that should trigger a retry.
|
|
506
583
|
|
|
507
584
|
```python
|
|
508
|
-
import
|
|
585
|
+
from pyalex import config
|
|
509
586
|
|
|
510
|
-
|
|
587
|
+
config.max_retries = 0
|
|
588
|
+
config.retry_backoff_factor = 0.1
|
|
589
|
+
config.retry_http_codes = [429, 500, 503]
|
|
511
590
|
```
|
|
512
591
|
|
|
513
|
-
If you configure an invalid API key all requests to OpenAlex will fail.
|
|
514
|
-
|
|
515
592
|
## Alternatives
|
|
516
593
|
|
|
517
594
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
1
4
|
import pytest
|
|
2
5
|
|
|
3
6
|
import pyalex
|
|
@@ -7,10 +10,39 @@ from pyalex.api import Paginator
|
|
|
7
10
|
pyalex.config.max_retries = 10
|
|
8
11
|
|
|
9
12
|
|
|
13
|
+
def requires_api_key(reason="OpenAlex requires authentication for this operation"):
|
|
14
|
+
"""Decorator for API Key requirement.
|
|
15
|
+
|
|
16
|
+
Decorator that skips test if OPENALEX_API_KEY is not set, and
|
|
17
|
+
sets it for the test.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def decorator(func):
|
|
21
|
+
@pytest.mark.skipif(
|
|
22
|
+
not os.environ.get("OPENALEX_API_KEY"),
|
|
23
|
+
reason=reason,
|
|
24
|
+
)
|
|
25
|
+
@wraps(func)
|
|
26
|
+
def wrapper(*args, **kwargs):
|
|
27
|
+
api_key = os.environ.get("OPENALEX_API_KEY")
|
|
28
|
+
original_api_key = pyalex.config.api_key
|
|
29
|
+
try:
|
|
30
|
+
pyalex.config.api_key = api_key
|
|
31
|
+
return func(*args, **kwargs)
|
|
32
|
+
finally:
|
|
33
|
+
pyalex.config.api_key = original_api_key
|
|
34
|
+
|
|
35
|
+
return wrapper
|
|
36
|
+
|
|
37
|
+
return decorator
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
10
41
|
def test_cursor_no_filter():
|
|
11
42
|
assert len(list(pyalex.Works().paginate(per_page=200, n_max=1000))) == 5
|
|
12
43
|
|
|
13
44
|
|
|
45
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
14
46
|
def test_cursor():
|
|
15
47
|
query = Authors().search_filter(display_name="einstein")
|
|
16
48
|
|
|
@@ -33,6 +65,7 @@ def test_cursor():
|
|
|
33
65
|
assert len(results) > 200
|
|
34
66
|
|
|
35
67
|
|
|
68
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
36
69
|
def test_page():
|
|
37
70
|
query = Authors().search_filter(display_name="einstein")
|
|
38
71
|
|
|
@@ -54,6 +87,7 @@ def test_page():
|
|
|
54
87
|
assert len(results) > 200
|
|
55
88
|
|
|
56
89
|
|
|
90
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
57
91
|
def test_paginate_counts():
|
|
58
92
|
r = Authors().search_filter(display_name="einstein").get()
|
|
59
93
|
|
|
@@ -77,34 +111,41 @@ def test_paginate_counts():
|
|
|
77
111
|
assert r.meta["count"] == n_p_page >= n_p_default == n_p_cursor
|
|
78
112
|
|
|
79
113
|
|
|
114
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
80
115
|
def test_paginate_per_page():
|
|
81
116
|
assert all(len(page) <= 10 for page in Authors().paginate(per_page=10, n_max=50))
|
|
82
117
|
|
|
83
118
|
|
|
119
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
84
120
|
def test_paginate_per_page_200():
|
|
85
121
|
assert all(len(page) == 200 for page in Authors().paginate(per_page=200, n_max=400))
|
|
86
122
|
|
|
87
123
|
|
|
124
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
88
125
|
def test_paginate_per_page_none():
|
|
89
126
|
assert all(len(page) == 25 for page in Authors().paginate(n_max=500))
|
|
90
127
|
|
|
91
128
|
|
|
129
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
92
130
|
def test_paginate_per_page_1000():
|
|
93
131
|
with pytest.raises(ValueError):
|
|
94
132
|
assert next(Authors().paginate(per_page=1000))
|
|
95
133
|
|
|
96
134
|
|
|
135
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
97
136
|
def test_paginate_per_page_str():
|
|
98
137
|
with pytest.raises(ValueError):
|
|
99
138
|
assert next(Authors().paginate(per_page="100"))
|
|
100
139
|
|
|
101
140
|
|
|
141
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
102
142
|
def test_paginate_instance():
|
|
103
143
|
p_default = Authors().search_filter(display_name="einstein").paginate(per_page=200)
|
|
104
144
|
assert isinstance(p_default, Paginator)
|
|
105
145
|
assert p_default.method == "cursor"
|
|
106
146
|
|
|
107
147
|
|
|
148
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
108
149
|
def test_paginate_cursor_n_max():
|
|
109
150
|
p = (
|
|
110
151
|
Authors()
|
|
@@ -115,6 +156,7 @@ def test_paginate_cursor_n_max():
|
|
|
115
156
|
assert sum(len(page) for page in p) == 400
|
|
116
157
|
|
|
117
158
|
|
|
159
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
118
160
|
def test_cursor_paging_n_max_none():
|
|
119
161
|
p = (
|
|
120
162
|
Authors()
|
|
@@ -125,10 +167,12 @@ def test_cursor_paging_n_max_none():
|
|
|
125
167
|
sum(len(page) for page in p)
|
|
126
168
|
|
|
127
169
|
|
|
170
|
+
@requires_api_key(reason="OpenAlex requires authentication for sample queries")
|
|
128
171
|
def test_paging_with_sample():
|
|
129
172
|
with pytest.raises(ValueError):
|
|
130
173
|
Authors().sample(1).paginate(method="cursor")
|
|
131
174
|
|
|
132
175
|
|
|
176
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
133
177
|
def test_paging_next():
|
|
134
178
|
next(Authors().paginate())
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import json
|
|
3
3
|
import os
|
|
4
|
+
from functools import wraps
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
import pytest
|
|
@@ -10,11 +11,13 @@ from requests import HTTPError
|
|
|
10
11
|
|
|
11
12
|
import pyalex
|
|
12
13
|
from pyalex import Authors
|
|
14
|
+
from pyalex import Awards
|
|
13
15
|
from pyalex import Concepts
|
|
14
16
|
from pyalex import Domains
|
|
15
17
|
from pyalex import Fields
|
|
16
18
|
from pyalex import Funders
|
|
17
19
|
from pyalex import Institutions
|
|
20
|
+
from pyalex import Keywords
|
|
18
21
|
from pyalex import Publishers
|
|
19
22
|
from pyalex import Sources
|
|
20
23
|
from pyalex import Subfields
|
|
@@ -30,12 +33,41 @@ load_dotenv()
|
|
|
30
33
|
pyalex.config.max_retries = 10
|
|
31
34
|
|
|
32
35
|
|
|
36
|
+
def requires_api_key(reason="OpenAlex requires authentication for this operation"):
|
|
37
|
+
"""Decorator for API Key requirement.
|
|
38
|
+
|
|
39
|
+
Decorator that skips test if OPENALEX_API_KEY is not set, and
|
|
40
|
+
sets it for the test.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def decorator(func):
|
|
44
|
+
@pytest.mark.skipif(
|
|
45
|
+
not os.environ.get("OPENALEX_API_KEY"),
|
|
46
|
+
reason=reason,
|
|
47
|
+
)
|
|
48
|
+
@wraps(func)
|
|
49
|
+
def wrapper(*args, **kwargs):
|
|
50
|
+
api_key = os.environ.get("OPENALEX_API_KEY")
|
|
51
|
+
original_api_key = pyalex.config.api_key
|
|
52
|
+
try:
|
|
53
|
+
pyalex.config.api_key = api_key
|
|
54
|
+
return func(*args, **kwargs)
|
|
55
|
+
finally:
|
|
56
|
+
pyalex.config.api_key = original_api_key
|
|
57
|
+
|
|
58
|
+
return wrapper
|
|
59
|
+
|
|
60
|
+
return decorator
|
|
61
|
+
|
|
62
|
+
|
|
33
63
|
OPEN_ALEX_ENTITIES = [
|
|
34
64
|
Authors,
|
|
65
|
+
Awards,
|
|
35
66
|
Domains,
|
|
36
67
|
Fields,
|
|
37
68
|
Funders,
|
|
38
69
|
Institutions,
|
|
70
|
+
Keywords,
|
|
39
71
|
Publishers,
|
|
40
72
|
Sources,
|
|
41
73
|
Subfields,
|
|
@@ -54,43 +86,52 @@ def test_config():
|
|
|
54
86
|
pyalex.config.api_key = None
|
|
55
87
|
|
|
56
88
|
|
|
89
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
57
90
|
@pytest.mark.parametrize("entity", OPEN_ALEX_ENTITIES)
|
|
58
91
|
def test_meta_entities(entity):
|
|
59
92
|
r = entity().get()
|
|
60
93
|
assert r.meta.get("count", False)
|
|
61
94
|
|
|
62
95
|
|
|
96
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
63
97
|
@pytest.mark.filterwarnings("ignore:.*deprecated.*:DeprecationWarning")
|
|
64
98
|
def test_meta_entities_deprecated():
|
|
65
99
|
r = Concepts().get()
|
|
66
100
|
assert r.meta.get("count", False)
|
|
67
101
|
|
|
68
102
|
|
|
103
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
69
104
|
def test_works_params():
|
|
70
105
|
assert len(Works(params={"filter": {"publication_year": "2020"}}).get()) == 25
|
|
71
106
|
|
|
72
107
|
|
|
108
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
73
109
|
def test_works():
|
|
74
110
|
assert len(Works().filter(publication_year=2020).get()) == 25
|
|
75
111
|
|
|
76
112
|
|
|
113
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
77
114
|
def test_works_count():
|
|
78
115
|
assert Works().filter(publication_year=2020).count() > 10_000_000
|
|
79
116
|
|
|
80
117
|
|
|
118
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
81
119
|
def test_per_page():
|
|
82
120
|
assert len(Works().filter(publication_year=2020).get(per_page=200)) == 200
|
|
83
121
|
|
|
84
122
|
|
|
123
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
85
124
|
def test_per_page_none():
|
|
86
125
|
assert len(Works().filter(publication_year=2020).get(per_page=None)) == 25
|
|
87
126
|
|
|
88
127
|
|
|
128
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
89
129
|
def test_per_page_1000():
|
|
90
130
|
with pytest.raises(ValueError):
|
|
91
131
|
Works().filter(publication_year=2020).get(per_page=1000)
|
|
92
132
|
|
|
93
133
|
|
|
134
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
94
135
|
def test_per_page_str():
|
|
95
136
|
with pytest.raises(ValueError):
|
|
96
137
|
Works().filter(publication_year=2020).get(per_page="100")
|
|
@@ -128,10 +169,12 @@ def test_work_error():
|
|
|
128
169
|
Works()["NotAWorkID"]
|
|
129
170
|
|
|
130
171
|
|
|
172
|
+
@requires_api_key(reason="OpenAlex requires authentication for random() endpoint")
|
|
131
173
|
def test_random_works():
|
|
132
174
|
assert isinstance(Works().random(), dict)
|
|
133
175
|
|
|
134
176
|
|
|
177
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
135
178
|
def test_multi_works():
|
|
136
179
|
# the work to extract the referenced works of
|
|
137
180
|
w = Works()["W2741809807"]
|
|
@@ -144,6 +187,7 @@ def test_multi_works():
|
|
|
144
187
|
)
|
|
145
188
|
|
|
146
189
|
|
|
190
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
147
191
|
def test_works_multifilter():
|
|
148
192
|
r = requests.get(
|
|
149
193
|
"https://api.openalex.org/works?filter=publication_year:2020,is_oa:true"
|
|
@@ -174,6 +218,7 @@ def test_works_url():
|
|
|
174
218
|
assert Works().url == "https://api.openalex.org/works"
|
|
175
219
|
|
|
176
220
|
|
|
221
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
177
222
|
def test_works_multifilter_meta():
|
|
178
223
|
r1 = Works().filter(publication_year=2020, is_oa=True).get()
|
|
179
224
|
r2 = Works().filter(publication_year=2020).filter(is_oa=True).get()
|
|
@@ -181,11 +226,13 @@ def test_works_multifilter_meta():
|
|
|
181
226
|
assert r1.meta["count"] == r2.meta["count"]
|
|
182
227
|
|
|
183
228
|
|
|
229
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
184
230
|
def test_query_error():
|
|
185
231
|
with pytest.raises(QueryError):
|
|
186
232
|
Works().filter(publication_year_error=2020).get()
|
|
187
233
|
|
|
188
234
|
|
|
235
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
189
236
|
def test_data_publications():
|
|
190
237
|
w = (
|
|
191
238
|
Works()
|
|
@@ -198,6 +245,7 @@ def test_data_publications():
|
|
|
198
245
|
assert len(w) > 20
|
|
199
246
|
|
|
200
247
|
|
|
248
|
+
@requires_api_key(reason="OpenAlex requires authentication for search queries")
|
|
201
249
|
def test_search():
|
|
202
250
|
w = (
|
|
203
251
|
Works()
|
|
@@ -211,6 +259,7 @@ def test_search():
|
|
|
211
259
|
assert w[0]["doi"] == "https://doi.org/10.1038/s42256-020-00287-7"
|
|
212
260
|
|
|
213
261
|
|
|
262
|
+
@requires_api_key(reason="OpenAlex requires authentication for search_filter queries")
|
|
214
263
|
def test_search_filter():
|
|
215
264
|
r = requests.get(
|
|
216
265
|
"https://api.openalex.org/authors?filter=display_name.search:einstein"
|
|
@@ -221,6 +270,7 @@ def test_search_filter():
|
|
|
221
270
|
assert r["meta"]["count"] == a_count
|
|
222
271
|
|
|
223
272
|
|
|
273
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
224
274
|
def test_referenced_works():
|
|
225
275
|
# the work to extract the referenced works of
|
|
226
276
|
w = Works()["W2741809807"]
|
|
@@ -230,6 +280,7 @@ def test_referenced_works():
|
|
|
230
280
|
assert r.meta["count"] <= len(w["referenced_works"])
|
|
231
281
|
|
|
232
282
|
|
|
283
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
233
284
|
def test_code_examples():
|
|
234
285
|
# /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code # noqa
|
|
235
286
|
# /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code&sort=count:desc # noqa
|
|
@@ -260,6 +311,7 @@ def test_serializable(tmpdir):
|
|
|
260
311
|
assert "W4238809453" in json.load(f)["id"]
|
|
261
312
|
|
|
262
313
|
|
|
314
|
+
@requires_api_key(reason="OpenAlex requires authentication for unfiltered queries")
|
|
263
315
|
def test_serializable_list(tmpdir):
|
|
264
316
|
with open(Path(tmpdir, "test.json"), "w") as f:
|
|
265
317
|
json.dump(Works().get(), f)
|
|
@@ -285,6 +337,7 @@ def test_ngrams_with_metadata():
|
|
|
285
337
|
assert meta["count"] == 1068
|
|
286
338
|
|
|
287
339
|
|
|
340
|
+
@requires_api_key(reason="OpenAlex requires authentication for random() endpoint")
|
|
288
341
|
def test_random_publishers():
|
|
289
342
|
assert isinstance(Publishers().random(), dict)
|
|
290
343
|
|
|
@@ -385,18 +438,23 @@ def test_subset():
|
|
|
385
438
|
assert url == Works().select(["id", "doi", "display_name"]).url
|
|
386
439
|
|
|
387
440
|
|
|
441
|
+
@requires_api_key(
|
|
442
|
+
reason="OpenAlex requires authentication for filter queries with autocomplete"
|
|
443
|
+
)
|
|
388
444
|
def test_autocomplete_works():
|
|
389
445
|
w = Works().filter(publication_year=2023).autocomplete("planetary boundaries")
|
|
390
446
|
|
|
391
447
|
assert all(["external_id" in x for x in w])
|
|
392
448
|
|
|
393
449
|
|
|
450
|
+
@requires_api_key(reason="OpenAlex requires authentication for autocomplete endpoint")
|
|
394
451
|
def test_autocomplete():
|
|
395
452
|
a = autocomplete("stockholm resilience")
|
|
396
453
|
|
|
397
454
|
assert all(["external_id" in x for x in a])
|
|
398
455
|
|
|
399
456
|
|
|
457
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
400
458
|
def test_filter_urlencoding():
|
|
401
459
|
assert Works().filter(doi="10.1207/s15327809jls0703&4_2").count() == 1
|
|
402
460
|
assert (
|
|
@@ -405,6 +463,7 @@ def test_filter_urlencoding():
|
|
|
405
463
|
)
|
|
406
464
|
|
|
407
465
|
|
|
466
|
+
@requires_api_key(reason="OpenAlex requires authentication for filter queries")
|
|
408
467
|
def test_urlencoding_list():
|
|
409
468
|
assert (
|
|
410
469
|
Works()
|
|
@@ -426,6 +485,40 @@ def test_premium_api_no_valid_key():
|
|
|
426
485
|
Works().get()
|
|
427
486
|
|
|
428
487
|
|
|
488
|
+
def test_unauthenticated_filter_call():
|
|
489
|
+
"""Test that filter/search calls without authentication will fail.
|
|
490
|
+
|
|
491
|
+
(post Feb 11, 2025)
|
|
492
|
+
|
|
493
|
+
This test documents the expected behavior when OpenAlex enforces their new policy
|
|
494
|
+
allowing only singleton calls without authentication. Filter and search queries
|
|
495
|
+
will require an API key.
|
|
496
|
+
|
|
497
|
+
Note: This test currently passes the filter call because OpenAlex hasn't yet
|
|
498
|
+
enforced the restriction. Once the policy is enforced, this test should fail
|
|
499
|
+
and we'll need to mark it as @requires_api_key instead.
|
|
500
|
+
"""
|
|
501
|
+
# Ensure no API key is set
|
|
502
|
+
original_api_key = pyalex.config.api_key
|
|
503
|
+
pyalex.config.api_key = None
|
|
504
|
+
|
|
505
|
+
try:
|
|
506
|
+
# This should work for now, but will fail once OpenAlex enforces the policy
|
|
507
|
+
# Singleton calls like Works()["ID"] should still work without auth
|
|
508
|
+
result = Works()["W2741809807"]
|
|
509
|
+
assert result["id"] == "https://openalex.org/W2741809807"
|
|
510
|
+
|
|
511
|
+
# Filter/search calls will fail once policy is enforced
|
|
512
|
+
# For now, they still work, so we document the expected future behavior
|
|
513
|
+
# Once OpenAlex enforces the policy, this should raise an error
|
|
514
|
+
filter_result = Works().filter(publication_year=2020).get()
|
|
515
|
+
# If we get here, the policy hasn't been enforced yet
|
|
516
|
+
assert len(filter_result) > 0
|
|
517
|
+
finally:
|
|
518
|
+
# Restore original API key
|
|
519
|
+
pyalex.config.api_key = original_api_key
|
|
520
|
+
|
|
521
|
+
|
|
429
522
|
@pytest.mark.skipif(
|
|
430
523
|
not os.environ.get("OPENALEX_API_KEY"),
|
|
431
524
|
reason="OPENALEX_API_KEY is not set in the environment variables",
|
|
@@ -439,3 +532,46 @@ def test_premium_api():
|
|
|
439
532
|
Works().filter(from_updated_date=f"{datetime.datetime.now().year}-01-01").get()
|
|
440
533
|
|
|
441
534
|
pyalex.config.api_key = None
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def test_work_pdf_and_tei_download(tmpdir):
|
|
538
|
+
"""Test downloading PDF and TEI content for a Work.
|
|
539
|
+
|
|
540
|
+
This test verifies that:
|
|
541
|
+
1. A Work object has accessible pdf and tei properties
|
|
542
|
+
2. PDF and TEI objects have correct URLs
|
|
543
|
+
3. PDF and TEI content can be retrieved and downloaded to files
|
|
544
|
+
"""
|
|
545
|
+
|
|
546
|
+
pyalex.config.api_key = os.environ["OPENALEX_API_KEY"]
|
|
547
|
+
|
|
548
|
+
# Get a work
|
|
549
|
+
work = Works()["W4412002745"]
|
|
550
|
+
|
|
551
|
+
# Test that pdf and tei properties return the correct types
|
|
552
|
+
assert work.pdf is not None
|
|
553
|
+
assert work.tei is not None
|
|
554
|
+
|
|
555
|
+
# Test that PDF has a valid URL
|
|
556
|
+
pdf_url = work.pdf.url
|
|
557
|
+
assert pdf_url.endswith(".pdf")
|
|
558
|
+
assert "content.openalex.org" in pdf_url
|
|
559
|
+
assert "W4412002745" in pdf_url
|
|
560
|
+
|
|
561
|
+
# Test that TEI has a valid URL
|
|
562
|
+
tei_url = work.tei.url
|
|
563
|
+
assert "grobid-xml" in tei_url
|
|
564
|
+
assert "content.openalex.org" in tei_url
|
|
565
|
+
assert "W4412002745" in tei_url
|
|
566
|
+
|
|
567
|
+
# Test downloading PDF content
|
|
568
|
+
pdf_path = Path(tmpdir) / "test.pdf"
|
|
569
|
+
work.pdf.download(str(pdf_path))
|
|
570
|
+
assert pdf_path.exists()
|
|
571
|
+
assert pdf_path.stat().st_size > 0
|
|
572
|
+
|
|
573
|
+
# Test downloading TEI content
|
|
574
|
+
tei_path = Path(tmpdir) / "test.xml"
|
|
575
|
+
work.tei.download(str(tei_path))
|
|
576
|
+
assert tei_path.exists()
|
|
577
|
+
assert tei_path.stat().st_size > 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|