pyalex 0.19__py3-none-any.whl → 0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyalex/__init__.py +8 -0
- pyalex/_version.py +2 -2
- pyalex/api.py +125 -0
- {pyalex-0.19.dist-info → pyalex-0.20.dist-info}/METADATA +105 -28
- pyalex-0.20.dist-info/RECORD +8 -0
- {pyalex-0.19.dist-info → pyalex-0.20.dist-info}/WHEEL +1 -1
- pyalex-0.19.dist-info/RECORD +0 -8
- {pyalex-0.19.dist-info → pyalex-0.20.dist-info}/licenses/LICENSE +0 -0
- {pyalex-0.19.dist-info → pyalex-0.20.dist-info}/top_level.txt +0 -0
pyalex/__init__.py
CHANGED
|
@@ -7,6 +7,8 @@ except ImportError:
|
|
|
7
7
|
|
|
8
8
|
from pyalex.api import Author
|
|
9
9
|
from pyalex.api import Authors
|
|
10
|
+
from pyalex.api import Award
|
|
11
|
+
from pyalex.api import Awards
|
|
10
12
|
from pyalex.api import Concept
|
|
11
13
|
from pyalex.api import Concepts
|
|
12
14
|
from pyalex.api import Domain
|
|
@@ -18,6 +20,8 @@ from pyalex.api import Funders
|
|
|
18
20
|
from pyalex.api import Institution
|
|
19
21
|
from pyalex.api import Institutions
|
|
20
22
|
from pyalex.api import Journals
|
|
23
|
+
from pyalex.api import Keyword
|
|
24
|
+
from pyalex.api import Keywords
|
|
21
25
|
from pyalex.api import OpenAlexResponseList
|
|
22
26
|
from pyalex.api import People
|
|
23
27
|
from pyalex.api import Publisher
|
|
@@ -35,6 +39,8 @@ from pyalex.api import config
|
|
|
35
39
|
from pyalex.api import invert_abstract
|
|
36
40
|
|
|
37
41
|
__all__ = [
|
|
42
|
+
"Award",
|
|
43
|
+
"Awards",
|
|
38
44
|
"Works",
|
|
39
45
|
"Work",
|
|
40
46
|
"Authors",
|
|
@@ -57,6 +63,8 @@ __all__ = [
|
|
|
57
63
|
"Subfield",
|
|
58
64
|
"Topics",
|
|
59
65
|
"Topic",
|
|
66
|
+
"Keywords",
|
|
67
|
+
"Keyword",
|
|
60
68
|
"People",
|
|
61
69
|
"Journals",
|
|
62
70
|
"autocomplete",
|
pyalex/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.
|
|
32
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.20'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 20)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
pyalex/api.py
CHANGED
|
@@ -874,9 +874,88 @@ class BaseOpenAlex:
|
|
|
874
874
|
return resp_list
|
|
875
875
|
|
|
876
876
|
|
|
877
|
+
class BaseContent:
    """Handle to the downloadable content of a work in OpenAlex.

    Subclasses select a specific format by overriding the ``url`` property;
    ``get`` and ``download`` always operate on that property.

    Parameters
    ----------
    key : str
        Identifier placed in the path of the content URL.
    """

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return f"Content(key='{self.key}')"

    @property
    def url(self):
        """Get the URL for the content.

        Returns
        -------
        str
            URL for the content.
        """
        return f"https://content.openalex.org/works/{self.key}"

    def get(self):
        """Get the content.

        The request goes to ``self.url``, so a subclass that overrides
        ``url`` fetches its own format instead of the base address.

        Returns
        -------
        bytes
            Content of the request.

        Raises
        ------
        Exception
            Whatever ``raise_for_status()`` raises for an error response.
        """
        # Use the (possibly overridden) property rather than rebuilding the
        # base URL here; otherwise every subclass would request the same
        # address regardless of its format suffix.
        res = _get_requests_session().get(
            self.url, auth=OpenAlexAuth(config), allow_redirects=True
        )
        res.raise_for_status()
        return res.content

    def download(self, filepath):
        """Download the content to a file.

        Parameters
        ----------
        filepath : str
            Path to save the content.
        """
        # Fetch first: if the request fails, the target file is neither
        # created nor truncated.
        content = self.get()

        with open(filepath, "wb") as f:
            f.write(content)
|
|
924
|
+
|
|
925
|
+
|
|
877
926
|
# The API
|
|
878
927
|
|
|
879
928
|
|
|
929
|
+
class PDF(BaseContent):
    """Content handle for the PDF rendition of a work in OpenAlex."""

    @property
    def url(self):
        """Build the address of the PDF content.

        Returns
        -------
        str
            Works content URL for ``self.key`` with a ``.pdf`` suffix.
        """
        location = f"https://content.openalex.org/works/{self.key}.pdf"
        return location
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
class TEI(BaseContent):
    """Content handle for the TEI rendition of a work in OpenAlex."""

    @property
    def url(self):
        """Build the address of the TEI content.

        Returns
        -------
        str
            Works content URL for ``self.key`` with a ``.grobid-xml`` suffix.
        """
        location = f"https://content.openalex.org/works/{self.key}.grobid-xml"
        return location
|
|
957
|
+
|
|
958
|
+
|
|
880
959
|
class Work(OpenAlexEntity):
|
|
881
960
|
"""Class representing a work entity in OpenAlex."""
|
|
882
961
|
|
|
@@ -918,6 +997,28 @@ class Work(OpenAlexEntity):
|
|
|
918
997
|
else:
|
|
919
998
|
return resp_list
|
|
920
999
|
|
|
1000
|
+
@property
def pdf(self):
    """Build the PDF content object for this work.

    The key is the last ``/``-separated segment of the work's ``id``.

    Returns
    -------
    PDF
        PDF content object.
    """
    segments = self["id"].split("/")
    work_key = segments[-1]
    return PDF(work_key)
|
|
1010
|
+
|
|
1011
|
+
@property
def tei(self):
    """Build the TEI content object for this work.

    The key is the last ``/``-separated segment of the work's ``id``.

    Returns
    -------
    TEI
        TEI content object.
    """
    segments = self["id"].split("/")
    work_key = segments[-1]
    return TEI(work_key)
|
|
1021
|
+
|
|
921
1022
|
|
|
922
1023
|
class Works(BaseOpenAlex):
|
|
923
1024
|
"""Class representing a collection of work entities in OpenAlex."""
|
|
@@ -1033,6 +1134,30 @@ class Funders(BaseOpenAlex):
|
|
|
1033
1134
|
resource_class = Funder
|
|
1034
1135
|
|
|
1035
1136
|
|
|
1137
|
+
class Award(OpenAlexEntity):
    """A single award entity in OpenAlex.

    Adds nothing on top of ``OpenAlexEntity``.
    """
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
class Awards(BaseOpenAlex):
    """A collection of award entities in OpenAlex."""

    # Entity class paired with this collection.
    resource_class = Award
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
class Keyword(OpenAlexEntity):
    """A single keyword entity in OpenAlex.

    Adds nothing on top of ``OpenAlexEntity``.
    """
|
|
1153
|
+
|
|
1154
|
+
|
|
1155
|
+
class Keywords(BaseOpenAlex):
    """A collection of keyword entities in OpenAlex."""

    # Entity class paired with this collection.
    resource_class = Keyword
|
|
1159
|
+
|
|
1160
|
+
|
|
1036
1161
|
class Autocomplete(OpenAlexEntity):
|
|
1037
1162
|
"""Class representing an autocomplete entity in OpenAlex."""
|
|
1038
1163
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.20
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -39,7 +39,18 @@ institutions, and more. OpenAlex offers a robust, open, and free [REST API](http
|
|
|
39
39
|
PyAlex is a lightweight and thin Python interface to this API. PyAlex tries to
|
|
40
40
|
stay as close as possible to the design of the original service.
|
|
41
41
|
|
|
42
|
-
The following
|
|
42
|
+
The following entities of OpenAlex are currently supported by PyAlex:
|
|
43
|
+
|
|
44
|
+
- [x] Work
|
|
45
|
+
- [x] Author
|
|
46
|
+
- [x] Source
|
|
47
|
+
- [x] Institution
|
|
48
|
+
- [x] Concept
|
|
49
|
+
- [x] Topic
|
|
50
|
+
- [x] Publisher
|
|
51
|
+
- [x] Funder
|
|
52
|
+
|
|
53
|
+
Including the following functionality:
|
|
43
54
|
|
|
44
55
|
- [x] Get single entities
|
|
45
56
|
- [x] Filter entities
|
|
@@ -50,7 +61,7 @@ The following features of OpenAlex are currently supported by PyAlex:
|
|
|
50
61
|
- [x] Sample
|
|
51
62
|
- [x] Pagination
|
|
52
63
|
- [x] Autocomplete endpoint
|
|
53
|
-
- [x] N-grams
|
|
64
|
+
- [x] N-grams [Deprecated by OpenAlex]
|
|
54
65
|
- [x] Authentication
|
|
55
66
|
|
|
56
67
|
We aim to cover the entire API, and we are looking for help. We are welcoming Pull Requests.
|
|
@@ -59,6 +70,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
|
|
|
59
70
|
|
|
60
71
|
- **Pipe operations** - PyAlex can handle multiple operations in a sequence. This allows the developer to write understandable queries. For examples, see [code snippets](#code-snippets).
|
|
61
72
|
- **Plaintext abstracts** - OpenAlex [doesn't include plaintext abstracts](https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index) due to legal constraints. PyAlex can convert the inverted abstracts into [plaintext abstracts on the fly](#get-abstract).
|
|
73
|
+
- **Fetch content in PDF and TEI format** - Retrieve full-text content from OpenAlex in PDF or TEI XML formats. See [fetching content](#fetch-content-in-pdf-and-tei-format).
|
|
62
74
|
- **Permissive license** - OpenAlex data is CC0 licensed :raised_hands:. PyAlex is published under the MIT license.
|
|
63
75
|
|
|
64
76
|
## Installation
|
|
@@ -74,40 +86,51 @@ pip install pyalex
|
|
|
74
86
|
PyAlex offers support for all [Entity Objects](https://docs.openalex.org/api-entities/entities-overview): [Works](https://docs.openalex.org/api-entities/works), [Authors](https://docs.openalex.org/api-entities/authors), [Sources](https://docs.openalex.org/api-entities/sources), [Institutions](https://docs.openalex.org/api-entities/institutions), [Topics](https://docs.openalex.org/api-entities/topics), [Publishers](https://docs.openalex.org/api-entities/publishers), and [Funders](https://docs.openalex.org/api-entities/funders).
|
|
75
87
|
|
|
76
88
|
```python
|
|
77
|
-
from pyalex import
|
|
89
|
+
from pyalex import (
|
|
90
|
+
Works,
|
|
91
|
+
Authors,
|
|
92
|
+
Sources,
|
|
93
|
+
Institutions,
|
|
94
|
+
Topics,
|
|
95
|
+
Keywords,
|
|
96
|
+
Publishers,
|
|
97
|
+
Funders,
|
|
98
|
+
Awards,
|
|
99
|
+
Concepts,
|
|
100
|
+
)
|
|
78
101
|
```
|
|
79
102
|
|
|
80
|
-
###
|
|
103
|
+
### Rate limits and authentication [Changed!]
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
faster and more consistent response times. To get into the polite pool, you
|
|
84
|
-
set your email:
|
|
105
|
+
**⚠️ API Key Required**: Starting February 13, 2026, an API key is **required** to use the OpenAlex API. API keys are free!
|
|
85
106
|
|
|
86
|
-
|
|
87
|
-
import pyalex
|
|
107
|
+
The OpenAlex API uses a credit-based rate limiting system. Different endpoint types consume different amounts of credits per request:
|
|
88
108
|
|
|
89
|
-
|
|
90
|
-
|
|
109
|
+
- **Without API key**: 100 credits per day (testing/demos only)
|
|
110
|
+
- **With free API key**: 100,000 credits per day
|
|
111
|
+
- **Singleton requests** (e.g., `/works/W123`): Free (0 credits)
|
|
112
|
+
- **List requests** (e.g., `/works?filter=...`): 1 credit each
|
|
91
113
|
|
|
92
|
-
|
|
114
|
+
All users are limited to a maximum of 100 requests per second.
|
|
93
115
|
|
|
94
|
-
|
|
116
|
+
#### Get an API Key
|
|
117
|
+
|
|
118
|
+
1. Create a free account at [openalex.org](https://openalex.org/)
|
|
119
|
+
2. Go to [openalex.org/settings/api](https://openalex.org/settings/api) to get your API key
|
|
120
|
+
3. Configure PyAlex with your key:
|
|
95
121
|
|
|
96
122
|
```python
|
|
97
|
-
|
|
123
|
+
import pyalex
|
|
98
124
|
|
|
99
|
-
config.
|
|
100
|
-
config.retry_backoff_factor = 0.1
|
|
101
|
-
config.retry_http_codes = [429, 500, 503]
|
|
125
|
+
pyalex.config.api_key = "<YOUR_API_KEY>"
|
|
102
126
|
```
|
|
103
127
|
|
|
104
|
-
|
|
128
|
+
For more information, see the [OpenAlex Rate limits and authentication documentation](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication).
|
|
105
129
|
|
|
106
|
-
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
130
|
|
|
108
131
|
### Get single entity
|
|
109
132
|
|
|
110
|
-
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or
|
|
133
|
+
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher, Funder or Award from OpenAlex by the
|
|
111
134
|
OpenAlex ID, or by DOI or ROR.
|
|
112
135
|
|
|
113
136
|
```python
|
|
@@ -172,6 +195,55 @@ w["abstract"]
|
|
|
172
195
|
|
|
173
196
|
Please respect the legal constraints when using this feature.
|
|
174
197
|
|
|
198
|
+
#### Fetch content in PDF and TEI format
|
|
199
|
+
|
|
200
|
+
OpenAlex reference: [Get content](https://docs.openalex.org/how-to-use-the-api/get-content)
|
|
201
|
+
|
|
202
|
+
Only for Works. Retrieve the full-text content of a work in PDF or TEI (Text Encoding Initiative) XML format, if available.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from pyalex import Works
|
|
206
|
+
|
|
207
|
+
# Get a work
|
|
208
|
+
w = Works()["W4412002745"]
|
|
209
|
+
|
|
210
|
+
# Access the PDF content
|
|
211
|
+
pdf_content = w.pdf.get()
|
|
212
|
+
|
|
213
|
+
# Or access the TEI content
|
|
214
|
+
tei_content = w.tei.get()
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
You can also download the content directly to a file:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
from pyalex import Works
|
|
221
|
+
|
|
222
|
+
w = Works()["W4412002745"]
|
|
223
|
+
|
|
224
|
+
# Download PDF to a file
|
|
225
|
+
w.pdf.download("document.pdf")
|
|
226
|
+
|
|
227
|
+
# Download TEI to a file
|
|
228
|
+
w.tei.download("document.xml")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
You can also get the URL of the content without downloading it:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from pyalex import Works
|
|
235
|
+
|
|
236
|
+
w = Works()["W4412002745"]
|
|
237
|
+
|
|
238
|
+
# Get the URL of the PDF
|
|
239
|
+
pdf_url = w.pdf.url
|
|
240
|
+
|
|
241
|
+
# Get the URL of the TEI
|
|
242
|
+
tei_url = w.tei.url
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Note: Content availability depends on the publisher's open access policies and licensing agreements.
|
|
246
|
+
|
|
175
247
|
### Get lists of entities
|
|
176
248
|
|
|
177
249
|
```python
|
|
@@ -420,6 +492,10 @@ with open(Path("works.json")) as f:
|
|
|
420
492
|
works = [Work(w) for w in json.load(f)]
|
|
421
493
|
```
|
|
422
494
|
|
|
495
|
+
## Standards
|
|
496
|
+
|
|
497
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
498
|
+
|
|
423
499
|
## Code snippets
|
|
424
500
|
|
|
425
501
|
A list of awesome use cases of the OpenAlex dataset.
|
|
@@ -498,20 +574,21 @@ Works() \
|
|
|
498
574
|
|
|
499
575
|
```
|
|
500
576
|
|
|
501
|
-
## Experimental
|
|
502
577
|
|
|
503
|
-
|
|
578
|
+
## Troubleshooting
|
|
504
579
|
|
|
505
|
-
|
|
580
|
+
### Max retries
|
|
581
|
+
|
|
582
|
+
By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` is related to the delay between two retries, and `retry_http_codes` are the HTTP error codes that should trigger a retry.
|
|
506
583
|
|
|
507
584
|
```python
|
|
508
|
-
import
|
|
585
|
+
from pyalex import config
|
|
509
586
|
|
|
510
|
-
|
|
587
|
+
config.max_retries = 0
|
|
588
|
+
config.retry_backoff_factor = 0.1
|
|
589
|
+
config.retry_http_codes = [429, 500, 503]
|
|
511
590
|
```
|
|
512
591
|
|
|
513
|
-
If you configure an invalid API key all requests to OpenAlex will fail.
|
|
514
|
-
|
|
515
592
|
## Alternatives
|
|
516
593
|
|
|
517
594
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pyalex/__init__.py,sha256=WlhyaRF8dXjCM7jzr9kiEcXMYLtD33lcXkZi-WNdcC0,1719
|
|
2
|
+
pyalex/_version.py,sha256=p-gqOONSPfi625BzXHIXfHuTTii-Zx-jV6poH7i3Jb8,701
|
|
3
|
+
pyalex/api.py,sha256=szLCHR3xTZrOaO45Fq1ShQk1nNdDB3OdgpIoyTXOyLY,30383
|
|
4
|
+
pyalex-0.20.dist-info/licenses/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
+
pyalex-0.20.dist-info/METADATA,sha256=YXv_LXlaV5vEF7qrge1Ln28pFx_IqVoMmixFcce5_mU,18133
|
|
6
|
+
pyalex-0.20.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
+
pyalex-0.20.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
+
pyalex-0.20.dist-info/RECORD,,
|
pyalex-0.19.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
pyalex/__init__.py,sha256=upMXti6aJF6lz8J4EbdnQa13GhJzFGre7fnS_tj8NOw,1539
|
|
2
|
-
pyalex/_version.py,sha256=lc7e3Va7b6LwW9-6o0HxJBuB54_pZOZmQ9I6QHKL8AQ,701
|
|
3
|
-
pyalex/api.py,sha256=_g42dO3hkeKUQR3uXjLcAPAj3zxuT190Z2_jyH3p3Es,27769
|
|
4
|
-
pyalex-0.19.dist-info/licenses/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
-
pyalex-0.19.dist-info/METADATA,sha256=mawFYplV4A8N7p4lR0zVHGFT2ZRRNZ16bcVDUSX9ddU,16197
|
|
6
|
-
pyalex-0.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
-
pyalex-0.19.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
-
pyalex-0.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|