pyalex 0.19__py3-none-any.whl → 0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyalex/__init__.py CHANGED
@@ -7,6 +7,8 @@ except ImportError:
7
7
 
8
8
  from pyalex.api import Author
9
9
  from pyalex.api import Authors
10
+ from pyalex.api import Award
11
+ from pyalex.api import Awards
10
12
  from pyalex.api import Concept
11
13
  from pyalex.api import Concepts
12
14
  from pyalex.api import Domain
@@ -18,6 +20,8 @@ from pyalex.api import Funders
18
20
  from pyalex.api import Institution
19
21
  from pyalex.api import Institutions
20
22
  from pyalex.api import Journals
23
+ from pyalex.api import Keyword
24
+ from pyalex.api import Keywords
21
25
  from pyalex.api import OpenAlexResponseList
22
26
  from pyalex.api import People
23
27
  from pyalex.api import Publisher
@@ -35,6 +39,8 @@ from pyalex.api import config
35
39
  from pyalex.api import invert_abstract
36
40
 
37
41
  __all__ = [
42
+ "Award",
43
+ "Awards",
38
44
  "Works",
39
45
  "Work",
40
46
  "Authors",
@@ -57,6 +63,8 @@ __all__ = [
57
63
  "Subfield",
58
64
  "Topics",
59
65
  "Topic",
66
+ "Keywords",
67
+ "Keyword",
60
68
  "People",
61
69
  "Journals",
62
70
  "autocomplete",
pyalex/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.19'
32
- __version_tuple__ = version_tuple = (0, 19)
31
+ __version__ = version = '0.20'
32
+ __version_tuple__ = version_tuple = (0, 20)
33
33
 
34
34
  __commit_id__ = commit_id = None
pyalex/api.py CHANGED
@@ -874,9 +874,88 @@ class BaseOpenAlex:
874
874
  return resp_list
875
875
 
876
876
 
877
+ class BaseContent:
878
+ """Class representing content in OpenAlex."""
879
+
880
+ def __init__(self, key):
881
+ self.key = key
882
+
883
+ def __repr__(self):
884
+ return f"Content(key='{self.key}')"
885
+
886
+ @property
887
+ def url(self):
888
+ """Get the URL for the content.
889
+
890
+ Returns
891
+ -------
892
+ str
893
+ URL for the content.
894
+ """
895
+ return f"https://content.openalex.org/works/{self.key}"
896
+
897
+ def get(self):
898
+ """Get the content
899
+
900
+ Returns
901
+ -------
902
+ bytes
903
+ Content of the request.
904
+ """
905
+ content_url = f"https://content.openalex.org/works/{self.key}"
906
+
907
+ res = _get_requests_session().get(
908
+ content_url, auth=OpenAlexAuth(config), allow_redirects=True
909
+ )
910
+ res.raise_for_status()
911
+ return res.content
912
+
913
+ def download(self, filepath):
914
+ """Download the content to a file.
915
+
916
+ Parameters
917
+ ----------
918
+ filepath : str
919
+ Path to save the content.
920
+ """
921
+
922
+ with open(filepath, "wb") as f:
923
+ f.write(self.get())
924
+
925
+
877
926
  # The API
878
927
 
879
928
 
929
+ class PDF(BaseContent):
930
+ """Class representing a PDF content in OpenAlex."""
931
+
932
+ @property
933
+ def url(self):
934
+ """Get the URL for the content.
935
+
936
+ Returns
937
+ -------
938
+ str
939
+ URL for the content.
940
+ """
941
+ return f"https://content.openalex.org/works/{self.key}.pdf"
942
+
943
+
944
+ class TEI(BaseContent):
945
+ """Class representing a TEI content in OpenAlex."""
946
+
947
+ @property
948
+ def url(self):
949
+ """Get the URL for the content.
950
+
951
+ Returns
952
+ -------
953
+ str
954
+ URL for the content.
955
+ """
956
+ return f"https://content.openalex.org/works/{self.key}.grobid-xml"
957
+
958
+
880
959
  class Work(OpenAlexEntity):
881
960
  """Class representing a work entity in OpenAlex."""
882
961
 
@@ -918,6 +997,28 @@ class Work(OpenAlexEntity):
918
997
  else:
919
998
  return resp_list
920
999
 
1000
+ @property
1001
+ def pdf(self):
1002
+ """Get the PDF content for the work.
1003
+
1004
+ Returns
1005
+ -------
1006
+ PDF
1007
+ PDF content object.
1008
+ """
1009
+ return PDF(self["id"].split("/")[-1])
1010
+
1011
+ @property
1012
+ def tei(self):
1013
+ """Get the TEI content for the work.
1014
+
1015
+ Returns
1016
+ -------
1017
+ TEI
1018
+ TEI content object.
1019
+ """
1020
+ return TEI(self["id"].split("/")[-1])
1021
+
921
1022
 
922
1023
  class Works(BaseOpenAlex):
923
1024
  """Class representing a collection of work entities in OpenAlex."""
@@ -1033,6 +1134,30 @@ class Funders(BaseOpenAlex):
1033
1134
  resource_class = Funder
1034
1135
 
1035
1136
 
1137
+ class Award(OpenAlexEntity):
1138
+ """Class representing an award entity in OpenAlex."""
1139
+
1140
+ pass
1141
+
1142
+
1143
+ class Awards(BaseOpenAlex):
1144
+ """Class representing a collection of award entities in OpenAlex."""
1145
+
1146
+ resource_class = Award
1147
+
1148
+
1149
+ class Keyword(OpenAlexEntity):
1150
+ """Class representing a keyword entity in OpenAlex."""
1151
+
1152
+ pass
1153
+
1154
+
1155
+ class Keywords(BaseOpenAlex):
1156
+ """Class representing a collection of keyword entities in OpenAlex."""
1157
+
1158
+ resource_class = Keyword
1159
+
1160
+
1036
1161
  class Autocomplete(OpenAlexEntity):
1037
1162
  """Class representing an autocomplete entity in OpenAlex."""
1038
1163
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyalex
3
- Version: 0.19
3
+ Version: 0.20
4
4
  Summary: Python interface to the OpenAlex database
5
5
  Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
6
6
  License: MIT
@@ -39,7 +39,18 @@ institutions, and more. OpenAlex offers a robust, open, and free [REST API](http
39
39
  PyAlex is a lightweight and thin Python interface to this API. PyAlex tries to
40
40
  stay as close as possible to the design of the original service.
41
41
 
42
- The following features of OpenAlex are currently supported by PyAlex:
42
+ The following entities of OpenAlex are currently supported by PyAlex:
43
+
44
+ - [x] Work
45
+ - [x] Author
46
+ - [x] Source
47
+ - [x] Institution
48
+ - [x] Concept
49
+ - [x] Topic
50
+ - [x] Publisher
51
+ - [x] Funder
52
+
53
+ Including the following functionality:
43
54
 
44
55
  - [x] Get single entities
45
56
  - [x] Filter entities
@@ -50,7 +61,7 @@ The following features of OpenAlex are currently supported by PyAlex:
50
61
  - [x] Sample
51
62
  - [x] Pagination
52
63
  - [x] Autocomplete endpoint
53
- - [x] N-grams
64
+ - [x] N-grams [Deprecated by OpenAlex]
54
65
  - [x] Authentication
55
66
 
56
67
  We aim to cover the entire API, and we are looking for help. We are welcoming Pull Requests.
@@ -59,6 +70,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
59
70
 
60
71
  - **Pipe operations** - PyAlex can handle multiple operations in a sequence. This allows the developer to write understandable queries. For examples, see [code snippets](#code-snippets).
61
72
  - **Plaintext abstracts** - OpenAlex [doesn't include plaintext abstracts](https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index) due to legal constraints. PyAlex can convert the inverted abstracts into [plaintext abstracts on the fly](#get-abstract).
73
+ - **Fetch content in PDF and TEI format** - Retrieve full-text content from OpenAlex in PDF or TEI XML formats. See [fetching content](#fetch-content-in-pdf-and-tei-format).
62
74
  - **Permissive license** - OpenAlex data is CC0 licensed :raised_hands:. PyAlex is published under the MIT license.
63
75
 
64
76
  ## Installation
@@ -74,40 +86,51 @@ pip install pyalex
74
86
  PyAlex offers support for all [Entity Objects](https://docs.openalex.org/api-entities/entities-overview): [Works](https://docs.openalex.org/api-entities/works), [Authors](https://docs.openalex.org/api-entities/authors), [Sources](https://docs.openalex.org/api-entities/sources), [Institutions](https://docs.openalex.org/api-entities/institutions), [Topics](https://docs.openalex.org/api-entities/topics), [Publishers](https://docs.openalex.org/api-entities/publishers), and [Funders](https://docs.openalex.org/api-entities/funders).
75
87
 
76
88
  ```python
77
- from pyalex import Works, Authors, Sources, Institutions, Topics, Publishers, Funders
89
+ from pyalex import (
90
+ Works,
91
+ Authors,
92
+ Sources,
93
+ Institutions,
94
+ Topics,
95
+ Keywords,
96
+ Publishers,
97
+ Funders,
98
+ Awards,
99
+ Concepts,
100
+ )
78
101
  ```
79
102
 
80
- ### The polite pool
103
+ ### Rate limits and authentication [Changed!]
81
104
 
82
- [The polite pool](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication#the-polite-pool) has much
83
- faster and more consistent response times. To get into the polite pool, you
84
- set your email:
105
+ **⚠️ API Key Required**: Starting February 13, 2026, an API key is **required** to use the OpenAlex API. API keys are free!
85
106
 
86
- ```python
87
- import pyalex
107
+ The OpenAlex API uses a credit-based rate limiting system. Different endpoint types consume different amounts of credits per request:
88
108
 
89
- pyalex.config.email = "mail@example.com"
90
- ```
109
+ - **Without API key**: 100 credits per day (testing/demos only)
110
+ - **With free API key**: 100,000 credits per day
111
+ - **Singleton requests** (e.g., `/works/W123`): Free (0 credits)
112
+ - **List requests** (e.g., `/works?filter=...`): 1 credit each
91
113
 
92
- ### Max retries
114
+ All users are limited to a maximum of 100 requests per second.
93
115
 
94
- By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` is related to the delay between two retry, and `retry_http_codes` are the HTTP error codes that should trigger a retry.
116
+ #### Get an API Key
117
+
118
+ 1. Create a free account at [openalex.org](https://openalex.org/)
119
+ 2. Go to [openalex.org/settings/api](https://openalex.org/settings/api) to get your API key
120
+ 3. Configure PyAlex with your key:
95
121
 
96
122
  ```python
97
- from pyalex import config
123
+ import pyalex
98
124
 
99
- config.max_retries = 0
100
- config.retry_backoff_factor = 0.1
101
- config.retry_http_codes = [429, 500, 503]
125
+ pyalex.config.api_key = "<YOUR_API_KEY>"
102
126
  ```
103
127
 
104
- ### Standards
128
+ For more information, see the [OpenAlex Rate limits and authentication documentation](https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication).
105
129
 
106
- OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
107
130
 
108
131
  ### Get single entity
109
132
 
110
- Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or Funder from OpenAlex by the
133
+ Get a single Work, Author, Source, Institution, Concept, Topic, Publisher, Funder or Award from OpenAlex by the
111
134
  OpenAlex ID, or by DOI or ROR.
112
135
 
113
136
  ```python
@@ -172,6 +195,55 @@ w["abstract"]
172
195
 
173
196
  Please respect the legal constraints when using this feature.
174
197
 
198
+ #### Fetch content in PDF and TEI format
199
+
200
+ OpenAlex reference: [Get content](https://docs.openalex.org/how-to-use-the-api/get-content)
201
+
202
+ Only for Works. Retrieve the full-text content of a work in PDF or TEI (Text Encoding Initiative) XML format, if available.
203
+
204
+ ```python
205
+ from pyalex import Works
206
+
207
+ # Get a work
208
+ w = Works()["W4412002745"]
209
+
210
+ # Access the PDF content
211
+ pdf_content = w.pdf.get()
212
+
213
+ # Or access the TEI content
214
+ tei_content = w.tei.get()
215
+ ```
216
+
217
+ You can also download the content directly to a file:
218
+
219
+ ```python
220
+ from pyalex import Works
221
+
222
+ w = Works()["W4412002745"]
223
+
224
+ # Download PDF to a file
225
+ w.pdf.download("document.pdf")
226
+
227
+ # Download TEI to a file
228
+ w.tei.download("document.xml")
229
+ ```
230
+
231
+ You can also get the URL of the content without downloading it:
232
+
233
+ ```python
234
+ from pyalex import Works
235
+
236
+ w = Works()["W4412002745"]
237
+
238
+ # Get the URL of the PDF
239
+ pdf_url = w.pdf.url
240
+
241
+ # Get the URL of the TEI
242
+ tei_url = w.tei.url
243
+ ```
244
+
245
+ Note: Content availability depends on the publisher's open access policies and licensing agreements.
246
+
175
247
  ### Get lists of entities
176
248
 
177
249
  ```python
@@ -420,6 +492,10 @@ with open(Path("works.json")) as f:
420
492
  works = [Work(w) for w in json.load(f)]
421
493
  ```
422
494
 
495
+ ## Standards
496
+
497
+ OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
498
+
423
499
  ## Code snippets
424
500
 
425
501
  A list of awesome use cases of the OpenAlex dataset.
@@ -498,20 +574,21 @@ Works() \
498
574
 
499
575
  ```
500
576
 
501
- ## Experimental
502
577
 
503
- ### Authentication
578
+ ## Troubleshooting
504
579
 
505
- OpenAlex experiments with authenticated requests at the moment. Authenticate your requests with
580
+ ### Max retries
581
+
582
+ By default, PyAlex will raise an error at the first failure when querying the OpenAlex API. You can set `max_retries` to a number higher than 0 to allow PyAlex to retry when an error occurs. `retry_backoff_factor` is related to the delay between two retries, and `retry_http_codes` are the HTTP error codes that should trigger a retry.
506
583
 
507
584
  ```python
508
- import pyalex
585
+ from pyalex import config
509
586
 
510
- pyalex.config.api_key = "<MY_KEY>"
587
+ config.max_retries = 0
588
+ config.retry_backoff_factor = 0.1
589
+ config.retry_http_codes = [429, 500, 503]
511
590
  ```
512
591
 
513
- If you configure an invalid API key all requests to OpenAlex will fail.
514
-
515
592
  ## Alternatives
516
593
 
517
594
  R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
@@ -0,0 +1,8 @@
1
+ pyalex/__init__.py,sha256=WlhyaRF8dXjCM7jzr9kiEcXMYLtD33lcXkZi-WNdcC0,1719
2
+ pyalex/_version.py,sha256=p-gqOONSPfi625BzXHIXfHuTTii-Zx-jV6poH7i3Jb8,701
3
+ pyalex/api.py,sha256=szLCHR3xTZrOaO45Fq1ShQk1nNdDB3OdgpIoyTXOyLY,30383
4
+ pyalex-0.20.dist-info/licenses/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
5
+ pyalex-0.20.dist-info/METADATA,sha256=YXv_LXlaV5vEF7qrge1Ln28pFx_IqVoMmixFcce5_mU,18133
6
+ pyalex-0.20.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
7
+ pyalex-0.20.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
8
+ pyalex-0.20.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,8 +0,0 @@
1
- pyalex/__init__.py,sha256=upMXti6aJF6lz8J4EbdnQa13GhJzFGre7fnS_tj8NOw,1539
2
- pyalex/_version.py,sha256=lc7e3Va7b6LwW9-6o0HxJBuB54_pZOZmQ9I6QHKL8AQ,701
3
- pyalex/api.py,sha256=_g42dO3hkeKUQR3uXjLcAPAj3zxuT190Z2_jyH3p3Es,27769
4
- pyalex-0.19.dist-info/licenses/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
5
- pyalex-0.19.dist-info/METADATA,sha256=mawFYplV4A8N7p4lR0zVHGFT2ZRRNZ16bcVDUSX9ddU,16197
6
- pyalex-0.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- pyalex-0.19.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
8
- pyalex-0.19.dist-info/RECORD,,