pyalex 0.11__py3-none-any.whl → 0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyalex/_version.py +14 -2
- pyalex/api.py +172 -145
- {pyalex-0.11.dist-info → pyalex-0.13.dist-info}/METADATA +43 -15
- pyalex-0.13.dist-info/RECORD +8 -0
- {pyalex-0.11.dist-info → pyalex-0.13.dist-info}/WHEEL +1 -1
- pyalex-0.11.dist-info/RECORD +0 -8
- {pyalex-0.11.dist-info → pyalex-0.13.dist-info}/LICENSE +0 -0
- {pyalex-0.11.dist-info → pyalex-0.13.dist-info}/top_level.txt +0 -0
pyalex/_version.py
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
1
|
# file generated by setuptools_scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
TYPE_CHECKING = False
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from typing import Tuple, Union
|
|
6
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
|
+
else:
|
|
8
|
+
VERSION_TUPLE = object
|
|
9
|
+
|
|
10
|
+
version: str
|
|
11
|
+
__version__: str
|
|
12
|
+
__version_tuple__: VERSION_TUPLE
|
|
13
|
+
version_tuple: VERSION_TUPLE
|
|
14
|
+
|
|
15
|
+
__version__ = version = '0.13'
|
|
16
|
+
__version_tuple__ = version_tuple = (0, 13)
|
pyalex/api.py
CHANGED
|
@@ -3,6 +3,8 @@ import warnings
|
|
|
3
3
|
from urllib.parse import quote_plus
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
|
+
from requests.auth import AuthBase
|
|
7
|
+
from urllib3.util import Retry
|
|
6
8
|
|
|
7
9
|
try:
|
|
8
10
|
from pyalex._version import __version__
|
|
@@ -18,13 +20,19 @@ class AlexConfig(dict):
|
|
|
18
20
|
return super().__setitem__(key, value)
|
|
19
21
|
|
|
20
22
|
|
|
21
|
-
config = AlexConfig(
|
|
23
|
+
config = AlexConfig(
|
|
24
|
+
email=None,
|
|
25
|
+
api_key=None,
|
|
26
|
+
user_agent="pyalex/" + __version__,
|
|
27
|
+
openalex_url="https://api.openalex.org",
|
|
28
|
+
max_retries=0,
|
|
29
|
+
retry_backoff_factor=0.1,
|
|
30
|
+
retry_http_codes=[429, 500, 503],
|
|
31
|
+
)
|
|
22
32
|
|
|
23
33
|
|
|
24
34
|
def _flatten_kv(d, prefix=""):
|
|
25
|
-
|
|
26
35
|
if isinstance(d, dict):
|
|
27
|
-
|
|
28
36
|
t = []
|
|
29
37
|
for k, v in d.items():
|
|
30
38
|
if isinstance(v, list):
|
|
@@ -36,7 +44,6 @@ def _flatten_kv(d, prefix=""):
|
|
|
36
44
|
|
|
37
45
|
return ",".join(t)
|
|
38
46
|
else:
|
|
39
|
-
|
|
40
47
|
# workaround for bug https://groups.google.com/u/1/g/openalex-users/c/t46RWnzZaXc
|
|
41
48
|
d = str(d).lower() if isinstance(d, bool) else d
|
|
42
49
|
|
|
@@ -44,7 +51,6 @@ def _flatten_kv(d, prefix=""):
|
|
|
44
51
|
|
|
45
52
|
|
|
46
53
|
def _params_merge(params, add_params):
|
|
47
|
-
|
|
48
54
|
for k, _v in add_params.items():
|
|
49
55
|
if (
|
|
50
56
|
k in params
|
|
@@ -72,8 +78,23 @@ def _params_merge(params, add_params):
|
|
|
72
78
|
params[k] = add_params[k]
|
|
73
79
|
|
|
74
80
|
|
|
75
|
-
def
|
|
81
|
+
def _get_requests_session():
|
|
82
|
+
# create a Requests Session with automatic retry:
|
|
83
|
+
requests_session = requests.Session()
|
|
84
|
+
retries = Retry(
|
|
85
|
+
total=config.max_retries,
|
|
86
|
+
backoff_factor=config.retry_backoff_factor,
|
|
87
|
+
status_forcelist=config.retry_http_codes,
|
|
88
|
+
allowed_methods={"GET"},
|
|
89
|
+
)
|
|
90
|
+
requests_session.mount(
|
|
91
|
+
"https://", requests.adapters.HTTPAdapter(max_retries=retries)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
return requests_session
|
|
95
|
+
|
|
76
96
|
|
|
97
|
+
def invert_abstract(inv_index):
|
|
77
98
|
if inv_index is not None:
|
|
78
99
|
l_inv = [(w, p) for w, pos in inv_index.items() for p in pos]
|
|
79
100
|
return " ".join(map(lambda x: x[0], sorted(l_inv, key=lambda x: x[1])))
|
|
@@ -84,126 +105,102 @@ class QueryError(ValueError):
|
|
|
84
105
|
|
|
85
106
|
|
|
86
107
|
class OpenAlexEntity(dict):
|
|
87
|
-
|
|
88
108
|
pass
|
|
89
109
|
|
|
90
110
|
|
|
91
|
-
class
|
|
92
|
-
|
|
111
|
+
class Paginator:
|
|
112
|
+
VALUE_CURSOR_START = "*"
|
|
113
|
+
VALUE_NUMBER_START = 1
|
|
93
114
|
|
|
94
|
-
def
|
|
115
|
+
def __init__(
|
|
116
|
+
self, endpoint_class, method="cursor", value=None, per_page=None, n_max=None
|
|
117
|
+
):
|
|
118
|
+
self.method = method
|
|
119
|
+
self.endpoint_class = endpoint_class
|
|
120
|
+
self.value = value
|
|
121
|
+
self.per_page = per_page
|
|
122
|
+
self.n_max = n_max
|
|
95
123
|
|
|
96
|
-
|
|
97
|
-
return invert_abstract(self["abstract_inverted_index"])
|
|
124
|
+
self._next_value = value
|
|
98
125
|
|
|
99
|
-
|
|
126
|
+
def __iter__(self):
|
|
127
|
+
self.n = 0
|
|
100
128
|
|
|
101
|
-
|
|
129
|
+
return self
|
|
102
130
|
|
|
103
|
-
|
|
131
|
+
def _is_max(self):
|
|
132
|
+
if self.n_max and self.n >= self.n_max:
|
|
133
|
+
return True
|
|
134
|
+
return False
|
|
104
135
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
)
|
|
109
|
-
res.raise_for_status()
|
|
110
|
-
results = res.json()
|
|
136
|
+
def __next__(self):
|
|
137
|
+
if self._next_value is None or self._is_max():
|
|
138
|
+
raise StopIteration
|
|
111
139
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
140
|
+
if self.method == "cursor":
|
|
141
|
+
pagination_params = {"cursor": self._next_value}
|
|
142
|
+
elif self.method == "page":
|
|
143
|
+
pagination_params = {"page": self._next_value}
|
|
115
144
|
else:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class Author(OpenAlexEntity):
|
|
120
|
-
pass
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
class Source(OpenAlexEntity):
|
|
124
|
-
pass
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
class Institution(OpenAlexEntity):
|
|
128
|
-
pass
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
class Concept(OpenAlexEntity):
|
|
132
|
-
pass
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
class Publisher(OpenAlexEntity):
|
|
136
|
-
pass
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
class Funder(OpenAlexEntity):
|
|
140
|
-
pass
|
|
141
|
-
|
|
142
|
-
# deprecated
|
|
145
|
+
raise ValueError()
|
|
143
146
|
|
|
147
|
+
results, meta = self.endpoint_class.get(
|
|
148
|
+
return_meta=True, per_page=self.per_page, **pagination_params
|
|
149
|
+
)
|
|
144
150
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
# warn about deprecation
|
|
148
|
-
warnings.warn(
|
|
149
|
-
"Venue is deprecated. Use Sources instead.",
|
|
150
|
-
DeprecationWarning,
|
|
151
|
-
stacklevel=2,
|
|
152
|
-
)
|
|
151
|
+
if self.method == "cursor":
|
|
152
|
+
self._next_value = meta["next_cursor"]
|
|
153
153
|
|
|
154
|
-
|
|
154
|
+
if self.method == "page":
|
|
155
|
+
if len(results) > 0:
|
|
156
|
+
self._next_value = meta["page"] + 1
|
|
157
|
+
else:
|
|
158
|
+
self._next_value = None
|
|
155
159
|
|
|
160
|
+
self.n = self.n + len(results)
|
|
156
161
|
|
|
157
|
-
|
|
158
|
-
def __init__(self, alex_class=None, per_page=None, cursor="*", n_max=None):
|
|
162
|
+
return results
|
|
159
163
|
|
|
160
|
-
self.alex_class = alex_class
|
|
161
|
-
self.per_page = per_page
|
|
162
|
-
self.cursor = cursor
|
|
163
|
-
self.n_max = n_max
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
class OpenAlexAuth(AuthBase):
|
|
166
|
+
"""OpenAlex auth class based on requests auth
|
|
166
167
|
|
|
167
|
-
|
|
168
|
+
Includes the email, api_key and user-agent headers.
|
|
168
169
|
|
|
169
|
-
|
|
170
|
+
arguments:
|
|
171
|
+
config: an AlexConfig object
|
|
170
172
|
|
|
171
|
-
|
|
173
|
+
"""
|
|
172
174
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
+
def __init__(self, config):
|
|
176
|
+
self.config = config
|
|
175
177
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
178
|
+
def __call__(self, r):
|
|
179
|
+
if self.config.api_key:
|
|
180
|
+
r.headers["Authorization"] = f"Bearer {self.config.api_key}"
|
|
179
181
|
|
|
180
|
-
if
|
|
181
|
-
|
|
182
|
+
if self.config.email:
|
|
183
|
+
r.headers["From"] = self.config.email
|
|
182
184
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
+
if self.config.user_agent:
|
|
186
|
+
r.headers["User-Agent"] = self.config.user_agent
|
|
185
187
|
|
|
186
188
|
return r
|
|
187
189
|
|
|
188
190
|
|
|
189
191
|
class BaseOpenAlex:
|
|
190
|
-
|
|
191
192
|
"""Base class for OpenAlex objects."""
|
|
192
193
|
|
|
193
194
|
def __init__(self, params=None):
|
|
194
|
-
|
|
195
195
|
self.params = params
|
|
196
196
|
|
|
197
197
|
def _get_multi_items(self, record_list):
|
|
198
|
-
|
|
199
198
|
return self.filter(openalex_id="|".join(record_list)).get()
|
|
200
199
|
|
|
201
200
|
def _full_collection_name(self):
|
|
202
|
-
|
|
203
201
|
return config.openalex_url + "/" + self.__class__.__name__.lower()
|
|
204
202
|
|
|
205
203
|
def __getattr__(self, key):
|
|
206
|
-
|
|
207
204
|
if key == "groupby":
|
|
208
205
|
raise AttributeError(
|
|
209
206
|
"Object has no attribute 'groupby'. " "Did you mean 'group_by'?"
|
|
@@ -218,31 +215,20 @@ class BaseOpenAlex:
|
|
|
218
215
|
return getattr(self, key)
|
|
219
216
|
|
|
220
217
|
def __getitem__(self, record_id):
|
|
221
|
-
|
|
222
218
|
if isinstance(record_id, list):
|
|
223
219
|
return self._get_multi_items(record_id)
|
|
224
220
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
res = requests.get(
|
|
228
|
-
url,
|
|
229
|
-
headers={"User-Agent": "pyalex/" + __version__, "email": config.email},
|
|
230
|
-
params=params,
|
|
221
|
+
return self._get_from_url(
|
|
222
|
+
self._full_collection_name() + "/" + record_id, return_meta=False
|
|
231
223
|
)
|
|
232
|
-
res.raise_for_status()
|
|
233
|
-
res_json = res.json()
|
|
234
|
-
|
|
235
|
-
return self.resource_class(res_json)
|
|
236
224
|
|
|
237
225
|
@property
|
|
238
226
|
def url(self):
|
|
239
|
-
|
|
240
227
|
if not self.params:
|
|
241
228
|
return self._full_collection_name()
|
|
242
229
|
|
|
243
230
|
l_params = []
|
|
244
231
|
for k, v in self.params.items():
|
|
245
|
-
|
|
246
232
|
if v is None:
|
|
247
233
|
pass
|
|
248
234
|
elif isinstance(v, list):
|
|
@@ -263,39 +249,29 @@ class BaseOpenAlex:
|
|
|
263
249
|
|
|
264
250
|
return m["count"]
|
|
265
251
|
|
|
266
|
-
def
|
|
267
|
-
|
|
268
|
-
if per_page is not None and (per_page < 1 or per_page > 200):
|
|
269
|
-
raise ValueError("per_page should be a number between 1 and 200.")
|
|
270
|
-
|
|
271
|
-
self._add_params("per-page", per_page)
|
|
272
|
-
self._add_params("page", page)
|
|
273
|
-
self._add_params("cursor", cursor)
|
|
274
|
-
|
|
275
|
-
params = {"api_key": config.api_key} if config.api_key else {}
|
|
276
|
-
res = requests.get(
|
|
277
|
-
self.url,
|
|
278
|
-
headers={"User-Agent": "pyalex/" + __version__, "email": config.email},
|
|
279
|
-
params=params,
|
|
280
|
-
)
|
|
252
|
+
def _get_from_url(self, url, return_meta=False):
|
|
253
|
+
res = _get_requests_session().get(url, auth=OpenAlexAuth(config))
|
|
281
254
|
|
|
282
255
|
# handle query errors
|
|
283
256
|
if res.status_code == 403:
|
|
284
|
-
res_json = res.json()
|
|
285
257
|
if (
|
|
286
|
-
isinstance(
|
|
287
|
-
and "query parameters" in
|
|
258
|
+
isinstance(res.json()["error"], str)
|
|
259
|
+
and "query parameters" in res.json()["error"]
|
|
288
260
|
):
|
|
289
|
-
raise QueryError(
|
|
290
|
-
res.raise_for_status()
|
|
261
|
+
raise QueryError(res.json()["message"])
|
|
291
262
|
|
|
263
|
+
res.raise_for_status()
|
|
292
264
|
res_json = res.json()
|
|
293
265
|
|
|
294
266
|
# group-by or results page
|
|
295
|
-
if "group-by" in self.params:
|
|
267
|
+
if self.params and "group-by" in self.params:
|
|
296
268
|
results = res_json["group_by"]
|
|
297
|
-
|
|
269
|
+
elif "results" in res_json:
|
|
298
270
|
results = [self.resource_class(ent) for ent in res_json["results"]]
|
|
271
|
+
elif "id" in res_json:
|
|
272
|
+
results = self.resource_class(res_json)
|
|
273
|
+
else:
|
|
274
|
+
raise ValueError("Unknown response format")
|
|
299
275
|
|
|
300
276
|
# return result and metadata
|
|
301
277
|
if return_meta:
|
|
@@ -303,31 +279,32 @@ class BaseOpenAlex:
|
|
|
303
279
|
else:
|
|
304
280
|
return results
|
|
305
281
|
|
|
306
|
-
def
|
|
307
|
-
|
|
282
|
+
def get(self, return_meta=False, page=None, per_page=None, cursor=None):
|
|
283
|
+
if per_page is not None and (per_page < 1 or per_page > 200):
|
|
284
|
+
raise ValueError("per_page should be a number between 1 and 200.")
|
|
285
|
+
|
|
286
|
+
self._add_params("per-page", per_page)
|
|
287
|
+
self._add_params("page", page)
|
|
288
|
+
self._add_params("cursor", cursor)
|
|
308
289
|
|
|
309
|
-
|
|
310
|
-
Both methods are supported by PyAlex, although cursor paging seems to be
|
|
311
|
-
easier to implement and less error-prone.
|
|
290
|
+
return self._get_from_url(self.url, return_meta=return_meta)
|
|
312
291
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
292
|
+
def paginate(self, method="cursor", page=1, per_page=None, cursor="*", n_max=10000):
|
|
293
|
+
if method == "cursor":
|
|
294
|
+
value = cursor
|
|
295
|
+
elif method == "page":
|
|
296
|
+
value = page
|
|
297
|
+
else:
|
|
298
|
+
raise ValueError("Method should be 'cursor' or 'page'")
|
|
318
299
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
"""
|
|
323
|
-
return CursorPaginator(self, per_page=per_page, cursor=cursor, n_max=n_max)
|
|
300
|
+
return Paginator(
|
|
301
|
+
self, method=method, value=value, per_page=per_page, n_max=n_max
|
|
302
|
+
)
|
|
324
303
|
|
|
325
304
|
def random(self):
|
|
326
|
-
|
|
327
305
|
return self.__getitem__("random")
|
|
328
306
|
|
|
329
307
|
def _add_params(self, argument, new_params):
|
|
330
|
-
|
|
331
308
|
if self.params is None:
|
|
332
309
|
self.params = {argument: new_params}
|
|
333
310
|
elif argument in self.params and isinstance(self.params[argument], dict):
|
|
@@ -338,74 +315,124 @@ class BaseOpenAlex:
|
|
|
338
315
|
logging.debug("Params updated:", self.params)
|
|
339
316
|
|
|
340
317
|
def filter(self, **kwargs):
|
|
341
|
-
|
|
342
318
|
self._add_params("filter", kwargs)
|
|
343
319
|
return self
|
|
344
320
|
|
|
345
321
|
def search_filter(self, **kwargs):
|
|
346
|
-
|
|
347
322
|
self._add_params("filter", {f"{k}.search": v for k, v in kwargs.items()})
|
|
348
323
|
return self
|
|
349
324
|
|
|
350
325
|
def sort(self, **kwargs):
|
|
351
|
-
|
|
352
326
|
self._add_params("sort", kwargs)
|
|
353
327
|
return self
|
|
354
328
|
|
|
355
329
|
def group_by(self, group_key):
|
|
356
|
-
|
|
357
330
|
self._add_params("group-by", group_key)
|
|
358
331
|
return self
|
|
359
332
|
|
|
360
333
|
def search(self, s):
|
|
361
|
-
|
|
362
334
|
self._add_params("search", s)
|
|
363
335
|
return self
|
|
364
336
|
|
|
365
337
|
def sample(self, n, seed=None):
|
|
366
|
-
|
|
367
338
|
self._add_params("sample", n)
|
|
368
339
|
self._add_params("seed", seed)
|
|
369
340
|
return self
|
|
370
341
|
|
|
371
342
|
def select(self, s):
|
|
372
|
-
|
|
373
343
|
self._add_params("select", s)
|
|
374
344
|
return self
|
|
375
345
|
|
|
376
346
|
|
|
347
|
+
# The API
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class Work(OpenAlexEntity):
|
|
351
|
+
def __getitem__(self, key):
|
|
352
|
+
if key == "abstract":
|
|
353
|
+
return invert_abstract(self["abstract_inverted_index"])
|
|
354
|
+
|
|
355
|
+
return super().__getitem__(key)
|
|
356
|
+
|
|
357
|
+
def ngrams(self, return_meta=False):
|
|
358
|
+
openalex_id = self["id"].split("/")[-1]
|
|
359
|
+
n_gram_url = f"{config.openalex_url}/works/{openalex_id}/ngrams"
|
|
360
|
+
|
|
361
|
+
res = _get_requests_session().get(n_gram_url, auth=OpenAlexAuth(config))
|
|
362
|
+
res.raise_for_status()
|
|
363
|
+
results = res.json()
|
|
364
|
+
|
|
365
|
+
# return result and metadata
|
|
366
|
+
if return_meta:
|
|
367
|
+
return results["ngrams"], results["meta"]
|
|
368
|
+
else:
|
|
369
|
+
return results["ngrams"]
|
|
370
|
+
|
|
371
|
+
|
|
377
372
|
class Works(BaseOpenAlex):
|
|
378
373
|
resource_class = Work
|
|
379
374
|
|
|
380
375
|
|
|
376
|
+
class Author(OpenAlexEntity):
|
|
377
|
+
pass
|
|
378
|
+
|
|
379
|
+
|
|
381
380
|
class Authors(BaseOpenAlex):
|
|
382
381
|
resource_class = Author
|
|
383
382
|
|
|
384
383
|
|
|
384
|
+
class Source(OpenAlexEntity):
|
|
385
|
+
pass
|
|
386
|
+
|
|
387
|
+
|
|
385
388
|
class Sources(BaseOpenAlex):
|
|
386
389
|
resource_class = Source
|
|
387
390
|
|
|
388
391
|
|
|
392
|
+
class Institution(OpenAlexEntity):
|
|
393
|
+
pass
|
|
394
|
+
|
|
395
|
+
|
|
389
396
|
class Institutions(BaseOpenAlex):
|
|
390
397
|
resource_class = Institution
|
|
391
398
|
|
|
392
399
|
|
|
400
|
+
class Concept(OpenAlexEntity):
|
|
401
|
+
pass
|
|
402
|
+
|
|
403
|
+
|
|
393
404
|
class Concepts(BaseOpenAlex):
|
|
394
405
|
resource_class = Concept
|
|
395
406
|
|
|
396
407
|
|
|
408
|
+
class Publisher(OpenAlexEntity):
|
|
409
|
+
pass
|
|
410
|
+
|
|
411
|
+
|
|
397
412
|
class Publishers(BaseOpenAlex):
|
|
398
413
|
resource_class = Publisher
|
|
399
414
|
|
|
400
415
|
|
|
416
|
+
class Funder(OpenAlexEntity):
|
|
417
|
+
pass
|
|
418
|
+
|
|
419
|
+
|
|
401
420
|
class Funders(BaseOpenAlex):
|
|
402
421
|
resource_class = Funder
|
|
403
422
|
|
|
404
|
-
# deprecated
|
|
405
423
|
|
|
424
|
+
def Venue(*args, **kwargs): # deprecated
|
|
425
|
+
# warn about deprecation
|
|
426
|
+
warnings.warn(
|
|
427
|
+
"Venue is deprecated. Use Sources instead.",
|
|
428
|
+
DeprecationWarning,
|
|
429
|
+
stacklevel=2,
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
return Source(*args, **kwargs)
|
|
406
433
|
|
|
407
|
-
def Venues(*args, **kwargs):
|
|
408
434
|
|
|
435
|
+
def Venues(*args, **kwargs): # deprecated
|
|
409
436
|
# warn about deprecation
|
|
410
437
|
warnings.warn(
|
|
411
438
|
"Venues is deprecated. Use Sources instead.",
|
|
@@ -1,24 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.11
|
|
3
|
+
Version: 0.13
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Classifier: Development Status :: 5 - Production/Stable
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.6
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
11
9
|
Classifier: Programming Language :: Python :: 3.8
|
|
12
10
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
-
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
17
|
Requires-Dist: requests
|
|
18
|
+
Requires-Dist: urllib3
|
|
18
19
|
Provides-Extra: lint
|
|
19
20
|
Requires-Dist: ruff ; extra == 'lint'
|
|
20
21
|
Provides-Extra: test
|
|
21
22
|
Requires-Dist: pytest ; extra == 'test'
|
|
23
|
+
Requires-Dist: pytest-xdist ; extra == 'test'
|
|
22
24
|
|
|
23
25
|
<p align="center">
|
|
24
26
|
<img alt="PyAlex - a Python wrapper for OpenAlex" src="https://github.com/J535D165/pyalex/raw/main/pyalex_repocard.svg">
|
|
@@ -28,6 +30,9 @@ Requires-Dist: pytest ; extra == 'test'
|
|
|
28
30
|
|
|
29
31
|
[DOI](https://zenodo.org/badge/latestdoi/557541347)
|
|
30
32
|
|
|
33
|
+
[OpenSSF Scorecard](https://securityscorecards.dev/viewer/?uri=github.com/J535D165/pyalex)
|
|
34
|
+
|
|
35
|
+
|
|
31
36
|
PyAlex is a Python library for [OpenAlex](https://openalex.org/). OpenAlex is
|
|
32
37
|
an index of hundreds of millions of interconnected scholarly papers, authors,
|
|
33
38
|
institutions, and more. OpenAlex offers a robust, open, and free [REST API](https://docs.openalex.org/) to extract, aggregate, or search scholarly data.
|
|
@@ -58,7 +63,7 @@ We aim to cover the entire API, and we are looking for help. We are welcoming Pu
|
|
|
58
63
|
|
|
59
64
|
## Installation
|
|
60
65
|
|
|
61
|
-
PyAlex requires Python 3.
|
|
66
|
+
PyAlex requires Python 3.8 or later.
|
|
62
67
|
|
|
63
68
|
```sh
|
|
64
69
|
pip install pyalex
|
|
@@ -282,29 +287,50 @@ Works().filter(institutions={"country_code": "fr|gb"}).get()
|
|
|
282
287
|
|
|
283
288
|
#### Paging
|
|
284
289
|
|
|
285
|
-
OpenAlex offers two methods for paging: [basic paging](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#basic-paging) and [cursor paging](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#cursor-paging). Both methods are supported by
|
|
286
|
-
PyAlex, although cursor paging seems to be easier to implement and less error-prone.
|
|
290
|
+
OpenAlex offers two methods for paging: [basic (offset) paging](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#basic-paging) and [cursor paging](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#cursor-paging). Both methods are supported by PyAlex.
|
|
287
291
|
|
|
288
|
-
#####
|
|
292
|
+
##### Cursor paging (default)
|
|
289
293
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
294
|
+
Use the method `paginate()` to paginate results. Each returned page is a list
|
|
295
|
+
of records, with a maximum of `per_page` (default 25). By default,
|
|
296
|
+
`paginate`'s argument `n_max` is set to 10000. Use `None` to retrieve all
|
|
297
|
+
results.
|
|
293
298
|
|
|
294
|
-
|
|
299
|
+
```python
|
|
300
|
+
from pyalex import Authors
|
|
295
301
|
|
|
296
|
-
|
|
297
|
-
|
|
302
|
+
pager = Authors().search_filter(display_name="einstein").paginate(per_page=200)
|
|
303
|
+
|
|
304
|
+
for page in pager:
|
|
305
|
+
print(len(page))
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
> Looking for an easy method to iterate the records of a pager?
|
|
298
309
|
|
|
299
310
|
```python
|
|
311
|
+
from itertools import chain
|
|
300
312
|
from pyalex import Authors
|
|
301
313
|
|
|
302
|
-
|
|
314
|
+
query = Authors().search_filter(display_name="einstein")
|
|
315
|
+
|
|
316
|
+
for record in chain(*query.paginate(per_page=200)):
|
|
317
|
+
print(record["id"])
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
##### Basic paging
|
|
321
|
+
|
|
322
|
+
See limitations of [basic paging](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#basic-paging) in the OpenAlex documentation.
|
|
323
|
+
|
|
324
|
+
```python
|
|
325
|
+
from pyalex import Authors
|
|
326
|
+
|
|
327
|
+
pager = Authors().search_filter(display_name="einstein").paginate(method="page", per_page=200)
|
|
303
328
|
|
|
304
329
|
for page in pager:
|
|
305
330
|
print(len(page))
|
|
306
331
|
```
|
|
307
332
|
|
|
333
|
+
|
|
308
334
|
### Get N-grams
|
|
309
335
|
|
|
310
336
|
OpenAlex reference: [Get N-grams](https://docs.openalex.org/api-entities/works/get-n-grams).
|
|
@@ -389,6 +415,8 @@ R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR)
|
|
|
389
415
|
|
|
390
416
|
## Contact
|
|
391
417
|
|
|
418
|
+
> This library is a community contribution. The authors of this Python library aren't affiliated with OpenAlex.
|
|
419
|
+
|
|
392
420
|
Feel free to reach out with questions, remarks, and suggestions. The
|
|
393
421
|
[issue tracker](/issues) is a good starting point. You can also email me at
|
|
394
422
|
[jonathandebruinos@gmail.com](mailto:jonathandebruinos@gmail.com).
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pyalex/__init__.py,sha256=UrEW9s9NbULtmmnYUUgfbrDujleV8XQLiMRYGdRnm9M,1137
|
|
2
|
+
pyalex/_version.py,sha256=4Ti_UJ2UR2LyAc9zW9aeUVpFeq-DXxy65V2wynOuCi0,408
|
|
3
|
+
pyalex/api.py,sha256=vefNV54OG1daTrKHSDq9Jcy4JhiYi0HSP3GsEzA47uk,11633
|
|
4
|
+
pyalex-0.13.dist-info/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
+
pyalex-0.13.dist-info/METADATA,sha256=vMMbnCUgNUaw99XlqucQZMfZsOZWrXXJOmRpPJsTdSU,12916
|
|
6
|
+
pyalex-0.13.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
|
7
|
+
pyalex-0.13.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
+
pyalex-0.13.dist-info/RECORD,,
|
pyalex-0.11.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
pyalex/__init__.py,sha256=UrEW9s9NbULtmmnYUUgfbrDujleV8XQLiMRYGdRnm9M,1137
|
|
2
|
-
pyalex/_version.py,sha256=BAt_8Sc_nk9NuW3Q0g_MC0YGwpwY1603ia-96CY7p1k,157
|
|
3
|
-
pyalex/api.py,sha256=h0eNgT8LAhubqkzE0WkwXTGzagpstRkJenW_zSrOR0Y,10396
|
|
4
|
-
pyalex-0.11.dist-info/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
-
pyalex-0.11.dist-info/METADATA,sha256=9tloa1HKweAmPNz3LQ0Z2AuBsEg294AVH2aK9YF3G-U,12202
|
|
6
|
-
pyalex-0.11.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
7
|
-
pyalex-0.11.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
-
pyalex-0.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|