pyalex 0.17__py3-none-any.whl → 0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyalex/_version.py +16 -3
- pyalex/api.py +38 -14
- {pyalex-0.17.dist-info → pyalex-0.19.dist-info}/METADATA +53 -2
- pyalex-0.19.dist-info/RECORD +8 -0
- {pyalex-0.17.dist-info → pyalex-0.19.dist-info}/WHEEL +1 -1
- pyalex-0.17.dist-info/RECORD +0 -8
- {pyalex-0.17.dist-info → pyalex-0.19.dist-info/licenses}/LICENSE +0 -0
- {pyalex-0.17.dist-info → pyalex-0.19.dist-info}/top_level.txt +0 -0
pyalex/_version.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '0.
|
|
21
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.19'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 19)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
pyalex/api.py
CHANGED
|
@@ -12,6 +12,8 @@ try:
|
|
|
12
12
|
except ImportError:
|
|
13
13
|
__version__ = "0.0.0"
|
|
14
14
|
|
|
15
|
+
logger = logging.getLogger("pyalex")
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class AlexConfig(dict):
|
|
17
19
|
"""Configuration class for OpenAlex API.
|
|
@@ -338,12 +340,12 @@ class Paginator:
|
|
|
338
340
|
self.value = value
|
|
339
341
|
self.per_page = per_page
|
|
340
342
|
self.n_max = n_max
|
|
343
|
+
self.n = 0
|
|
341
344
|
|
|
342
345
|
self._next_value = value
|
|
346
|
+
self._session = _get_requests_session()
|
|
343
347
|
|
|
344
348
|
def __iter__(self):
|
|
345
|
-
self.n = 0
|
|
346
|
-
|
|
347
349
|
return self
|
|
348
350
|
|
|
349
351
|
def _is_max(self):
|
|
@@ -356,13 +358,22 @@ class Paginator:
|
|
|
356
358
|
raise StopIteration
|
|
357
359
|
|
|
358
360
|
if self.method == "cursor":
|
|
359
|
-
|
|
361
|
+
self.endpoint_class._add_params("cursor", self._next_value)
|
|
360
362
|
elif self.method == "page":
|
|
361
|
-
|
|
363
|
+
self.endpoint_class._add_params("page", self._next_value)
|
|
362
364
|
else:
|
|
363
|
-
raise ValueError()
|
|
365
|
+
raise ValueError("Method should be 'cursor' or 'page'")
|
|
366
|
+
|
|
367
|
+
if self.per_page is not None and (
|
|
368
|
+
not isinstance(self.per_page, int)
|
|
369
|
+
or (self.per_page < 1 or self.per_page > 200)
|
|
370
|
+
):
|
|
371
|
+
raise ValueError("per_page should be a integer between 1 and 200")
|
|
364
372
|
|
|
365
|
-
|
|
373
|
+
if self.per_page is not None:
|
|
374
|
+
self.endpoint_class._add_params("per-page", self.per_page)
|
|
375
|
+
|
|
376
|
+
r = self.endpoint_class._get_from_url(self.endpoint_class.url, self._session)
|
|
366
377
|
|
|
367
378
|
if self.method == "cursor":
|
|
368
379
|
self._next_value = r.meta["next_cursor"]
|
|
@@ -428,7 +439,9 @@ class BaseOpenAlex:
|
|
|
428
439
|
"Object has no attribute 'filter_search'. Did you mean 'search_filter'?"
|
|
429
440
|
)
|
|
430
441
|
|
|
431
|
-
|
|
442
|
+
raise AttributeError(
|
|
443
|
+
f"'{self.__class__.__name__}' object has no attribute '{key}'"
|
|
444
|
+
)
|
|
432
445
|
|
|
433
446
|
def __getitem__(self, record_id):
|
|
434
447
|
if isinstance(record_id, list):
|
|
@@ -499,15 +512,24 @@ class BaseOpenAlex:
|
|
|
499
512
|
"""
|
|
500
513
|
return self.get(per_page=1).meta["count"]
|
|
501
514
|
|
|
502
|
-
def _get_from_url(self, url):
|
|
503
|
-
|
|
515
|
+
def _get_from_url(self, url, session=None):
|
|
516
|
+
if session is None:
|
|
517
|
+
session = _get_requests_session()
|
|
518
|
+
|
|
519
|
+
logger.debug(f"Requesting URL: {url}")
|
|
504
520
|
|
|
505
|
-
|
|
521
|
+
res = session.get(url, auth=OpenAlexAuth(config))
|
|
522
|
+
|
|
523
|
+
if res.status_code == 400:
|
|
506
524
|
if (
|
|
507
525
|
isinstance(res.json()["error"], str)
|
|
508
526
|
and "query parameters" in res.json()["error"]
|
|
509
527
|
):
|
|
510
528
|
raise QueryError(res.json()["message"])
|
|
529
|
+
if res.status_code == 401 and "API key" in res.json()["error"]:
|
|
530
|
+
raise QueryError(
|
|
531
|
+
f"{res.json()['error']}. Did you configure a valid API key?"
|
|
532
|
+
)
|
|
511
533
|
|
|
512
534
|
res.raise_for_status()
|
|
513
535
|
res_json = res.json()
|
|
@@ -526,8 +548,10 @@ class BaseOpenAlex:
|
|
|
526
548
|
raise ValueError("Unknown response format")
|
|
527
549
|
|
|
528
550
|
def get(self, return_meta=False, page=None, per_page=None, cursor=None):
|
|
529
|
-
if per_page is not None and (
|
|
530
|
-
|
|
551
|
+
if per_page is not None and (
|
|
552
|
+
not isinstance(per_page, int) or (per_page < 1 or per_page > 200)
|
|
553
|
+
):
|
|
554
|
+
raise ValueError("per_page should be an integer between 1 and 200")
|
|
531
555
|
|
|
532
556
|
if not isinstance(self.params, (str, list)):
|
|
533
557
|
self._add_params("per-page", per_page)
|
|
@@ -568,7 +592,7 @@ class BaseOpenAlex:
|
|
|
568
592
|
Paginator object.
|
|
569
593
|
"""
|
|
570
594
|
if method == "cursor":
|
|
571
|
-
if self.params.get("sample"):
|
|
595
|
+
if isinstance(self.params, dict) and self.params.get("sample"):
|
|
572
596
|
raise ValueError("method should be 'page' when using sample")
|
|
573
597
|
value = cursor
|
|
574
598
|
elif method == "page":
|
|
@@ -612,7 +636,7 @@ class BaseOpenAlex:
|
|
|
612
636
|
else:
|
|
613
637
|
self.params[argument] = new_params
|
|
614
638
|
|
|
615
|
-
|
|
639
|
+
logger.debug(f"Params updated: {self.params}")
|
|
616
640
|
|
|
617
641
|
def filter(self, **kwargs):
|
|
618
642
|
"""Add filter parameters to the API request.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.17
|
|
3
|
+
Version: 0.19
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -21,6 +21,8 @@ Requires-Dist: ruff; extra == "lint"
|
|
|
21
21
|
Provides-Extra: test
|
|
22
22
|
Requires-Dist: pytest; extra == "test"
|
|
23
23
|
Requires-Dist: pytest-xdist; extra == "test"
|
|
24
|
+
Requires-Dist: dotenv; extra == "test"
|
|
25
|
+
Dynamic: license-file
|
|
24
26
|
|
|
25
27
|
<p align="center">
|
|
26
28
|
<img alt="PyAlex - a Python wrapper for OpenAlex" src="https://github.com/J535D165/pyalex/raw/main/pyalex_repocard.svg">
|
|
@@ -99,6 +101,10 @@ config.retry_backoff_factor = 0.1
|
|
|
99
101
|
config.retry_http_codes = [429, 500, 503]
|
|
100
102
|
```
|
|
101
103
|
|
|
104
|
+
### Standards
|
|
105
|
+
|
|
106
|
+
OpenAlex uses standard [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
|
+
|
|
102
108
|
### Get single entity
|
|
103
109
|
|
|
104
110
|
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or Funder from OpenAlex by the
|
|
@@ -144,6 +150,8 @@ Publishers().random()
|
|
|
144
150
|
Funders().random()
|
|
145
151
|
```
|
|
146
152
|
|
|
153
|
+
See also [sample](#sample), which does support filters.
|
|
154
|
+
|
|
147
155
|
#### Get abstract
|
|
148
156
|
|
|
149
157
|
Only for Works. Request a work from the OpenAlex database:
|
|
@@ -215,6 +223,17 @@ Works()
|
|
|
215
223
|
.get()
|
|
216
224
|
```
|
|
217
225
|
|
|
226
|
+
#### Filter on a set of values
|
|
227
|
+
You can filter on a set of values, for example to retrieve all works matching a list of DOIs:
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
Works()
|
|
231
|
+
.filter_or(doi=["10.1016/s0924-9338(99)80239-9", "10.1002/andp.19213690304"])
|
|
232
|
+
.get()
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
You can use a maximum of 100 items in the set of values. Also note that OpenAlex allows a maximum URL length of 4096 characters, so a long list of identifiers can run into this limit. It helps to use the short form of the identifiers: `W2001676859` instead of `https://openalex.org/W2001676859`, and `10.1002/andp.19213690304` instead of `https://doi.org/10.1002/andp.19213690304`.
|
|
236
|
+
|
|
218
237
|
#### Search entities
|
|
219
238
|
|
|
220
239
|
OpenAlex reference: [The search parameter](https://docs.openalex.org/api-entities/works/search-works)
|
|
@@ -264,6 +283,14 @@ OpenAlex reference: [Sample entity lists](https://docs.openalex.org/how-to-use-t
|
|
|
264
283
|
Works().sample(100, seed=535).get()
|
|
265
284
|
```
|
|
266
285
|
|
|
286
|
+
Get 10 random German-based institutions:
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
Institutions().filter(country_code="DE").sample(10).get()
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
See also [random](#get-random), which does not support filters.
|
|
293
|
+
|
|
267
294
|
#### Logical expressions
|
|
268
295
|
|
|
269
296
|
OpenAlex reference: [Logical expressions](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/filter-entity-lists#logical-expressions)
|
|
@@ -397,6 +424,25 @@ with open(Path("works.json")) as f:
|
|
|
397
424
|
|
|
398
425
|
A list of awesome use cases of the OpenAlex dataset.
|
|
399
426
|
|
|
427
|
+
### Search author by name and affiliation
|
|
428
|
+
|
|
429
|
+
This requires searching for the affiliation first, retrieving the affiliation ID, and then searching for the author while filtering for the affiliation:
|
|
430
|
+
|
|
431
|
+
```python
|
|
432
|
+
from pyalex import Authors, Institutions
|
|
433
|
+
import logging
|
|
434
|
+
|
|
435
|
+
# Search for the institution
|
|
436
|
+
insts = Institutions().search("MIT").get()
|
|
437
|
+
logging.info(f"{len(insts)} search results found for the institution")
|
|
438
|
+
inst_id = insts[0]["id"].replace("https://openalex.org/", "")
|
|
439
|
+
|
|
440
|
+
# Search for the author within the institution
|
|
441
|
+
auths = Authors().search("Daron Acemoglu").filter(affiliations={"institution":{"id": inst_id}}).get()
|
|
442
|
+
logging.info(f"{len(auths)} search results found for the author")
|
|
443
|
+
auth = auths[0]
|
|
444
|
+
```
|
|
445
|
+
|
|
400
446
|
### Cited publications (works referenced by this paper, outgoing citations)
|
|
401
447
|
|
|
402
448
|
```python
|
|
@@ -423,6 +469,9 @@ from pyalex import Works
|
|
|
423
469
|
Works().filter(author={"id": "A2887243803"}).get()
|
|
424
470
|
```
|
|
425
471
|
|
|
472
|
+
> [!WARNING]
|
|
473
|
+
> This gets only the first 25 works of the author. To get all of them, see the [paging section](#paging).
|
|
474
|
+
|
|
426
475
|
### Dataset publications in the global south
|
|
427
476
|
|
|
428
477
|
```python
|
|
@@ -461,6 +510,8 @@ import pyalex
|
|
|
461
510
|
pyalex.config.api_key = "<MY_KEY>"
|
|
462
511
|
```
|
|
463
512
|
|
|
513
|
+
If you configure an invalid API key, all requests to OpenAlex will fail.
|
|
514
|
+
|
|
464
515
|
## Alternatives
|
|
465
516
|
|
|
466
517
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pyalex/__init__.py,sha256=upMXti6aJF6lz8J4EbdnQa13GhJzFGre7fnS_tj8NOw,1539
|
|
2
|
+
pyalex/_version.py,sha256=lc7e3Va7b6LwW9-6o0HxJBuB54_pZOZmQ9I6QHKL8AQ,701
|
|
3
|
+
pyalex/api.py,sha256=_g42dO3hkeKUQR3uXjLcAPAj3zxuT190Z2_jyH3p3Es,27769
|
|
4
|
+
pyalex-0.19.dist-info/licenses/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
+
pyalex-0.19.dist-info/METADATA,sha256=mawFYplV4A8N7p4lR0zVHGFT2ZRRNZ16bcVDUSX9ddU,16197
|
|
6
|
+
pyalex-0.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
pyalex-0.19.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
+
pyalex-0.19.dist-info/RECORD,,
|
pyalex-0.17.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
pyalex/__init__.py,sha256=upMXti6aJF6lz8J4EbdnQa13GhJzFGre7fnS_tj8NOw,1539
|
|
2
|
-
pyalex/_version.py,sha256=t3d5dJC864lqQ-TUIs6gpWRp7YVH04dI08mpFt6wvR0,508
|
|
3
|
-
pyalex/api.py,sha256=05clKZlIcH7g7G5d7tELxrfUIyyzdPKwbRinZ1Pliy4,26783
|
|
4
|
-
pyalex-0.17.dist-info/LICENSE,sha256=Mhf5MImRYP06a1EPVJCpkpTstOOEfGajN3T_Fz4izMg,1074
|
|
5
|
-
pyalex-0.17.dist-info/METADATA,sha256=3K3i_n7uMyF91DfrwFI5t0X5jaYD8lD2HcySMNHcfbg,14208
|
|
6
|
-
pyalex-0.17.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
7
|
-
pyalex-0.17.dist-info/top_level.txt,sha256=D0An8hWy9e0xPhTaT6K-yuJKVeVV3bYGxZ6Y-v2WXSU,7
|
|
8
|
-
pyalex-0.17.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|