pyalex 0.17__tar.gz → 0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyalex-0.17 → pyalex-0.19}/.github/workflows/python-package.yml +2 -0
- {pyalex-0.17 → pyalex-0.19}/PKG-INFO +53 -2
- {pyalex-0.17 → pyalex-0.19}/README.md +49 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex/_version.py +16 -3
- {pyalex-0.17 → pyalex-0.19}/pyalex/api.py +38 -14
- {pyalex-0.17 → pyalex-0.19}/pyalex.egg-info/PKG-INFO +53 -2
- {pyalex-0.17 → pyalex-0.19}/pyalex.egg-info/requires.txt +1 -0
- {pyalex-0.17 → pyalex-0.19}/pyproject.toml +1 -1
- {pyalex-0.17 → pyalex-0.19}/tests/test_paging.py +30 -0
- {pyalex-0.17 → pyalex-0.19}/tests/test_pyalex.py +43 -15
- {pyalex-0.17 → pyalex-0.19}/.github/workflows/python-lint.yml +0 -0
- {pyalex-0.17 → pyalex-0.19}/.github/workflows/python-publish.yml +0 -0
- {pyalex-0.17 → pyalex-0.19}/.gitignore +0 -0
- {pyalex-0.17 → pyalex-0.19}/.pre-commit-config.yaml +0 -0
- {pyalex-0.17 → pyalex-0.19}/CITATION.cff +0 -0
- {pyalex-0.17 → pyalex-0.19}/LICENSE +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex/__init__.py +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex.egg-info/SOURCES.txt +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex.egg-info/dependency_links.txt +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex.egg-info/top_level.txt +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex_repocard.png +0 -0
- {pyalex-0.17 → pyalex-0.19}/pyalex_repocard.svg +0 -0
- {pyalex-0.17 → pyalex-0.19}/setup.cfg +0 -0
|
@@ -16,6 +16,8 @@ jobs:
|
|
|
16
16
|
uses: actions/setup-python@v4
|
|
17
17
|
with:
|
|
18
18
|
python-version: ${{ matrix.python-version }}
|
|
19
|
+
- name: Set OPENALEX_API_KEY from secret
|
|
20
|
+
run: echo "OPENALEX_API_KEY=${{ secrets.OPENALEX_API_KEY }}" >> $GITHUB_ENV
|
|
19
21
|
- name: Install package and dependencies
|
|
20
22
|
run: |
|
|
21
23
|
python -m pip install --upgrade pip
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.17
|
|
3
|
+
Version: 0.19
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -21,6 +21,8 @@ Requires-Dist: ruff; extra == "lint"
|
|
|
21
21
|
Provides-Extra: test
|
|
22
22
|
Requires-Dist: pytest; extra == "test"
|
|
23
23
|
Requires-Dist: pytest-xdist; extra == "test"
|
|
24
|
+
Requires-Dist: dotenv; extra == "test"
|
|
25
|
+
Dynamic: license-file
|
|
24
26
|
|
|
25
27
|
<p align="center">
|
|
26
28
|
<img alt="PyAlex - a Python wrapper for OpenAlex" src="https://github.com/J535D165/pyalex/raw/main/pyalex_repocard.svg">
|
|
@@ -99,6 +101,10 @@ config.retry_backoff_factor = 0.1
|
|
|
99
101
|
config.retry_http_codes = [429, 500, 503]
|
|
100
102
|
```
|
|
101
103
|
|
|
104
|
+
### Standards
|
|
105
|
+
|
|
106
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
|
+
|
|
102
108
|
### Get single entity
|
|
103
109
|
|
|
104
110
|
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or Funder from OpenAlex by the
|
|
@@ -144,6 +150,8 @@ Publishers().random()
|
|
|
144
150
|
Funders().random()
|
|
145
151
|
```
|
|
146
152
|
|
|
153
|
+
Check also [sample](#sample), which does support filters.
|
|
154
|
+
|
|
147
155
|
#### Get abstract
|
|
148
156
|
|
|
149
157
|
Only for Works. Request a work from the OpenAlex database:
|
|
@@ -215,6 +223,17 @@ Works()
|
|
|
215
223
|
.get()
|
|
216
224
|
```
|
|
217
225
|
|
|
226
|
+
#### Filter on a set of values
|
|
227
|
+
You can filter on a set of values, for example, if you want all works from a list of DOIs:
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
Works()
|
|
231
|
+
.filter_or(doi=["10.1016/s0924-9338(99)80239-9", "10.1002/andp.19213690304"])
|
|
232
|
+
.get()
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
You can use a maximum of 100 items in the set of values. Also note that OpenAlex allows a maximum URL length of 4096 characters. If you have a big list of identifiers you want to filter on, you can run into this limit. It can be helpful to use the short form of the identifiers, so `W2001676859` instead of `https://openalex.org/W2001676859` and `10.1002/andp.19213690304` instead of `https://doi.org/10.1002/andp.19213690304`.
|
|
236
|
+
|
|
218
237
|
#### Search entities
|
|
219
238
|
|
|
220
239
|
OpenAlex reference: [The search parameter](https://docs.openalex.org/api-entities/works/search-works)
|
|
@@ -264,6 +283,14 @@ OpenAlex reference: [Sample entity lists](https://docs.openalex.org/how-to-use-t
|
|
|
264
283
|
Works().sample(100, seed=535).get()
|
|
265
284
|
```
|
|
266
285
|
|
|
286
|
+
Get 10 random German-based institutions:
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
Institutions().filter(country_code="DE").sample(10).get()
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Check also [random](#get-random), which does not support filters.
|
|
293
|
+
|
|
267
294
|
#### Logical expressions
|
|
268
295
|
|
|
269
296
|
OpenAlex reference: [Logical expressions](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/filter-entity-lists#logical-expressions)
|
|
@@ -397,6 +424,25 @@ with open(Path("works.json")) as f:
|
|
|
397
424
|
|
|
398
425
|
A list of awesome use cases of the OpenAlex dataset.
|
|
399
426
|
|
|
427
|
+
### Search author by name and affiliation
|
|
428
|
+
|
|
429
|
+
This requires searching for the affiliation first, retrieving the affiliation ID, and then searching for the author while filtering for the affiliation:
|
|
430
|
+
|
|
431
|
+
```python
|
|
432
|
+
from pyalex import Authors, Institutions
|
|
433
|
+
import logging
|
|
434
|
+
|
|
435
|
+
# Search for the institution
|
|
436
|
+
insts = Institutions().search("MIT").get()
|
|
437
|
+
logging.info(f"{len(insts)} search results found for the institution")
|
|
438
|
+
inst_id = insts[0]["id"].replace("https://openalex.org/", "")
|
|
439
|
+
|
|
440
|
+
# Search for the author within the institution
|
|
441
|
+
auths = Authors().search("Daron Acemoglu").filter(affiliations={"institution":{"id": inst_id}}).get()
|
|
442
|
+
logging.info(f"{len(auths)} search results found for the author")
|
|
443
|
+
auth = auths[0]
|
|
444
|
+
```
|
|
445
|
+
|
|
400
446
|
### Cited publications (works referenced by this paper, outgoing citations)
|
|
401
447
|
|
|
402
448
|
```python
|
|
@@ -423,6 +469,9 @@ from pyalex import Works
|
|
|
423
469
|
Works().filter(author={"id": "A2887243803"}).get()
|
|
424
470
|
```
|
|
425
471
|
|
|
472
|
+
> [!WARNING]
|
|
473
|
+
> This gets only the first 25 works of the author. To get all of them, see the [paging section](#paging).
|
|
474
|
+
|
|
426
475
|
### Dataset publications in the global south
|
|
427
476
|
|
|
428
477
|
```python
|
|
@@ -461,6 +510,8 @@ import pyalex
|
|
|
461
510
|
pyalex.config.api_key = "<MY_KEY>"
|
|
462
511
|
```
|
|
463
512
|
|
|
513
|
+
If you configure an invalid API key, all requests to OpenAlex will fail.
|
|
514
|
+
|
|
464
515
|
## Alternatives
|
|
465
516
|
|
|
466
517
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -75,6 +75,10 @@ config.retry_backoff_factor = 0.1
|
|
|
75
75
|
config.retry_http_codes = [429, 500, 503]
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
+
### Standards
|
|
79
|
+
|
|
80
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
81
|
+
|
|
78
82
|
### Get single entity
|
|
79
83
|
|
|
80
84
|
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or Funder from OpenAlex by the
|
|
@@ -120,6 +124,8 @@ Publishers().random()
|
|
|
120
124
|
Funders().random()
|
|
121
125
|
```
|
|
122
126
|
|
|
127
|
+
Check also [sample](#sample), which does support filters.
|
|
128
|
+
|
|
123
129
|
#### Get abstract
|
|
124
130
|
|
|
125
131
|
Only for Works. Request a work from the OpenAlex database:
|
|
@@ -191,6 +197,17 @@ Works()
|
|
|
191
197
|
.get()
|
|
192
198
|
```
|
|
193
199
|
|
|
200
|
+
#### Filter on a set of values
|
|
201
|
+
You can filter on a set of values, for example, if you want all works from a list of DOIs:
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
Works()
|
|
205
|
+
.filter_or(doi=["10.1016/s0924-9338(99)80239-9", "10.1002/andp.19213690304"])
|
|
206
|
+
.get()
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
You can use a maximum of 100 items in the set of values. Also note that OpenAlex allows a maximum URL length of 4096 characters. If you have a big list of identifiers you want to filter on, you can run into this limit. It can be helpful to use the short form of the identifiers, so `W2001676859` instead of `https://openalex.org/W2001676859` and `10.1002/andp.19213690304` instead of `https://doi.org/10.1002/andp.19213690304`.
|
|
210
|
+
|
|
194
211
|
#### Search entities
|
|
195
212
|
|
|
196
213
|
OpenAlex reference: [The search parameter](https://docs.openalex.org/api-entities/works/search-works)
|
|
@@ -240,6 +257,14 @@ OpenAlex reference: [Sample entity lists](https://docs.openalex.org/how-to-use-t
|
|
|
240
257
|
Works().sample(100, seed=535).get()
|
|
241
258
|
```
|
|
242
259
|
|
|
260
|
+
Get 10 random German-based institutions:
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
Institutions().filter(country_code="DE").sample(10).get()
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
Check also [random](#get-random), which does not support filters.
|
|
267
|
+
|
|
243
268
|
#### Logical expressions
|
|
244
269
|
|
|
245
270
|
OpenAlex reference: [Logical expressions](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/filter-entity-lists#logical-expressions)
|
|
@@ -373,6 +398,25 @@ with open(Path("works.json")) as f:
|
|
|
373
398
|
|
|
374
399
|
A list of awesome use cases of the OpenAlex dataset.
|
|
375
400
|
|
|
401
|
+
### Search author by name and affiliation
|
|
402
|
+
|
|
403
|
+
This requires searching for the affiliation first, retrieving the affiliation ID, and then searching for the author while filtering for the affiliation:
|
|
404
|
+
|
|
405
|
+
```python
|
|
406
|
+
from pyalex import Authors, Institutions
|
|
407
|
+
import logging
|
|
408
|
+
|
|
409
|
+
# Search for the institution
|
|
410
|
+
insts = Institutions().search("MIT").get()
|
|
411
|
+
logging.info(f"{len(insts)} search results found for the institution")
|
|
412
|
+
inst_id = insts[0]["id"].replace("https://openalex.org/", "")
|
|
413
|
+
|
|
414
|
+
# Search for the author within the institution
|
|
415
|
+
auths = Authors().search("Daron Acemoglu").filter(affiliations={"institution":{"id": inst_id}}).get()
|
|
416
|
+
logging.info(f"{len(auths)} search results found for the author")
|
|
417
|
+
auth = auths[0]
|
|
418
|
+
```
|
|
419
|
+
|
|
376
420
|
### Cited publications (works referenced by this paper, outgoing citations)
|
|
377
421
|
|
|
378
422
|
```python
|
|
@@ -399,6 +443,9 @@ from pyalex import Works
|
|
|
399
443
|
Works().filter(author={"id": "A2887243803"}).get()
|
|
400
444
|
```
|
|
401
445
|
|
|
446
|
+
> [!WARNING]
|
|
447
|
+
> This gets only the first 25 works of the author. To get all of them, see the [paging section](#paging).
|
|
448
|
+
|
|
402
449
|
### Dataset publications in the global south
|
|
403
450
|
|
|
404
451
|
```python
|
|
@@ -437,6 +484,8 @@ import pyalex
|
|
|
437
484
|
pyalex.config.api_key = "<MY_KEY>"
|
|
438
485
|
```
|
|
439
486
|
|
|
487
|
+
If you configure an invalid API key, all requests to OpenAlex will fail.
|
|
488
|
+
|
|
440
489
|
## Alternatives
|
|
441
490
|
|
|
442
491
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '0.
|
|
21
|
-
__version_tuple__ = version_tuple = (0,
|
|
31
|
+
__version__ = version = '0.19'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 19)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = 'ga8267859b'
|
|
@@ -12,6 +12,8 @@ try:
|
|
|
12
12
|
except ImportError:
|
|
13
13
|
__version__ = "0.0.0"
|
|
14
14
|
|
|
15
|
+
logger = logging.getLogger("pyalex")
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
class AlexConfig(dict):
|
|
17
19
|
"""Configuration class for OpenAlex API.
|
|
@@ -338,12 +340,12 @@ class Paginator:
|
|
|
338
340
|
self.value = value
|
|
339
341
|
self.per_page = per_page
|
|
340
342
|
self.n_max = n_max
|
|
343
|
+
self.n = 0
|
|
341
344
|
|
|
342
345
|
self._next_value = value
|
|
346
|
+
self._session = _get_requests_session()
|
|
343
347
|
|
|
344
348
|
def __iter__(self):
|
|
345
|
-
self.n = 0
|
|
346
|
-
|
|
347
349
|
return self
|
|
348
350
|
|
|
349
351
|
def _is_max(self):
|
|
@@ -356,13 +358,22 @@ class Paginator:
|
|
|
356
358
|
raise StopIteration
|
|
357
359
|
|
|
358
360
|
if self.method == "cursor":
|
|
359
|
-
|
|
361
|
+
self.endpoint_class._add_params("cursor", self._next_value)
|
|
360
362
|
elif self.method == "page":
|
|
361
|
-
|
|
363
|
+
self.endpoint_class._add_params("page", self._next_value)
|
|
362
364
|
else:
|
|
363
|
-
raise ValueError()
|
|
365
|
+
raise ValueError("Method should be 'cursor' or 'page'")
|
|
366
|
+
|
|
367
|
+
if self.per_page is not None and (
|
|
368
|
+
not isinstance(self.per_page, int)
|
|
369
|
+
or (self.per_page < 1 or self.per_page > 200)
|
|
370
|
+
):
|
|
371
|
+
raise ValueError("per_page should be a integer between 1 and 200")
|
|
364
372
|
|
|
365
|
-
|
|
373
|
+
if self.per_page is not None:
|
|
374
|
+
self.endpoint_class._add_params("per-page", self.per_page)
|
|
375
|
+
|
|
376
|
+
r = self.endpoint_class._get_from_url(self.endpoint_class.url, self._session)
|
|
366
377
|
|
|
367
378
|
if self.method == "cursor":
|
|
368
379
|
self._next_value = r.meta["next_cursor"]
|
|
@@ -428,7 +439,9 @@ class BaseOpenAlex:
|
|
|
428
439
|
"Object has no attribute 'filter_search'. Did you mean 'search_filter'?"
|
|
429
440
|
)
|
|
430
441
|
|
|
431
|
-
|
|
442
|
+
raise AttributeError(
|
|
443
|
+
f"'{self.__class__.__name__}' object has no attribute '{key}'"
|
|
444
|
+
)
|
|
432
445
|
|
|
433
446
|
def __getitem__(self, record_id):
|
|
434
447
|
if isinstance(record_id, list):
|
|
@@ -499,15 +512,24 @@ class BaseOpenAlex:
|
|
|
499
512
|
"""
|
|
500
513
|
return self.get(per_page=1).meta["count"]
|
|
501
514
|
|
|
502
|
-
def _get_from_url(self, url):
|
|
503
|
-
|
|
515
|
+
def _get_from_url(self, url, session=None):
|
|
516
|
+
if session is None:
|
|
517
|
+
session = _get_requests_session()
|
|
518
|
+
|
|
519
|
+
logger.debug(f"Requesting URL: {url}")
|
|
504
520
|
|
|
505
|
-
|
|
521
|
+
res = session.get(url, auth=OpenAlexAuth(config))
|
|
522
|
+
|
|
523
|
+
if res.status_code == 400:
|
|
506
524
|
if (
|
|
507
525
|
isinstance(res.json()["error"], str)
|
|
508
526
|
and "query parameters" in res.json()["error"]
|
|
509
527
|
):
|
|
510
528
|
raise QueryError(res.json()["message"])
|
|
529
|
+
if res.status_code == 401 and "API key" in res.json()["error"]:
|
|
530
|
+
raise QueryError(
|
|
531
|
+
f"{res.json()['error']}. Did you configure a valid API key?"
|
|
532
|
+
)
|
|
511
533
|
|
|
512
534
|
res.raise_for_status()
|
|
513
535
|
res_json = res.json()
|
|
@@ -526,8 +548,10 @@ class BaseOpenAlex:
|
|
|
526
548
|
raise ValueError("Unknown response format")
|
|
527
549
|
|
|
528
550
|
def get(self, return_meta=False, page=None, per_page=None, cursor=None):
|
|
529
|
-
if per_page is not None and (
|
|
530
|
-
|
|
551
|
+
if per_page is not None and (
|
|
552
|
+
not isinstance(per_page, int) or (per_page < 1 or per_page > 200)
|
|
553
|
+
):
|
|
554
|
+
raise ValueError("per_page should be an integer between 1 and 200")
|
|
531
555
|
|
|
532
556
|
if not isinstance(self.params, (str, list)):
|
|
533
557
|
self._add_params("per-page", per_page)
|
|
@@ -568,7 +592,7 @@ class BaseOpenAlex:
|
|
|
568
592
|
Paginator object.
|
|
569
593
|
"""
|
|
570
594
|
if method == "cursor":
|
|
571
|
-
if self.params.get("sample"):
|
|
595
|
+
if isinstance(self.params, dict) and self.params.get("sample"):
|
|
572
596
|
raise ValueError("method should be 'page' when using sample")
|
|
573
597
|
value = cursor
|
|
574
598
|
elif method == "page":
|
|
@@ -612,7 +636,7 @@ class BaseOpenAlex:
|
|
|
612
636
|
else:
|
|
613
637
|
self.params[argument] = new_params
|
|
614
638
|
|
|
615
|
-
|
|
639
|
+
logger.debug(f"Params updated: {self.params}")
|
|
616
640
|
|
|
617
641
|
def filter(self, **kwargs):
|
|
618
642
|
"""Add filter parameters to the API request.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pyalex
|
|
3
|
-
Version: 0.17
|
|
3
|
+
Version: 0.19
|
|
4
4
|
Summary: Python interface to the OpenAlex database
|
|
5
5
|
Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -21,6 +21,8 @@ Requires-Dist: ruff; extra == "lint"
|
|
|
21
21
|
Provides-Extra: test
|
|
22
22
|
Requires-Dist: pytest; extra == "test"
|
|
23
23
|
Requires-Dist: pytest-xdist; extra == "test"
|
|
24
|
+
Requires-Dist: dotenv; extra == "test"
|
|
25
|
+
Dynamic: license-file
|
|
24
26
|
|
|
25
27
|
<p align="center">
|
|
26
28
|
<img alt="PyAlex - a Python wrapper for OpenAlex" src="https://github.com/J535D165/pyalex/raw/main/pyalex_repocard.svg">
|
|
@@ -99,6 +101,10 @@ config.retry_backoff_factor = 0.1
|
|
|
99
101
|
config.retry_http_codes = [429, 500, 503]
|
|
100
102
|
```
|
|
101
103
|
|
|
104
|
+
### Standards
|
|
105
|
+
|
|
106
|
+
OpenAlex uses standard [ISO_3166-1_alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country codes.
|
|
107
|
+
|
|
102
108
|
### Get single entity
|
|
103
109
|
|
|
104
110
|
Get a single Work, Author, Source, Institution, Concept, Topic, Publisher or Funder from OpenAlex by the
|
|
@@ -144,6 +150,8 @@ Publishers().random()
|
|
|
144
150
|
Funders().random()
|
|
145
151
|
```
|
|
146
152
|
|
|
153
|
+
Check also [sample](#sample), which does support filters.
|
|
154
|
+
|
|
147
155
|
#### Get abstract
|
|
148
156
|
|
|
149
157
|
Only for Works. Request a work from the OpenAlex database:
|
|
@@ -215,6 +223,17 @@ Works()
|
|
|
215
223
|
.get()
|
|
216
224
|
```
|
|
217
225
|
|
|
226
|
+
#### Filter on a set of values
|
|
227
|
+
You can filter on a set of values, for example, if you want all works from a list of DOIs:
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
Works()
|
|
231
|
+
.filter_or(doi=["10.1016/s0924-9338(99)80239-9", "10.1002/andp.19213690304"])
|
|
232
|
+
.get()
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
You can use a maximum of 100 items in the set of values. Also note that OpenAlex allows a maximum URL length of 4096 characters. If you have a big list of identifiers you want to filter on, you can run into this limit. It can be helpful to use the short form of the identifiers, so `W2001676859` instead of `https://openalex.org/W2001676859` and `10.1002/andp.19213690304` instead of `https://doi.org/10.1002/andp.19213690304`.
|
|
236
|
+
|
|
218
237
|
#### Search entities
|
|
219
238
|
|
|
220
239
|
OpenAlex reference: [The search parameter](https://docs.openalex.org/api-entities/works/search-works)
|
|
@@ -264,6 +283,14 @@ OpenAlex reference: [Sample entity lists](https://docs.openalex.org/how-to-use-t
|
|
|
264
283
|
Works().sample(100, seed=535).get()
|
|
265
284
|
```
|
|
266
285
|
|
|
286
|
+
Get 10 random German-based institutions:
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
Institutions().filter(country_code="DE").sample(10).get()
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Check also [random](#get-random), which does not support filters.
|
|
293
|
+
|
|
267
294
|
#### Logical expressions
|
|
268
295
|
|
|
269
296
|
OpenAlex reference: [Logical expressions](https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/filter-entity-lists#logical-expressions)
|
|
@@ -397,6 +424,25 @@ with open(Path("works.json")) as f:
|
|
|
397
424
|
|
|
398
425
|
A list of awesome use cases of the OpenAlex dataset.
|
|
399
426
|
|
|
427
|
+
### Search author by name and affiliation
|
|
428
|
+
|
|
429
|
+
This requires searching for the affiliation first, retrieving the affiliation ID, and then searching for the author while filtering for the affiliation:
|
|
430
|
+
|
|
431
|
+
```python
|
|
432
|
+
from pyalex import Authors, Institutions
|
|
433
|
+
import logging
|
|
434
|
+
|
|
435
|
+
# Search for the institution
|
|
436
|
+
insts = Institutions().search("MIT").get()
|
|
437
|
+
logging.info(f"{len(insts)} search results found for the institution")
|
|
438
|
+
inst_id = insts[0]["id"].replace("https://openalex.org/", "")
|
|
439
|
+
|
|
440
|
+
# Search for the author within the institution
|
|
441
|
+
auths = Authors().search("Daron Acemoglu").filter(affiliations={"institution":{"id": inst_id}}).get()
|
|
442
|
+
logging.info(f"{len(auths)} search results found for the author")
|
|
443
|
+
auth = auths[0]
|
|
444
|
+
```
|
|
445
|
+
|
|
400
446
|
### Cited publications (works referenced by this paper, outgoing citations)
|
|
401
447
|
|
|
402
448
|
```python
|
|
@@ -423,6 +469,9 @@ from pyalex import Works
|
|
|
423
469
|
Works().filter(author={"id": "A2887243803"}).get()
|
|
424
470
|
```
|
|
425
471
|
|
|
472
|
+
> [!WARNING]
|
|
473
|
+
> This gets only the first 25 works of the author. To get all of them, see the [paging section](#paging).
|
|
474
|
+
|
|
426
475
|
### Dataset publications in the global south
|
|
427
476
|
|
|
428
477
|
```python
|
|
@@ -461,6 +510,8 @@ import pyalex
|
|
|
461
510
|
pyalex.config.api_key = "<MY_KEY>"
|
|
462
511
|
```
|
|
463
512
|
|
|
513
|
+
If you configure an invalid API key, all requests to OpenAlex will fail.
|
|
514
|
+
|
|
464
515
|
## Alternatives
|
|
465
516
|
|
|
466
517
|
R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR) library.
|
|
@@ -7,6 +7,10 @@ from pyalex.api import Paginator
|
|
|
7
7
|
pyalex.config.max_retries = 10
|
|
8
8
|
|
|
9
9
|
|
|
10
|
+
def test_cursor_no_filter():
|
|
11
|
+
assert len(list(pyalex.Works().paginate(per_page=200, n_max=1000))) == 5
|
|
12
|
+
|
|
13
|
+
|
|
10
14
|
def test_cursor():
|
|
11
15
|
query = Authors().search_filter(display_name="einstein")
|
|
12
16
|
|
|
@@ -73,6 +77,28 @@ def test_paginate_counts():
|
|
|
73
77
|
assert r.meta["count"] == n_p_page >= n_p_default == n_p_cursor
|
|
74
78
|
|
|
75
79
|
|
|
80
|
+
def test_paginate_per_page():
|
|
81
|
+
assert all(len(page) <= 10 for page in Authors().paginate(per_page=10, n_max=50))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_paginate_per_page_200():
|
|
85
|
+
assert all(len(page) == 200 for page in Authors().paginate(per_page=200, n_max=400))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_paginate_per_page_none():
|
|
89
|
+
assert all(len(page) == 25 for page in Authors().paginate(n_max=500))
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_paginate_per_page_1000():
|
|
93
|
+
with pytest.raises(ValueError):
|
|
94
|
+
assert next(Authors().paginate(per_page=1000))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_paginate_per_page_str():
|
|
98
|
+
with pytest.raises(ValueError):
|
|
99
|
+
assert next(Authors().paginate(per_page="100"))
|
|
100
|
+
|
|
101
|
+
|
|
76
102
|
def test_paginate_instance():
|
|
77
103
|
p_default = Authors().search_filter(display_name="einstein").paginate(per_page=200)
|
|
78
104
|
assert isinstance(p_default, Paginator)
|
|
@@ -102,3 +128,7 @@ def test_cursor_paging_n_max_none():
|
|
|
102
128
|
def test_paging_with_sample():
|
|
103
129
|
with pytest.raises(ValueError):
|
|
104
130
|
Authors().sample(1).paginate(method="cursor")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_paging_next():
|
|
134
|
+
next(Authors().paginate())
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import json
|
|
3
|
+
import os
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
4
6
|
import pytest
|
|
5
7
|
import requests
|
|
8
|
+
from dotenv import load_dotenv
|
|
6
9
|
from requests import HTTPError
|
|
7
10
|
|
|
8
11
|
import pyalex
|
|
@@ -21,6 +24,9 @@ from pyalex import Works
|
|
|
21
24
|
from pyalex import autocomplete
|
|
22
25
|
from pyalex.api import QueryError
|
|
23
26
|
|
|
27
|
+
# Load environment variables from .env file
|
|
28
|
+
load_dotenv()
|
|
29
|
+
|
|
24
30
|
pyalex.config.max_retries = 10
|
|
25
31
|
|
|
26
32
|
|
|
@@ -76,6 +82,20 @@ def test_per_page():
|
|
|
76
82
|
assert len(Works().filter(publication_year=2020).get(per_page=200)) == 200
|
|
77
83
|
|
|
78
84
|
|
|
85
|
+
def test_per_page_none():
|
|
86
|
+
assert len(Works().filter(publication_year=2020).get(per_page=None)) == 25
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_per_page_1000():
|
|
90
|
+
with pytest.raises(ValueError):
|
|
91
|
+
Works().filter(publication_year=2020).get(per_page=1000)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_per_page_str():
|
|
95
|
+
with pytest.raises(ValueError):
|
|
96
|
+
Works().filter(publication_year=2020).get(per_page="100")
|
|
97
|
+
|
|
98
|
+
|
|
79
99
|
def test_W4238809453_works():
|
|
80
100
|
assert isinstance(Works()["W4238809453"], Work)
|
|
81
101
|
assert Works()["W4238809453"]["doi"] == "https://doi.org/10.1001/jama.264.8.944b"
|
|
@@ -205,12 +225,11 @@ def test_referenced_works():
|
|
|
205
225
|
# the work to extract the referenced works of
|
|
206
226
|
w = Works()["W2741809807"]
|
|
207
227
|
|
|
208
|
-
r = Works().
|
|
228
|
+
r = Works().filter_or(openalex_id=w["referenced_works"]).get()
|
|
209
229
|
|
|
210
230
|
assert r.meta["count"] <= len(w["referenced_works"])
|
|
211
231
|
|
|
212
232
|
|
|
213
|
-
@pytest.mark.xfail()
|
|
214
233
|
def test_code_examples():
|
|
215
234
|
# /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code # noqa
|
|
216
235
|
# /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code&sort=count:desc # noqa
|
|
@@ -366,19 +385,6 @@ def test_subset():
|
|
|
366
385
|
assert url == Works().select(["id", "doi", "display_name"]).url
|
|
367
386
|
|
|
368
387
|
|
|
369
|
-
def test_auth():
|
|
370
|
-
w_no_auth = Works().get()
|
|
371
|
-
pyalex.config.email = "pyalex_github_unittests@example.com"
|
|
372
|
-
pyalex.config.api_key = "my_api_key"
|
|
373
|
-
|
|
374
|
-
w_auth = Works().get()
|
|
375
|
-
|
|
376
|
-
pyalex.config.email = None
|
|
377
|
-
pyalex.config.api_key = None
|
|
378
|
-
|
|
379
|
-
assert len(w_no_auth) == len(w_auth)
|
|
380
|
-
|
|
381
|
-
|
|
382
388
|
def test_autocomplete_works():
|
|
383
389
|
w = Works().filter(publication_year=2023).autocomplete("planetary boundaries")
|
|
384
390
|
|
|
@@ -411,3 +417,25 @@ def test_urlencoding_list():
|
|
|
411
417
|
.count()
|
|
412
418
|
== 2
|
|
413
419
|
)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def test_premium_api_no_valid_key():
|
|
423
|
+
pyalex.config.email = "pyalex_github_unittests@example.com"
|
|
424
|
+
pyalex.config.api_key = "my_api_key"
|
|
425
|
+
with pytest.raises(QueryError):
|
|
426
|
+
Works().get()
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
@pytest.mark.skipif(
|
|
430
|
+
not os.environ.get("OPENALEX_API_KEY"),
|
|
431
|
+
reason="OPENALEX_API_KEY is not set in the environment variables",
|
|
432
|
+
)
|
|
433
|
+
def test_premium_api():
|
|
434
|
+
# This test requires a valid API key set in the config. If from_updated_date
|
|
435
|
+
# is set, it should return works updated since the start of the current
|
|
436
|
+
# year. If no API key is set, it should raise an error.
|
|
437
|
+
pyalex.config.api_key = os.environ["OPENALEX_API_KEY"]
|
|
438
|
+
|
|
439
|
+
Works().filter(from_updated_date=f"{datetime.datetime.now().year}-01-01").get()
|
|
440
|
+
|
|
441
|
+
pyalex.config.api_key = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|