pyalex 0.15__tar.gz → 0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  repos:
2
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v4.6.0
3
+ rev: v5.0.0
4
4
  hooks:
5
5
  - id: trailing-whitespace
6
6
  - id: end-of-file-fixer
@@ -8,7 +8,7 @@ repos:
8
8
  - id: check-yaml
9
9
  - id: check-added-large-files
10
10
  - repo: https://github.com/astral-sh/ruff-pre-commit
11
- rev: v0.5.7
11
+ rev: v0.8.3
12
12
  hooks:
13
13
  - id: ruff
14
14
  args: [--fix]
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pyalex
3
- Version: 0.15
3
+ Version: 0.16
4
4
  Summary: Python interface to the OpenAlex database
5
5
  Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
6
6
  License: MIT
@@ -126,7 +126,7 @@ Works()["W2741809807"]["open_access"]
126
126
  The same also works for Authors, Sources, Institutions, Concepts and Topics
127
127
 
128
128
  ```python
129
- Authors()["A2887243803"]
129
+ Authors()["A5027479191"]
130
130
  Authors()["https://orcid.org/0000-0002-4297-0502"] # same
131
131
  ```
132
132
 
@@ -139,7 +139,6 @@ Works().random()
139
139
  Authors().random()
140
140
  Sources().random()
141
141
  Institutions().random()
142
- Concepts().random()
143
142
  Topics().random()
144
143
  Publishers().random()
145
144
  Funders().random()
@@ -378,11 +377,27 @@ Works()["W2023271753"].ngrams()
378
377
  ```
379
378
 
380
379
 
380
+ ### Serialize
381
+
382
+ All results from PyAlex can be serialized. For example, save the results to a JSON file:
383
+
384
+ ```python
385
+ import json
386
+ from pathlib import Path
387
+ from pyalex import Work, Works
388
+
389
+ with open(Path("works.json"), "w") as f:
390
+ json.dump(Works().get(), f)
391
+
392
+ with open(Path("works.json")) as f:
393
+ works = [Work(w) for w in json.load(f)]
394
+ ```
395
+
381
396
  ## Code snippets
382
397
 
383
398
  A list of awesome use cases of the OpenAlex dataset.
384
399
 
385
- ### Cited publications (referenced works)
400
+ ### Cited publications (works referenced by this paper, outgoing citations)
386
401
 
387
402
  ```python
388
403
  from pyalex import Works
@@ -393,6 +408,13 @@ w = Works()["W2741809807"]
393
408
  Works()[w["referenced_works"]]
394
409
  ```
395
410
 
411
+ ### Citing publications (other works that reference this paper, incoming citations)
412
+
413
+ ```python
414
+ from pyalex import Works
415
+ Works().filter(cites="W2741809807").get()
416
+ ```
417
+
396
418
  ### Get works of a single author
397
419
 
398
420
  ```python
@@ -451,6 +473,7 @@ R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR)
451
473
 
452
474
  > This library is a community contribution. The authors of this Python library aren't affiliated with OpenAlex.
453
475
 
476
+ This library is maintained by [J535D165](https://github.com/J535D165) and [PeterLombaers](https://github.com/PeterLombaers).
454
477
  Feel free to reach out with questions, remarks, and suggestions. The
455
- [issue tracker](/issues) is a good starting point. You can also email me at
478
+ [issue tracker](/issues) is a good starting point. You can also reach out via
456
479
  [jonathandebruinos@gmail.com](mailto:jonathandebruinos@gmail.com).
@@ -102,7 +102,7 @@ Works()["W2741809807"]["open_access"]
102
102
  The same also works for Authors, Sources, Institutions, Concepts and Topics
103
103
 
104
104
  ```python
105
- Authors()["A2887243803"]
105
+ Authors()["A5027479191"]
106
106
  Authors()["https://orcid.org/0000-0002-4297-0502"] # same
107
107
  ```
108
108
 
@@ -115,7 +115,6 @@ Works().random()
115
115
  Authors().random()
116
116
  Sources().random()
117
117
  Institutions().random()
118
- Concepts().random()
119
118
  Topics().random()
120
119
  Publishers().random()
121
120
  Funders().random()
@@ -354,11 +353,27 @@ Works()["W2023271753"].ngrams()
354
353
  ```
355
354
 
356
355
 
356
+ ### Serialize
357
+
358
+ All results from PyAlex can be serialized. For example, save the results to a JSON file:
359
+
360
+ ```python
361
+ import json
362
+ from pathlib import Path
363
+ from pyalex import Work, Works
364
+
365
+ with open(Path("works.json"), "w") as f:
366
+ json.dump(Works().get(), f)
367
+
368
+ with open(Path("works.json")) as f:
369
+ works = [Work(w) for w in json.load(f)]
370
+ ```
371
+
357
372
  ## Code snippets
358
373
 
359
374
  A list of awesome use cases of the OpenAlex dataset.
360
375
 
361
- ### Cited publications (referenced works)
376
+ ### Cited publications (works referenced by this paper, outgoing citations)
362
377
 
363
378
  ```python
364
379
  from pyalex import Works
@@ -369,6 +384,13 @@ w = Works()["W2741809807"]
369
384
  Works()[w["referenced_works"]]
370
385
  ```
371
386
 
387
+ ### Citing publications (other works that reference this paper, incoming citations)
388
+
389
+ ```python
390
+ from pyalex import Works
391
+ Works().filter(cites="W2741809807").get()
392
+ ```
393
+
372
394
  ### Get works of a single author
373
395
 
374
396
  ```python
@@ -427,6 +449,7 @@ R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR)
427
449
 
428
450
  > This library is a community contribution. The authors of this Python library aren't affiliated with OpenAlex.
429
451
 
452
+ This library is maintained by [J535D165](https://github.com/J535D165) and [PeterLombaers](https://github.com/PeterLombaers).
430
453
  Feel free to reach out with questions, remarks, and suggestions. The
431
- [issue tracker](/issues) is a good starting point. You can also email me at
454
+ [issue tracker](/issues) is a good starting point. You can also reach out via
432
455
  [jonathandebruinos@gmail.com](mailto:jonathandebruinos@gmail.com).
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '0.15'
16
- __version_tuple__ = version_tuple = (0, 15)
20
+ __version__ = version = '0.16'
21
+ __version_tuple__ = version_tuple = (0, 16)
@@ -23,7 +23,7 @@ class AlexConfig(dict):
23
23
  config = AlexConfig(
24
24
  email=None,
25
25
  api_key=None,
26
- user_agent="pyalex/{__version__}",
26
+ user_agent=f"pyalex/{__version__}",
27
27
  openalex_url="https://api.openalex.org",
28
28
  max_retries=0,
29
29
  retry_backoff_factor=0.1,
@@ -31,6 +31,32 @@ config = AlexConfig(
31
31
  )
32
32
 
33
33
 
34
+ class or_(dict):
35
+ pass
36
+
37
+
38
+ class _LogicalExpression:
39
+ token = None
40
+
41
+ def __init__(self, value):
42
+ self.value = value
43
+
44
+ def __str__(self) -> str:
45
+ return f"{self.token}{self.value}"
46
+
47
+
48
+ class not_(_LogicalExpression):
49
+ token = "!"
50
+
51
+
52
+ class gt_(_LogicalExpression):
53
+ token = ">"
54
+
55
+
56
+ class lt_(_LogicalExpression):
57
+ token = "<"
58
+
59
+
34
60
  def _quote_oa_value(v):
35
61
  """Prepare a value for the OpenAlex API.
36
62
 
@@ -41,30 +67,40 @@ def _quote_oa_value(v):
41
67
  if isinstance(v, bool):
42
68
  return str(v).lower()
43
69
 
70
+ if isinstance(v, _LogicalExpression) and isinstance(v.value, str):
71
+ v.value = quote_plus(v.value)
72
+ return v
73
+
44
74
  if isinstance(v, str):
45
75
  return quote_plus(v)
46
76
 
47
77
  return v
48
78
 
49
79
 
50
- def _flatten_kv(d, prefix=""):
80
+ def _flatten_kv(d, prefix=None, logical="+"):
81
+ if prefix is None and not isinstance(d, dict):
82
+ raise ValueError("prefix should be set if d is not a dict")
83
+
51
84
  if isinstance(d, dict):
85
+ logical_subd = "|" if isinstance(d, or_) else logical
86
+
52
87
  t = []
53
88
  for k, v in d.items():
54
- if isinstance(v, list):
55
- t.extend([f"{prefix}.{k}:{_quote_oa_value(i)}" for i in v])
56
- else:
57
- new_prefix = f"{prefix}.{k}" if prefix else f"{k}"
58
- x = _flatten_kv(v, prefix=new_prefix)
59
- t.append(x)
89
+ x = _flatten_kv(
90
+ v, prefix=f"{prefix}.{k}" if prefix else f"{k}", logical=logical_subd
91
+ )
92
+ t.append(x)
60
93
 
61
94
  return ",".join(t)
95
+ elif isinstance(d, list):
96
+ list_str = logical.join([f"{_quote_oa_value(i)}" for i in d])
97
+ return f"{prefix}:{list_str}"
62
98
  else:
63
99
  return f"{prefix}:{_quote_oa_value(d)}"
64
100
 
65
101
 
66
102
  def _params_merge(params, add_params):
67
- for k, _v in add_params.items():
103
+ for k in add_params.keys():
68
104
  if (
69
105
  k in params
70
106
  and isinstance(params[k], dict)
@@ -113,6 +149,18 @@ def invert_abstract(inv_index):
113
149
  return " ".join(map(lambda x: x[0], sorted(l_inv, key=lambda x: x[1])))
114
150
 
115
151
 
152
+ def _wrap_values_nested_dict(d, func):
153
+ for k, v in d.items():
154
+ if isinstance(v, dict):
155
+ d[k] = _wrap_values_nested_dict(v, func)
156
+ elif isinstance(v, list):
157
+ d[k] = [func(i) for i in v]
158
+ else:
159
+ d[k] = func(v)
160
+
161
+ return d
162
+
163
+
116
164
  class QueryError(ValueError):
117
165
  pass
118
166
 
@@ -207,9 +255,6 @@ class BaseOpenAlex:
207
255
  def __init__(self, params=None):
208
256
  self.params = params
209
257
 
210
- def _get_multi_items(self, record_list):
211
- return self.filter(openalex_id="|".join(record_list)).get()
212
-
213
258
  def _full_collection_name(self):
214
259
  if self.params is not None and "q" in self.params.keys():
215
260
  return (
@@ -234,10 +279,14 @@ class BaseOpenAlex:
234
279
 
235
280
  def __getitem__(self, record_id):
236
281
  if isinstance(record_id, list):
237
- return self._get_multi_items(record_id)
282
+ if len(record_id) > 100:
283
+ raise ValueError("OpenAlex does not support more than 100 ids")
284
+
285
+ return self.filter_or(openalex_id=record_id).get(per_page=len(record_id))
238
286
 
239
287
  return self._get_from_url(
240
- f"{self._full_collection_name()}/{record_id}", return_meta=False
288
+ f"{self._full_collection_name()}/{_quote_oa_value(record_id)}",
289
+ return_meta=False,
241
290
  )
242
291
 
243
292
  @property
@@ -322,7 +371,10 @@ class BaseOpenAlex:
322
371
  def random(self):
323
372
  return self.__getitem__("random")
324
373
 
325
- def _add_params(self, argument, new_params):
374
+ def _add_params(self, argument, new_params, raise_if_exists=False):
375
+ if raise_if_exists:
376
+ raise NotImplementedError("raise_if_exists is not implemented")
377
+
326
378
  if self.params is None:
327
379
  self.params = {argument: new_params}
328
380
  elif argument in self.params and isinstance(self.params[argument], dict):
@@ -336,6 +388,25 @@ class BaseOpenAlex:
336
388
  self._add_params("filter", kwargs)
337
389
  return self
338
390
 
391
+ def filter_and(self, **kwargs):
392
+ return self.filter(**kwargs)
393
+
394
+ def filter_or(self, **kwargs):
395
+ self._add_params("filter", or_(kwargs), raise_if_exists=False)
396
+ return self
397
+
398
+ def filter_not(self, **kwargs):
399
+ self._add_params("filter", _wrap_values_nested_dict(kwargs, not_))
400
+ return self
401
+
402
+ def filter_gt(self, **kwargs):
403
+ self._add_params("filter", _wrap_values_nested_dict(kwargs, gt_))
404
+ return self
405
+
406
+ def filter_lt(self, **kwargs):
407
+ self._add_params("filter", _wrap_values_nested_dict(kwargs, lt_))
408
+ return self
409
+
339
410
  def search_filter(self, **kwargs):
340
411
  self._add_params("filter", {f"{k}.search": v for k, v in kwargs.items()})
341
412
  return self
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pyalex
3
- Version: 0.15
3
+ Version: 0.16
4
4
  Summary: Python interface to the OpenAlex database
5
5
  Author-email: Jonathan de Bruin <jonathandebruinos@gmail.com>
6
6
  License: MIT
@@ -126,7 +126,7 @@ Works()["W2741809807"]["open_access"]
126
126
  The same also works for Authors, Sources, Institutions, Concepts and Topics
127
127
 
128
128
  ```python
129
- Authors()["A2887243803"]
129
+ Authors()["A5027479191"]
130
130
  Authors()["https://orcid.org/0000-0002-4297-0502"] # same
131
131
  ```
132
132
 
@@ -139,7 +139,6 @@ Works().random()
139
139
  Authors().random()
140
140
  Sources().random()
141
141
  Institutions().random()
142
- Concepts().random()
143
142
  Topics().random()
144
143
  Publishers().random()
145
144
  Funders().random()
@@ -378,11 +377,27 @@ Works()["W2023271753"].ngrams()
378
377
  ```
379
378
 
380
379
 
380
+ ### Serialize
381
+
382
+ All results from PyAlex can be serialized. For example, save the results to a JSON file:
383
+
384
+ ```python
385
+ import json
386
+ from pathlib import Path
387
+ from pyalex import Work, Works
388
+
389
+ with open(Path("works.json"), "w") as f:
390
+ json.dump(Works().get(), f)
391
+
392
+ with open(Path("works.json")) as f:
393
+ works = [Work(w) for w in json.load(f)]
394
+ ```
395
+
381
396
  ## Code snippets
382
397
 
383
398
  A list of awesome use cases of the OpenAlex dataset.
384
399
 
385
- ### Cited publications (referenced works)
400
+ ### Cited publications (works referenced by this paper, outgoing citations)
386
401
 
387
402
  ```python
388
403
  from pyalex import Works
@@ -393,6 +408,13 @@ w = Works()["W2741809807"]
393
408
  Works()[w["referenced_works"]]
394
409
  ```
395
410
 
411
+ ### Citing publications (other works that reference this paper, incoming citations)
412
+
413
+ ```python
414
+ from pyalex import Works
415
+ Works().filter(cites="W2741809807").get()
416
+ ```
417
+
396
418
  ### Get works of a single author
397
419
 
398
420
  ```python
@@ -451,6 +473,7 @@ R users can use the excellent [OpenAlexR](https://github.com/ropensci/openalexR)
451
473
 
452
474
  > This library is a community contribution. The authors of this Python library aren't affiliated with OpenAlex.
453
475
 
476
+ This library is maintained by [J535D165](https://github.com/J535D165) and [PeterLombaers](https://github.com/PeterLombaers).
454
477
  Feel free to reach out with questions, remarks, and suggestions. The
455
- [issue tracker](/issues) is a good starting point. You can also email me at
478
+ [issue tracker](/issues) is a good starting point. You can also reach out via
456
479
  [jonathandebruinos@gmail.com](mailto:jonathandebruinos@gmail.com).
@@ -117,7 +117,12 @@ def test_multi_works():
117
117
  # the work to extract the referenced works of
118
118
  w = Works()["W2741809807"]
119
119
 
120
- assert len(Works()[w["referenced_works"]]) == 25
120
+ assert len(Works()[w["referenced_works"]]) >= 38
121
+
122
+ assert (
123
+ len(Works().filter_or(openalex_id=w["referenced_works"]).get(per_page=100))
124
+ >= 38
125
+ )
121
126
 
122
127
 
123
128
  def test_works_multifilter():
@@ -204,13 +209,17 @@ def test_referenced_works():
204
209
  # the work to extract the referenced works of
205
210
  w = Works()["W2741809807"]
206
211
 
207
- _, m = (
212
+ w_new, m = (
208
213
  Works()
209
214
  .filter(openalex_id="|".join(w["referenced_works"]))
210
- .get(return_meta=True)
215
+ .get(return_meta=True, per_page=100)
211
216
  )
212
217
 
213
- assert m["count"] == len(w["referenced_works"])
218
+ assert set([w["id"] for w in w_new]).difference(set(w["referenced_works"])) == set()
219
+ # assert set(w["referenced_works"]).difference(set([w["id"] for w in w_new]))
220
+ # == set()
221
+
222
+ assert m["count"] < len(w["referenced_works"])
214
223
 
215
224
 
216
225
  @pytest.mark.xfail()
@@ -244,12 +253,25 @@ def test_serializable(tmpdir):
244
253
  assert "W4238809453" in json.load(f)["id"]
245
254
 
246
255
 
256
+ def test_serializable_list(tmpdir):
257
+ with open(Path(tmpdir, "test.json"), "w") as f:
258
+ json.dump(Works().get(), f)
259
+
260
+ with open(Path(tmpdir, "test.json")) as f:
261
+ works = [Work(w) for w in json.load(f)]
262
+
263
+ assert len(works) == 25
264
+ assert all(isinstance(w, Work) for w in works)
265
+
266
+
267
+ @pytest.mark.skip("This test is not working due to unavailable API.")
247
268
  def test_ngrams_without_metadata():
248
269
  r = Works()["W2023271753"].ngrams(return_meta=False)
249
270
 
250
271
  assert len(r) == 1068
251
272
 
252
273
 
274
+ @pytest.mark.skip("This test is not working due to unavailable API.")
253
275
  def test_ngrams_with_metadata():
254
276
  r, meta = Works()["W2023271753"].ngrams(return_meta=True)
255
277
 
@@ -261,33 +283,80 @@ def test_random_publishers():
261
283
 
262
284
 
263
285
  def test_and_operator():
264
- # https://github.com/J535D165/pyalex/issues/11
265
- url = "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022"
286
+ urls = [
287
+ "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022",
288
+ "https://api.openalex.org/works?filter=institutions.country_code:tw+hk+us,publication_year:2022",
289
+ ]
266
290
 
267
291
  assert (
268
- url
269
- == Works()
292
+ Works()
270
293
  .filter(
271
294
  institutions={"country_code": ["tw", "hk", "us"]}, publication_year=2022
272
295
  )
273
296
  .url
297
+ in urls
274
298
  )
275
299
  assert (
276
- url
277
- == Works()
300
+ Works()
278
301
  .filter(institutions={"country_code": "tw"})
279
302
  .filter(institutions={"country_code": "hk"})
280
303
  .filter(institutions={"country_code": "us"})
281
304
  .filter(publication_year=2022)
282
305
  .url
306
+ in urls
283
307
  )
284
308
  assert (
285
- url
286
- == Works()
309
+ Works()
287
310
  .filter(institutions={"country_code": ["tw", "hk"]})
288
311
  .filter(institutions={"country_code": "us"})
289
312
  .filter(publication_year=2022)
290
313
  .url
314
+ in urls
315
+ )
316
+
317
+
318
+ def test_or_operator():
319
+ assert (
320
+ Works()
321
+ .filter_or(
322
+ institutions={"country_code": ["tw", "hk", "us"]}, publication_year=2022
323
+ )
324
+ .url
325
+ == "https://api.openalex.org/works?filter=institutions.country_code:tw|hk|us,publication_year:2022"
326
+ )
327
+
328
+
329
+ def test_not_operator():
330
+ assert (
331
+ Works()
332
+ .filter_not(institutions={"country_code": "us"})
333
+ .filter(publication_year=2022)
334
+ .url
335
+ == "https://api.openalex.org/works?filter=institutions.country_code:!us,publication_year:2022"
336
+ )
337
+
338
+
339
+ def test_not_operator_list():
340
+ assert (
341
+ Works()
342
+ .filter_not(institutions={"country_code": ["tw", "hk", "us"]})
343
+ .filter(publication_year=2022)
344
+ .url
345
+ == "https://api.openalex.org/works?filter=institutions.country_code:!tw+!hk+!us,publication_year:2022"
346
+ )
347
+
348
+
349
+ @pytest.mark.skip("Wait for feedback on issue by OpenAlex")
350
+ def test_combined_operators():
351
+ # works:
352
+ # https://api.openalex.org/works?filter=publication_year:>2022,publication_year:!2023
353
+
354
+ # doesn't work
355
+ # https://api.openalex.org/works?filter=publication_year:>2022+!2023
356
+
357
+ assert (
358
+ Works().filter_gt(publication_year=2022).filter_not(publication_year=2023).url
359
+ == "https://api.openalex.org/works?filter=publication_year:>2022+!2023"
291
360
  )
292
361
 
293
362
 
@@ -342,11 +411,10 @@ def test_filter_urlencoding():
342
411
  )
343
412
 
344
413
 
345
- @pytest.mark.skip("This test is not working due to inconsistencies in the API.")
346
414
  def test_urlencoding_list():
347
415
  assert (
348
416
  Works()
349
- .filter(
417
+ .filter_or(
350
418
  doi=[
351
419
  "https://doi.org/10.1207/s15327809jls0703&4_2",
352
420
  "https://doi.org/10.1001/jama.264.8.944b",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes