commonmeta-py 0.104__py3-none-any.whl → 0.106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +1 -1
- commonmeta/api_utils.py +7 -6
- commonmeta/author_utils.py +10 -10
- commonmeta/base_utils.py +3 -2
- commonmeta/cli.py +3 -0
- commonmeta/date_utils.py +3 -2
- commonmeta/doi_utils.py +4 -14
- commonmeta/metadata.py +5 -1
- commonmeta/readers/cff_reader.py +9 -8
- commonmeta/readers/codemeta_reader.py +14 -13
- commonmeta/readers/crossref_reader.py +6 -5
- commonmeta/readers/crossref_xml_reader.py +20 -19
- commonmeta/readers/datacite_reader.py +21 -19
- commonmeta/readers/datacite_xml_reader.py +7 -6
- commonmeta/readers/inveniordm_reader.py +15 -16
- commonmeta/readers/json_feed_reader.py +25 -22
- commonmeta/readers/openalex_reader.py +31 -19
- commonmeta/readers/ris_reader.py +4 -4
- commonmeta/readers/schema_org_reader.py +31 -29
- commonmeta/schema_utils.py +1 -0
- commonmeta/translators.py +2 -1
- commonmeta/utils.py +81 -7
- {commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/METADATA +16 -15
- {commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/RECORD +27 -29
- commonmeta/resources/ietf-bcp-47.json +0 -3025
- commonmeta/resources/iso-8601.json +0 -3182
- {commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.104.dist-info → commonmeta_py-0.106.dist-info}/licenses/LICENSE +0 -0
commonmeta/__init__.py
CHANGED
commonmeta/api_utils.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
"""API Utils module for commonmeta-py"""
|
2
2
|
|
3
|
-
from typing import Optional
|
4
3
|
from datetime import datetime as date
|
5
|
-
import
|
6
|
-
|
4
|
+
from typing import Optional
|
5
|
+
|
7
6
|
import jwt
|
7
|
+
import requests
|
8
|
+
from furl import furl
|
8
9
|
|
9
|
-
from .doi_utils import
|
10
|
+
from .doi_utils import doi_as_url, validate_doi
|
10
11
|
from .readers.json_feed_reader import get_json_feed_item_uuid
|
11
12
|
|
12
13
|
|
@@ -52,7 +53,7 @@ def update_ghost_post_via_api(
|
|
52
53
|
f = furl(url)
|
53
54
|
slug = f.path.segments[-1]
|
54
55
|
ghost_url = f"{api_url}/ghost/api/admin/posts/slug/{slug}/"
|
55
|
-
response =
|
56
|
+
response = requests.get(ghost_url, headers=headers, timeout=10)
|
56
57
|
if response.status_code != 200:
|
57
58
|
return {"error": "Error fetching post"}
|
58
59
|
ghost_post = response.json().get("posts")[0]
|
@@ -67,7 +68,7 @@ def update_ghost_post_via_api(
|
|
67
68
|
ghost_url = f"{api_url}/ghost/api/admin/posts/{guid}/"
|
68
69
|
|
69
70
|
json = {"posts": [{"canonical_url": doi, "updated_at": updated_at}]}
|
70
|
-
response =
|
71
|
+
response = requests.put(
|
71
72
|
ghost_url,
|
72
73
|
headers=headers,
|
73
74
|
json=json,
|
commonmeta/author_utils.py
CHANGED
@@ -2,23 +2,23 @@
|
|
2
2
|
|
3
3
|
import re
|
4
4
|
from typing import List
|
5
|
+
|
6
|
+
from furl import furl
|
5
7
|
from nameparser import HumanName
|
6
8
|
from pydash import py_
|
7
|
-
from furl import furl
|
8
9
|
|
10
|
+
from .base_utils import compact, parse_attributes, presence, wrap
|
11
|
+
from .constants import (
|
12
|
+
COMMONMETA_CONTRIBUTOR_ROLES,
|
13
|
+
)
|
9
14
|
from .utils import (
|
10
|
-
|
15
|
+
format_name_identifier,
|
11
16
|
normalize_id,
|
12
|
-
normalize_ror,
|
13
17
|
normalize_isni,
|
14
|
-
|
15
|
-
|
18
|
+
normalize_orcid,
|
19
|
+
normalize_ror,
|
16
20
|
validate_orcid,
|
17
|
-
|
18
|
-
from .base_utils import parse_attributes, wrap, presence, compact
|
19
|
-
|
20
|
-
from .constants import (
|
21
|
-
COMMONMETA_CONTRIBUTOR_ROLES,
|
21
|
+
validate_ror,
|
22
22
|
)
|
23
23
|
|
24
24
|
|
commonmeta/base_utils.py
CHANGED
commonmeta/cli.py
CHANGED
@@ -12,6 +12,7 @@ from commonmeta.readers.datacite_reader import get_random_datacite_id
|
|
12
12
|
from commonmeta.readers.json_feed_reader import (
|
13
13
|
get_json_feed_item_uuid,
|
14
14
|
)
|
15
|
+
from commonmeta.readers.openalex_reader import get_random_openalex_id
|
15
16
|
|
16
17
|
|
17
18
|
@click.group()
|
@@ -131,6 +132,8 @@ def sample(provider, prefix, type, number, to, style, locale, show_errors):
|
|
131
132
|
)
|
132
133
|
elif provider == "datacite":
|
133
134
|
string = json.dumps({"items": get_random_datacite_id(number)})
|
135
|
+
elif provider == "openalex":
|
136
|
+
string = json.dumps({"items": get_random_openalex_id(number)})
|
134
137
|
else:
|
135
138
|
output = "Provider not supported. Use 'crossref' or 'datacite' instead."
|
136
139
|
click.echo(output)
|
commonmeta/date_utils.py
CHANGED
@@ -3,10 +3,11 @@
|
|
3
3
|
import datetime
|
4
4
|
from datetime import datetime as dt
|
5
5
|
from typing import Optional, Union
|
6
|
+
|
6
7
|
import dateparser
|
7
|
-
from edtf import parse_edtf, DateAndTime, Date
|
8
|
-
from edtf.parser.edtf_exceptions import EDTFParseException
|
9
8
|
import pydash as py_
|
9
|
+
from edtf import Date, DateAndTime, parse_edtf
|
10
|
+
from edtf.parser.edtf_exceptions import EDTFParseException
|
10
11
|
|
11
12
|
from .base_utils import compact
|
12
13
|
|
commonmeta/doi_utils.py
CHANGED
@@ -4,7 +4,7 @@ import re
|
|
4
4
|
from typing import Optional
|
5
5
|
|
6
6
|
import base32_lib as base32
|
7
|
-
import
|
7
|
+
import requests
|
8
8
|
from furl import furl
|
9
9
|
|
10
10
|
from .base_utils import compact
|
@@ -94,7 +94,7 @@ def short_doi_as_doi(doi: Optional[str]) -> Optional[str]:
|
|
94
94
|
doi_url = doi_as_url(doi)
|
95
95
|
if doi_url is None:
|
96
96
|
return None
|
97
|
-
response =
|
97
|
+
response = requests.head(doi_url, timeout=10)
|
98
98
|
if response.status_code != 301:
|
99
99
|
return doi_url
|
100
100
|
return response.headers.get("Location")
|
@@ -137,7 +137,7 @@ def get_doi_ra(doi) -> Optional[str]:
|
|
137
137
|
prefix = validate_prefix(doi)
|
138
138
|
if prefix is None:
|
139
139
|
return None
|
140
|
-
response =
|
140
|
+
response = requests.get("https://doi.org/ra/" + prefix, timeout=10)
|
141
141
|
if response.status_code != 200:
|
142
142
|
return None
|
143
143
|
return response.json()[0].get("RA", None)
|
@@ -170,7 +170,7 @@ def decode_doi(doi: str, checksum: bool = True) -> int:
|
|
170
170
|
|
171
171
|
def get_crossref_member(member_id) -> Optional[dict]:
|
172
172
|
"""Return the Crossref member for a given member_id"""
|
173
|
-
response =
|
173
|
+
response = requests.get("https://api.crossref.org/members/" + member_id, timeout=10)
|
174
174
|
if response.status_code != 200:
|
175
175
|
return None
|
176
176
|
data = response.json().get("message", None)
|
@@ -302,16 +302,6 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
|
|
302
302
|
return f"https://api.datacite.org/dois?random=true&page[size]={number}"
|
303
303
|
|
304
304
|
|
305
|
-
def openalex_api_url(doi: str, **kwargs) -> str:
|
306
|
-
"""Return the OpenAlex API URL for a given DOI"""
|
307
|
-
return f"https://api.openalex.org/works/{doi}"
|
308
|
-
|
309
|
-
|
310
|
-
def openalex_api_sample_url(number: int = 1, **kwargs) -> str:
|
311
|
-
"""Return the OpenAlex API URL for a sample of dois"""
|
312
|
-
return f"https://api.openalex.org/works?sample={number}"
|
313
|
-
|
314
|
-
|
315
305
|
def is_rogue_scholar_doi(doi: str) -> bool:
|
316
306
|
"""Return True if DOI is from Rogue Scholar"""
|
317
307
|
prefix = validate_prefix(doi)
|
commonmeta/metadata.py
CHANGED
@@ -199,6 +199,7 @@ class Metadata:
|
|
199
199
|
"codemeta",
|
200
200
|
"kbase",
|
201
201
|
"inveniordm",
|
202
|
+
"openalex",
|
202
203
|
]:
|
203
204
|
return json.loads(string)
|
204
205
|
else:
|
@@ -237,7 +238,7 @@ class Metadata:
|
|
237
238
|
elif via == "kbase":
|
238
239
|
return dict(read_kbase(data))
|
239
240
|
elif via == "openalex":
|
240
|
-
return read_openalex(data)
|
241
|
+
return dict(read_openalex(data))
|
241
242
|
elif via == "ris":
|
242
243
|
return dict(read_ris(data["data"] if isinstance(data, dict) else data))
|
243
244
|
else:
|
@@ -397,6 +398,7 @@ class MetadataList:
|
|
397
398
|
"crossref",
|
398
399
|
"datacite",
|
399
400
|
"schema_org",
|
401
|
+
"openalex",
|
400
402
|
"csl",
|
401
403
|
"json_feed_item",
|
402
404
|
]:
|
@@ -425,6 +427,8 @@ class MetadataList:
|
|
425
427
|
raise ValueError("Schema.org not supported for metadata lists")
|
426
428
|
elif to == "datacite":
|
427
429
|
raise ValueError("Datacite not supported for metadata lists")
|
430
|
+
elif to == "openalex":
|
431
|
+
raise ValueError("OpenAlex not supported for metadata lists")
|
428
432
|
elif to == "crossref_xml":
|
429
433
|
return write_crossref_xml_list(self)
|
430
434
|
else:
|
commonmeta/readers/cff_reader.py
CHANGED
@@ -2,26 +2,27 @@
|
|
2
2
|
|
3
3
|
from typing import Optional
|
4
4
|
from urllib.parse import urlparse
|
5
|
-
|
5
|
+
|
6
|
+
import requests
|
6
7
|
import yaml
|
7
8
|
|
9
|
+
from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
|
10
|
+
from ..constants import Commonmeta
|
11
|
+
from ..date_utils import get_iso8601_date
|
8
12
|
from ..utils import (
|
9
|
-
normalize_id,
|
10
|
-
name_to_fos,
|
11
13
|
dict_to_spdx,
|
12
|
-
normalize_orcid,
|
13
14
|
github_as_cff_url,
|
14
15
|
github_as_repo_url,
|
16
|
+
name_to_fos,
|
17
|
+
normalize_id,
|
18
|
+
normalize_orcid,
|
15
19
|
)
|
16
|
-
from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
|
17
|
-
from ..date_utils import get_iso8601_date
|
18
|
-
from ..constants import Commonmeta
|
19
20
|
|
20
21
|
|
21
22
|
def get_cff(pid: str, **kwargs) -> dict:
|
22
23
|
"""get_cff"""
|
23
24
|
url = github_as_cff_url(pid)
|
24
|
-
response =
|
25
|
+
response = requests.get(url, timeout=10, **kwargs)
|
25
26
|
if response.status_code != 200:
|
26
27
|
return {"state": "not_found"}
|
27
28
|
text = response.text
|
@@ -1,30 +1,31 @@
|
|
1
1
|
"""codemeta reader for commonmeta-py"""
|
2
2
|
|
3
|
-
from typing import Optional
|
4
3
|
from collections import defaultdict
|
5
|
-
import
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
import requests
|
6
7
|
|
8
|
+
from ..author_utils import get_authors
|
9
|
+
from ..base_utils import compact, presence, sanitize, wrap
|
10
|
+
from ..constants import (
|
11
|
+
SO_TO_CM_TRANSLATIONS,
|
12
|
+
Commonmeta,
|
13
|
+
)
|
7
14
|
from ..utils import (
|
8
|
-
normalize_id,
|
9
|
-
from_schema_org_creators,
|
10
|
-
name_to_fos,
|
11
15
|
dict_to_spdx,
|
16
|
+
doi_from_url,
|
17
|
+
from_schema_org_creators,
|
12
18
|
github_as_codemeta_url,
|
13
19
|
github_as_repo_url,
|
14
|
-
|
15
|
-
|
16
|
-
from ..base_utils import wrap, presence, compact, sanitize
|
17
|
-
from ..author_utils import get_authors
|
18
|
-
from ..constants import (
|
19
|
-
Commonmeta,
|
20
|
-
SO_TO_CM_TRANSLATIONS,
|
20
|
+
name_to_fos,
|
21
|
+
normalize_id,
|
21
22
|
)
|
22
23
|
|
23
24
|
|
24
25
|
def get_codemeta(pid: str, **kwargs) -> dict:
|
25
26
|
"""get_codemeta"""
|
26
27
|
url = str(github_as_codemeta_url(pid))
|
27
|
-
response =
|
28
|
+
response = requests.get(url, timeout=10, **kwargs)
|
28
29
|
if response.status_code != 200:
|
29
30
|
return {"state": "not_found"}
|
30
31
|
data = response.json()
|
@@ -2,8 +2,9 @@
|
|
2
2
|
|
3
3
|
from typing import Optional
|
4
4
|
|
5
|
-
import
|
5
|
+
import requests
|
6
6
|
from pydash import py_
|
7
|
+
from requests.exceptions import ConnectionError, ReadTimeout
|
7
8
|
|
8
9
|
from ..author_utils import get_authors
|
9
10
|
from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
|
@@ -34,7 +35,7 @@ from ..utils import (
|
|
34
35
|
def get_crossref_list(query: dict, **kwargs) -> list[dict]:
|
35
36
|
"""get_crossref list from Crossref API."""
|
36
37
|
url = crossref_api_query_url(query, **kwargs)
|
37
|
-
response =
|
38
|
+
response = requests.get(url, timeout=30, **kwargs)
|
38
39
|
if response.status_code != 200:
|
39
40
|
return []
|
40
41
|
return response.json().get("message", {}).get("items", [])
|
@@ -46,7 +47,7 @@ def get_crossref(pid: str, **kwargs) -> dict:
|
|
46
47
|
if doi is None:
|
47
48
|
return {"state": "not_found"}
|
48
49
|
url = crossref_api_url(doi)
|
49
|
-
response =
|
50
|
+
response = requests.get(url, timeout=10, **kwargs)
|
50
51
|
if response.status_code != 200:
|
51
52
|
return {"state": "not_found"}
|
52
53
|
return response.json().get("message", {}) | {"via": "crossref"}
|
@@ -402,11 +403,11 @@ def get_random_crossref_id(number: int = 1, **kwargs) -> list:
|
|
402
403
|
number = 20 if number > 20 else number
|
403
404
|
url = crossref_api_sample_url(number, **kwargs)
|
404
405
|
try:
|
405
|
-
response =
|
406
|
+
response = requests.get(url, timeout=10)
|
406
407
|
if response.status_code != 200:
|
407
408
|
return []
|
408
409
|
|
409
410
|
items = py_.get(response.json(), "message.items")
|
410
411
|
return [i.get("DOI") for i in items]
|
411
|
-
except (
|
412
|
+
except (ReadTimeout, ConnectionError):
|
412
413
|
return []
|
@@ -1,34 +1,35 @@
|
|
1
1
|
"""crossref_xml reader for commonmeta-py"""
|
2
2
|
|
3
|
-
from typing import Optional
|
4
3
|
from collections import defaultdict
|
5
|
-
import
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
import requests
|
6
7
|
from pydash import py_
|
7
8
|
|
8
|
-
from ..
|
9
|
-
doi_from_url,
|
10
|
-
dict_to_spdx,
|
11
|
-
from_crossref_xml,
|
12
|
-
normalize_cc_url,
|
13
|
-
normalize_issn,
|
14
|
-
normalize_url,
|
15
|
-
)
|
9
|
+
from ..author_utils import get_authors
|
16
10
|
from ..base_utils import (
|
17
11
|
compact,
|
18
|
-
wrap,
|
19
|
-
presence,
|
20
|
-
sanitize,
|
21
12
|
parse_attributes,
|
22
13
|
parse_xml,
|
14
|
+
presence,
|
15
|
+
sanitize,
|
16
|
+
wrap,
|
23
17
|
)
|
24
|
-
from ..author_utils import get_authors
|
25
|
-
from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
|
26
|
-
from ..doi_utils import get_doi_ra, crossref_xml_api_url, normalize_doi
|
27
18
|
from ..constants import (
|
28
|
-
|
19
|
+
CR_TO_CM_CONTAINER_TRANSLATIONS,
|
29
20
|
CR_TO_CM_TRANSLATIONS,
|
30
21
|
CROSSREF_CONTAINER_TYPES,
|
31
|
-
|
22
|
+
Commonmeta,
|
23
|
+
)
|
24
|
+
from ..date_utils import get_date_from_crossref_parts, get_iso8601_date
|
25
|
+
from ..doi_utils import crossref_xml_api_url, get_doi_ra, normalize_doi
|
26
|
+
from ..utils import (
|
27
|
+
dict_to_spdx,
|
28
|
+
doi_from_url,
|
29
|
+
from_crossref_xml,
|
30
|
+
normalize_cc_url,
|
31
|
+
normalize_issn,
|
32
|
+
normalize_url,
|
32
33
|
)
|
33
34
|
|
34
35
|
|
@@ -38,7 +39,7 @@ def get_crossref_xml(pid: str, **kwargs) -> dict:
|
|
38
39
|
if doi is None:
|
39
40
|
return {"state": "not_found"}
|
40
41
|
url = crossref_xml_api_url(doi)
|
41
|
-
response =
|
42
|
+
response = requests.get(
|
42
43
|
url, headers={"Accept": "text/xml;charset=utf-8"}, timeout=10, **kwargs
|
43
44
|
)
|
44
45
|
if response.status_code != 200:
|
@@ -2,29 +2,31 @@
|
|
2
2
|
|
3
3
|
from collections import defaultdict
|
4
4
|
from typing import Optional
|
5
|
-
|
5
|
+
|
6
|
+
import requests
|
6
7
|
from pydash import py_
|
8
|
+
from requests.exceptions import ReadTimeout
|
7
9
|
|
8
|
-
from ..utils import (
|
9
|
-
normalize_url,
|
10
|
-
normalize_doi,
|
11
|
-
normalize_cc_url,
|
12
|
-
dict_to_spdx,
|
13
|
-
format_name_identifier,
|
14
|
-
)
|
15
|
-
from ..base_utils import compact, wrap, presence
|
16
10
|
from ..author_utils import get_authors
|
11
|
+
from ..base_utils import compact, presence, wrap
|
12
|
+
from ..constants import (
|
13
|
+
DC_TO_CM_CONTAINER_TRANSLATIONS,
|
14
|
+
DC_TO_CM_TRANSLATIONS,
|
15
|
+
Commonmeta,
|
16
|
+
)
|
17
17
|
from ..date_utils import normalize_date_dict
|
18
18
|
from ..doi_utils import (
|
19
|
+
datacite_api_sample_url,
|
20
|
+
datacite_api_url,
|
19
21
|
doi_as_url,
|
20
22
|
doi_from_url,
|
21
|
-
datacite_api_url,
|
22
|
-
datacite_api_sample_url,
|
23
23
|
)
|
24
|
-
from ..
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
from ..utils import (
|
25
|
+
dict_to_spdx,
|
26
|
+
format_name_identifier,
|
27
|
+
normalize_cc_url,
|
28
|
+
normalize_doi,
|
29
|
+
normalize_url,
|
28
30
|
)
|
29
31
|
|
30
32
|
|
@@ -35,11 +37,11 @@ def get_datacite(pid: str, **kwargs) -> dict:
|
|
35
37
|
return {"state": "not_found"}
|
36
38
|
url = datacite_api_url(doi)
|
37
39
|
try:
|
38
|
-
response =
|
40
|
+
response = requests.get(url, timeout=10, **kwargs)
|
39
41
|
if response.status_code != 200:
|
40
42
|
return {"state": "not_found"}
|
41
43
|
return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite"}
|
42
|
-
except
|
44
|
+
except ReadTimeout:
|
43
45
|
return {"state": "timeout"}
|
44
46
|
|
45
47
|
|
@@ -380,11 +382,11 @@ def get_random_datacite_id(number: int = 1) -> list:
|
|
380
382
|
number = 20 if number > 20 else number
|
381
383
|
url = datacite_api_sample_url(number)
|
382
384
|
try:
|
383
|
-
response =
|
385
|
+
response = requests.get(url, timeout=60)
|
384
386
|
if response.status_code != 200:
|
385
387
|
return []
|
386
388
|
|
387
389
|
items = py_.get(response.json(), "data")
|
388
390
|
return [i.get("id") for i in items]
|
389
|
-
except
|
391
|
+
except ReadTimeout:
|
390
392
|
return []
|
@@ -1,15 +1,16 @@
|
|
1
1
|
"""datacite_xml reader for Commonmeta"""
|
2
2
|
|
3
3
|
from collections import defaultdict
|
4
|
-
|
4
|
+
|
5
|
+
import requests
|
5
6
|
from pydash import py_
|
6
7
|
|
7
|
-
from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
|
8
8
|
from ..author_utils import get_authors
|
9
|
-
from ..
|
10
|
-
from ..doi_utils import doi_from_url, doi_as_url, datacite_api_url, normalize_doi
|
11
|
-
from ..utils import normalize_url, normalize_cc_url, dict_to_spdx
|
9
|
+
from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
|
12
10
|
from ..constants import DC_TO_CM_TRANSLATIONS, Commonmeta
|
11
|
+
from ..date_utils import normalize_date_dict, strip_milliseconds
|
12
|
+
from ..doi_utils import datacite_api_url, doi_as_url, doi_from_url, normalize_doi
|
13
|
+
from ..utils import dict_to_spdx, normalize_cc_url, normalize_url
|
13
14
|
|
14
15
|
|
15
16
|
def get_datacite_xml(pid: str, **kwargs) -> dict:
|
@@ -18,7 +19,7 @@ def get_datacite_xml(pid: str, **kwargs) -> dict:
|
|
18
19
|
if doi is None:
|
19
20
|
return {"state": "not_found"}
|
20
21
|
url = datacite_api_url(doi)
|
21
|
-
response =
|
22
|
+
response = requests.get(url, timeout=10, **kwargs)
|
22
23
|
if response.status_code != 200:
|
23
24
|
return {"state": "not_found"}
|
24
25
|
return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite_xml"}
|
@@ -1,27 +1,27 @@
|
|
1
1
|
"""InvenioRDM reader for Commonmeta"""
|
2
2
|
|
3
|
-
import
|
4
|
-
from pydash import py_
|
3
|
+
import requests
|
5
4
|
from furl import furl
|
5
|
+
from pydash import py_
|
6
6
|
|
7
|
+
from ..author_utils import get_authors
|
8
|
+
from ..base_utils import compact, presence, sanitize, wrap
|
9
|
+
from ..constants import (
|
10
|
+
COMMONMETA_RELATION_TYPES,
|
11
|
+
INVENIORDM_TO_CM_TRANSLATIONS,
|
12
|
+
Commonmeta,
|
13
|
+
)
|
14
|
+
from ..date_utils import strip_milliseconds
|
15
|
+
from ..doi_utils import doi_as_url, doi_from_url
|
7
16
|
from ..utils import (
|
8
|
-
normalize_url,
|
9
|
-
normalize_doi,
|
10
17
|
dict_to_spdx,
|
11
|
-
name_to_fos,
|
12
18
|
from_inveniordm,
|
13
19
|
get_language,
|
20
|
+
name_to_fos,
|
21
|
+
normalize_doi,
|
22
|
+
normalize_url,
|
14
23
|
validate_ror,
|
15
24
|
)
|
16
|
-
from ..base_utils import compact, wrap, presence, sanitize
|
17
|
-
from ..author_utils import get_authors
|
18
|
-
from ..date_utils import strip_milliseconds
|
19
|
-
from ..doi_utils import doi_as_url, doi_from_url
|
20
|
-
from ..constants import (
|
21
|
-
INVENIORDM_TO_CM_TRANSLATIONS,
|
22
|
-
COMMONMETA_RELATION_TYPES,
|
23
|
-
Commonmeta,
|
24
|
-
)
|
25
25
|
|
26
26
|
|
27
27
|
def get_inveniordm(pid: str, **kwargs) -> dict:
|
@@ -29,7 +29,7 @@ def get_inveniordm(pid: str, **kwargs) -> dict:
|
|
29
29
|
if pid is None:
|
30
30
|
return {"state": "not_found"}
|
31
31
|
url = normalize_url(pid)
|
32
|
-
response =
|
32
|
+
response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
|
33
33
|
if response.status_code != 200:
|
34
34
|
return {"state": "not_found"}
|
35
35
|
return response.json()
|
@@ -63,7 +63,6 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
|
|
63
63
|
|
64
64
|
title = py_.get(meta, "metadata.title")
|
65
65
|
titles = [{"title": sanitize(title)}] if title else None
|
66
|
-
additional_titles = py_.get(meta, "metadata.additional_titles")
|
67
66
|
# if additional_titles:
|
68
67
|
# titles += [{"title": sanitize("bla")} for i in wrap(additional_titles)]
|
69
68
|
|
@@ -1,33 +1,34 @@
|
|
1
1
|
"""JSON Feed reader for commonmeta-py"""
|
2
2
|
|
3
3
|
from typing import Optional
|
4
|
-
|
5
|
-
|
4
|
+
|
5
|
+
import requests
|
6
6
|
from furl import furl
|
7
|
+
from pydash import py_
|
7
8
|
|
9
|
+
from ..author_utils import get_authors
|
10
|
+
from ..base_utils import parse_attributes, presence, sanitize
|
11
|
+
from ..constants import Commonmeta
|
12
|
+
from ..date_utils import get_date_from_unix_timestamp
|
13
|
+
from ..doi_utils import (
|
14
|
+
doi_from_url,
|
15
|
+
encode_doi,
|
16
|
+
is_rogue_scholar_doi,
|
17
|
+
normalize_doi,
|
18
|
+
validate_doi,
|
19
|
+
validate_prefix,
|
20
|
+
)
|
8
21
|
from ..utils import (
|
9
22
|
compact,
|
10
|
-
normalize_url,
|
11
|
-
from_json_feed,
|
12
|
-
wrap,
|
13
23
|
dict_to_spdx,
|
24
|
+
from_json_feed,
|
25
|
+
issn_as_url,
|
14
26
|
name_to_fos,
|
27
|
+
normalize_url,
|
15
28
|
validate_ror,
|
16
29
|
validate_url,
|
17
|
-
|
18
|
-
)
|
19
|
-
from ..author_utils import get_authors
|
20
|
-
from ..base_utils import presence, sanitize, parse_attributes
|
21
|
-
from ..date_utils import get_date_from_unix_timestamp
|
22
|
-
from ..doi_utils import (
|
23
|
-
normalize_doi,
|
24
|
-
validate_prefix,
|
25
|
-
validate_doi,
|
26
|
-
doi_from_url,
|
27
|
-
is_rogue_scholar_doi,
|
28
|
-
encode_doi,
|
30
|
+
wrap,
|
29
31
|
)
|
30
|
-
from ..constants import Commonmeta
|
31
32
|
|
32
33
|
|
33
34
|
def get_json_feed_item(pid: str, **kwargs) -> dict:
|
@@ -35,7 +36,7 @@ def get_json_feed_item(pid: str, **kwargs) -> dict:
|
|
35
36
|
if pid is None:
|
36
37
|
return {"state": "not_found"}
|
37
38
|
url = normalize_url(pid)
|
38
|
-
response =
|
39
|
+
response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
|
39
40
|
if response.status_code != 200:
|
40
41
|
return {"state": "not_found"}
|
41
42
|
return response.json() | {"via": "json_feed_item"}
|
@@ -255,7 +256,9 @@ def get_funding_references(meta: Optional[dict]) -> Optional[list]:
|
|
255
256
|
elif len(urls) == 2 and validate_ror(urls[0]):
|
256
257
|
f = furl(urls[0])
|
257
258
|
_id = f.path.segments[-1]
|
258
|
-
response =
|
259
|
+
response = requests.get(
|
260
|
+
f"https://api.ror.org/organizations/{_id}", timeout=10
|
261
|
+
)
|
259
262
|
ror = response.json()
|
260
263
|
funder_name = ror.get("name", None)
|
261
264
|
funder_identifier = urls[0]
|
@@ -398,7 +401,7 @@ def get_json_feed_item_uuid(id: str):
|
|
398
401
|
if id is None:
|
399
402
|
return None
|
400
403
|
url = f"https://api.rogue-scholar.org/posts/{id}"
|
401
|
-
response =
|
404
|
+
response = requests.get(url, timeout=10)
|
402
405
|
if response.status_code != 200:
|
403
406
|
return response.json()
|
404
407
|
post = response.json()
|
@@ -426,7 +429,7 @@ def get_json_feed_blog_slug(id: str):
|
|
426
429
|
if id is None:
|
427
430
|
return None
|
428
431
|
url = f"https://api.rogue-scholar.org/posts/{id}"
|
429
|
-
response =
|
432
|
+
response = requests.get(url, timeout=10)
|
430
433
|
if response.status_code != 200:
|
431
434
|
return response.json()
|
432
435
|
post = response.json()
|