commonmeta-py 0.101__py3-none-any.whl → 0.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +51 -50
- commonmeta/base_utils.py +1 -0
- commonmeta/constants.py +35 -1
- commonmeta/crossref_utils.py +11 -8
- commonmeta/date_utils.py +1 -0
- commonmeta/doi_utils.py +42 -14
- commonmeta/metadata.py +209 -100
- commonmeta/readers/cff_reader.py +1 -0
- commonmeta/readers/codemeta_reader.py +1 -0
- commonmeta/readers/commonmeta_reader.py +1 -0
- commonmeta/readers/crossref_reader.py +19 -18
- commonmeta/readers/csl_reader.py +4 -1
- commonmeta/readers/inveniordm_reader.py +14 -9
- commonmeta/readers/json_feed_reader.py +9 -3
- commonmeta/readers/kbase_reader.py +1 -0
- commonmeta/readers/openalex_reader.py +380 -0
- commonmeta/readers/ris_reader.py +1 -0
- commonmeta/schema_utils.py +1 -0
- commonmeta/utils.py +121 -16
- commonmeta/writers/bibtex_writer.py +1 -0
- commonmeta/writers/citation_writer.py +1 -0
- commonmeta/writers/crossref_xml_writer.py +1 -0
- commonmeta/writers/csl_writer.py +1 -0
- commonmeta/writers/datacite_writer.py +1 -0
- commonmeta/writers/ris_writer.py +1 -0
- commonmeta/writers/schema_org_writer.py +1 -0
- {commonmeta_py-0.101.dist-info → commonmeta_py-0.103.dist-info}/METADATA +5 -8
- {commonmeta_py-0.101.dist-info → commonmeta_py-0.103.dist-info}/RECORD +31 -30
- {commonmeta_py-0.101.dist-info → commonmeta_py-0.103.dist-info}/licenses/LICENSE +1 -1
- {commonmeta_py-0.101.dist-info → commonmeta_py-0.103.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.101.dist-info → commonmeta_py-0.103.dist-info}/entry_points.txt +0 -0
commonmeta/__init__.py
CHANGED
@@ -10,11 +10,50 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.101"
|
13
|
+
__version__ = "0.103"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
17
17
|
# ruff: noqa: F401
|
18
|
+
from .author_utils import (
|
19
|
+
authors_as_string,
|
20
|
+
cleanup_author,
|
21
|
+
get_affiliations,
|
22
|
+
get_authors,
|
23
|
+
get_one_author,
|
24
|
+
is_personal_name,
|
25
|
+
)
|
26
|
+
from .base_utils import (
|
27
|
+
compact,
|
28
|
+
parse_attributes,
|
29
|
+
presence,
|
30
|
+
sanitize,
|
31
|
+
unwrap,
|
32
|
+
wrap,
|
33
|
+
)
|
34
|
+
from .date_utils import (
|
35
|
+
get_date_from_crossref_parts,
|
36
|
+
get_date_from_date_parts,
|
37
|
+
get_date_from_unix_timestamp,
|
38
|
+
get_date_parts,
|
39
|
+
get_iso8601_date,
|
40
|
+
strip_milliseconds,
|
41
|
+
)
|
42
|
+
from .doi_utils import (
|
43
|
+
crossref_api_url,
|
44
|
+
crossref_xml_api_url,
|
45
|
+
datacite_api_url,
|
46
|
+
decode_doi,
|
47
|
+
doi_as_url,
|
48
|
+
doi_from_url,
|
49
|
+
doi_resolver,
|
50
|
+
encode_doi,
|
51
|
+
get_doi_ra,
|
52
|
+
is_rogue_scholar_doi,
|
53
|
+
normalize_doi,
|
54
|
+
validate_doi,
|
55
|
+
validate_prefix,
|
56
|
+
)
|
18
57
|
from .metadata import Metadata, MetadataList
|
19
58
|
from .readers import (
|
20
59
|
cff_reader,
|
@@ -26,23 +65,15 @@ from .readers import (
|
|
26
65
|
inveniordm_reader,
|
27
66
|
json_feed_reader,
|
28
67
|
kbase_reader,
|
68
|
+
openalex_reader,
|
29
69
|
ris_reader,
|
30
70
|
schema_org_reader,
|
31
71
|
)
|
32
|
-
from .writers import (
|
33
|
-
bibtex_writer,
|
34
|
-
citation_writer,
|
35
|
-
commonmeta_writer,
|
36
|
-
csl_writer,
|
37
|
-
datacite_writer,
|
38
|
-
ris_writer,
|
39
|
-
schema_org_writer,
|
40
|
-
)
|
41
72
|
from .utils import (
|
42
73
|
dict_to_spdx,
|
74
|
+
extract_curie,
|
43
75
|
extract_url,
|
44
76
|
extract_urls,
|
45
|
-
extract_curie,
|
46
77
|
from_csl,
|
47
78
|
from_json_feed,
|
48
79
|
from_schema_org,
|
@@ -53,8 +84,8 @@ from .utils import (
|
|
53
84
|
normalize_id,
|
54
85
|
normalize_ids,
|
55
86
|
normalize_orcid,
|
56
|
-
normalize_url,
|
57
87
|
normalize_ror,
|
88
|
+
normalize_url,
|
58
89
|
pages_as_string,
|
59
90
|
replace_curie,
|
60
91
|
to_csl,
|
@@ -62,42 +93,12 @@ from .utils import (
|
|
62
93
|
validate_ror,
|
63
94
|
validate_url,
|
64
95
|
)
|
65
|
-
from .author_utils import (
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
from .base_utils import (
|
74
|
-
wrap,
|
75
|
-
unwrap,
|
76
|
-
compact,
|
77
|
-
presence,
|
78
|
-
parse_attributes,
|
79
|
-
sanitize,
|
80
|
-
)
|
81
|
-
from .date_utils import (
|
82
|
-
get_date_from_crossref_parts,
|
83
|
-
get_date_from_date_parts,
|
84
|
-
get_date_from_unix_timestamp,
|
85
|
-
get_date_parts,
|
86
|
-
get_iso8601_date,
|
87
|
-
strip_milliseconds,
|
88
|
-
)
|
89
|
-
from .doi_utils import (
|
90
|
-
crossref_api_url,
|
91
|
-
crossref_xml_api_url,
|
92
|
-
doi_from_url,
|
93
|
-
doi_as_url,
|
94
|
-
doi_resolver,
|
95
|
-
decode_doi,
|
96
|
-
encode_doi,
|
97
|
-
datacite_api_url,
|
98
|
-
get_doi_ra,
|
99
|
-
normalize_doi,
|
100
|
-
validate_doi,
|
101
|
-
validate_prefix,
|
102
|
-
is_rogue_scholar_doi,
|
96
|
+
from .writers import (
|
97
|
+
bibtex_writer,
|
98
|
+
citation_writer,
|
99
|
+
commonmeta_writer,
|
100
|
+
csl_writer,
|
101
|
+
datacite_writer,
|
102
|
+
ris_writer,
|
103
|
+
schema_org_writer,
|
103
104
|
)
|
commonmeta/base_utils.py
CHANGED
commonmeta/constants.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
"""Constants for commonmeta-py"""
|
2
2
|
|
3
|
-
from typing import Optional, TypedDict
|
3
|
+
from typing import List, Optional, TypedDict
|
4
4
|
|
5
5
|
|
6
6
|
class Commonmeta(TypedDict):
|
@@ -464,6 +464,30 @@ CM_TO_SO_TRANSLATIONS = {
|
|
464
464
|
"Presentation": "PresentationDigitalDocument",
|
465
465
|
}
|
466
466
|
|
467
|
+
# source: https://api.openalex.org/works?group_by=type
|
468
|
+
OA_TO_CM_TRANSLATIONS = {
|
469
|
+
"article": "Article",
|
470
|
+
"book": "Book",
|
471
|
+
"book-chapter": "BookChapter",
|
472
|
+
"dataset": "Dataset",
|
473
|
+
"dissertation": "Dissertation",
|
474
|
+
"editorial": "Document",
|
475
|
+
"erratum": "Other",
|
476
|
+
"grant": "Grant",
|
477
|
+
"letter": "Article",
|
478
|
+
"libguides": "InteractiveResource",
|
479
|
+
"other": "Other",
|
480
|
+
"paratext": "Component",
|
481
|
+
"peer-review": "PeerReview",
|
482
|
+
"preprint": "Article",
|
483
|
+
"reference-entry": "Other",
|
484
|
+
"report": "Report",
|
485
|
+
"retraction": "Other",
|
486
|
+
"review": "Article",
|
487
|
+
"standard": "Standard",
|
488
|
+
"supplementary-materials": "Component",
|
489
|
+
}
|
490
|
+
|
467
491
|
SO_TO_DC_RELATION_TYPES = {
|
468
492
|
"citation": "References",
|
469
493
|
"isBasedOn": "IsSupplementedBy",
|
@@ -527,6 +551,16 @@ DC_TO_CM_CONTAINER_TRANSLATIONS = {
|
|
527
551
|
"Series": "Series",
|
528
552
|
}
|
529
553
|
|
554
|
+
OA_TO_CM_CONTAINER_TRANLATIONS = {
|
555
|
+
"journal": "Journal",
|
556
|
+
"repository": "Repository",
|
557
|
+
"conference": "Proceedings",
|
558
|
+
"ebook platform": "Book",
|
559
|
+
"book series": "BookSeries",
|
560
|
+
"metadata": "DataRepository",
|
561
|
+
"Other": "Repository",
|
562
|
+
}
|
563
|
+
|
530
564
|
DATACITE_CONTRIBUTOR_TYPES = {
|
531
565
|
"ContactPerson": "ContactPerson",
|
532
566
|
"DataCollector": "DataCollector",
|
commonmeta/crossref_utils.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
"""Crossref utils module for commonmeta-py"""
|
2
2
|
|
3
|
-
from lxml import etree
|
4
|
-
from typing import Optional
|
5
|
-
from datetime import datetime
|
6
|
-
from dateutil.parser import parse
|
7
3
|
import uuid
|
4
|
+
from datetime import datetime
|
5
|
+
from typing import Optional
|
6
|
+
|
8
7
|
import pydash as py_
|
8
|
+
from dateutil.parser import parse
|
9
9
|
from furl import furl
|
10
|
+
from lxml import etree
|
10
11
|
|
11
|
-
from .constants import
|
12
|
-
from .utils import wrap, compact, normalize_orcid, normalize_id, validate_url
|
12
|
+
from .constants import ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS, Commonmeta
|
13
13
|
from .doi_utils import doi_from_url, validate_doi
|
14
|
+
from .utils import compact, normalize_id, normalize_orcid, validate_url, wrap
|
14
15
|
|
15
16
|
|
16
17
|
def generate_crossref_xml(metadata: Commonmeta) -> Optional[str]:
|
@@ -229,11 +230,12 @@ def insert_citation_list(metadata, xml):
|
|
229
230
|
return xml
|
230
231
|
|
231
232
|
citation_list = etree.SubElement(xml, "citation_list")
|
232
|
-
for ref in metadata.references:
|
233
|
+
for i, ref in enumerate(metadata.references):
|
234
|
+
print(i)
|
233
235
|
if ref.get("id", None) is None:
|
234
236
|
continue
|
235
237
|
citation = etree.SubElement(
|
236
|
-
citation_list, "citation", {"key": ref.get("key",
|
238
|
+
citation_list, "citation", {"key": ref.get("key", f"ref{i + 1}")}
|
237
239
|
)
|
238
240
|
if ref.get("journal_title", None) is not None:
|
239
241
|
etree.SubElement(citation, "journal_article").text = ref.get(
|
@@ -255,6 +257,7 @@ def insert_citation_list(metadata, xml):
|
|
255
257
|
etree.SubElement(citation, "unstructured_citation").text = ref.get(
|
256
258
|
"unstructured"
|
257
259
|
)
|
260
|
+
print(xml)
|
258
261
|
return xml
|
259
262
|
|
260
263
|
|
commonmeta/date_utils.py
CHANGED
commonmeta/doi_utils.py
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
import re
|
4
4
|
from typing import Optional
|
5
|
+
|
6
|
+
import base32_lib as base32
|
5
7
|
import httpx
|
6
8
|
from furl import furl
|
7
|
-
import base32_lib as base32
|
8
9
|
|
9
10
|
from .base_utils import compact
|
10
11
|
|
@@ -71,8 +72,12 @@ def doi_from_url(url: Optional[str]) -> Optional[str]:
|
|
71
72
|
f.path.segments[-1] in ["fetchobject.action"]
|
72
73
|
and f.args.get("uri", None) is not None
|
73
74
|
):
|
74
|
-
|
75
|
-
|
75
|
+
uri = f.args.get("uri")
|
76
|
+
if uri is not None:
|
77
|
+
f.path.segments.clear()
|
78
|
+
f.path.segments.append(uri)
|
79
|
+
|
80
|
+
path = str(f.path).replace("%2F", "/")
|
76
81
|
match = re.search(
|
77
82
|
r"(10\.\d{4,5}/.+)\Z",
|
78
83
|
path,
|
@@ -86,9 +91,12 @@ def short_doi_as_doi(doi: Optional[str]) -> Optional[str]:
|
|
86
91
|
"""Resolve a short DOI"""
|
87
92
|
if doi is None:
|
88
93
|
return None
|
89
|
-
|
94
|
+
doi_url = doi_as_url(doi)
|
95
|
+
if doi_url is None:
|
96
|
+
return None
|
97
|
+
response = httpx.head(doi_url, timeout=10)
|
90
98
|
if response.status_code != 301:
|
91
|
-
return
|
99
|
+
return doi_url
|
92
100
|
return response.headers.get("Location")
|
93
101
|
|
94
102
|
|
@@ -106,7 +114,10 @@ def normalize_doi(doi: Optional[str], **kwargs) -> Optional[str]:
|
|
106
114
|
doi_str = validate_doi(doi)
|
107
115
|
if not doi_str:
|
108
116
|
return None
|
109
|
-
|
117
|
+
resolver = doi_resolver(doi, **kwargs)
|
118
|
+
if resolver is None:
|
119
|
+
return None
|
120
|
+
return resolver + doi_str.lower()
|
110
121
|
|
111
122
|
|
112
123
|
def doi_resolver(doi, **kwargs):
|
@@ -144,13 +155,14 @@ def encode_doi(prefix, number: Optional[int] = None, checksum: bool = True) -> s
|
|
144
155
|
def decode_doi(doi: str, checksum: bool = True) -> int:
|
145
156
|
"""Decode a DOI to a number"""
|
146
157
|
try:
|
147
|
-
|
148
|
-
if
|
158
|
+
validated_doi = validate_doi(doi)
|
159
|
+
if validated_doi is None:
|
149
160
|
return 0
|
150
|
-
suffix =
|
161
|
+
suffix = validated_doi.split("/", maxsplit=1)[1]
|
151
162
|
if checksum:
|
152
163
|
number = base32.decode(suffix, checksum=True)
|
153
|
-
|
164
|
+
else:
|
165
|
+
number = base32.decode(suffix)
|
154
166
|
return number
|
155
167
|
except ValueError:
|
156
168
|
return 0
|
@@ -183,6 +195,9 @@ def crossref_api_query_url(query: dict) -> str:
|
|
183
195
|
rows = min(int(query.get("rows", 20)), 1000)
|
184
196
|
queries = []
|
185
197
|
filters = []
|
198
|
+
_query = None
|
199
|
+
_filter = None
|
200
|
+
|
186
201
|
if query.get("query", None) is not None:
|
187
202
|
queries += [query.get("query")]
|
188
203
|
for key, value in query.items():
|
@@ -193,7 +208,8 @@ def crossref_api_query_url(query: dict) -> str:
|
|
193
208
|
"query.container-title",
|
194
209
|
]:
|
195
210
|
queries += [f"{key}:{value}"]
|
196
|
-
|
211
|
+
if queries:
|
212
|
+
_query = ",".join(queries)
|
197
213
|
|
198
214
|
for key, value in query.items():
|
199
215
|
if key in [
|
@@ -207,8 +223,10 @@ def crossref_api_query_url(query: dict) -> str:
|
|
207
223
|
"has-license",
|
208
224
|
]:
|
209
225
|
filters += [f"{key}:{value}"]
|
210
|
-
|
211
|
-
|
226
|
+
if filters:
|
227
|
+
_filter = ",".join(filters)
|
228
|
+
|
229
|
+
f.args.update(compact({"rows": rows, "query": _query, "filter": _filter}))
|
212
230
|
|
213
231
|
return f.url
|
214
232
|
|
@@ -284,6 +302,16 @@ def datacite_api_sample_url(number: int = 1, **kwargs) -> str:
|
|
284
302
|
return f"https://api.datacite.org/dois?random=true&page[size]={number}"
|
285
303
|
|
286
304
|
|
305
|
+
def openalex_api_url(doi: str, **kwargs) -> str:
|
306
|
+
"""Return the OpenAlex API URL for a given DOI"""
|
307
|
+
return f"https://api.openalex.org/works/{doi}"
|
308
|
+
|
309
|
+
|
310
|
+
def openalex_api_sample_url(number: int = 1, **kwargs) -> str:
|
311
|
+
"""Return the OpenAlex API URL for a sample of dois"""
|
312
|
+
return f"https://api.openalex.org/works?sample={number}"
|
313
|
+
|
314
|
+
|
287
315
|
def is_rogue_scholar_doi(doi: str) -> bool:
|
288
316
|
"""Return True if DOI is from Rogue Scholar"""
|
289
317
|
prefix = validate_prefix(doi)
|
@@ -298,5 +326,5 @@ def is_rogue_scholar_doi(doi: str) -> bool:
|
|
298
326
|
"10.59350",
|
299
327
|
"10.63485",
|
300
328
|
"10.64000",
|
301
|
-
"10.71938",
|
329
|
+
"10.71938", # not managed by Front Matter
|
302
330
|
]
|