commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/bibtex_reader.py +0 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
- commonmeta_py-0.24.dist-info/RECORD +75 -0
- {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
- commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
- commonmeta_py/__init__.py +0 -2
- commonmeta_py-0.22.dist-info/RECORD +0 -5
- {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,390 @@
|
|
1
|
+
"""datacite reader for Commonmeta"""
|
2
|
+
|
3
|
+
from collections import defaultdict
|
4
|
+
from typing import Optional
|
5
|
+
import httpx
|
6
|
+
from pydash import py_
|
7
|
+
|
8
|
+
from ..utils import (
|
9
|
+
normalize_url,
|
10
|
+
normalize_doi,
|
11
|
+
normalize_cc_url,
|
12
|
+
dict_to_spdx,
|
13
|
+
format_name_identifier,
|
14
|
+
)
|
15
|
+
from ..base_utils import compact, wrap, presence
|
16
|
+
from ..author_utils import get_authors
|
17
|
+
from ..date_utils import normalize_date_dict
|
18
|
+
from ..doi_utils import (
|
19
|
+
doi_as_url,
|
20
|
+
doi_from_url,
|
21
|
+
datacite_api_url,
|
22
|
+
datacite_api_sample_url,
|
23
|
+
)
|
24
|
+
from ..constants import (
|
25
|
+
DC_TO_CM_TRANSLATIONS,
|
26
|
+
DC_TO_CM_CONTAINER_TRANSLATIONS,
|
27
|
+
Commonmeta,
|
28
|
+
)
|
29
|
+
|
30
|
+
|
31
|
+
def get_datacite(pid: str, **kwargs) -> dict:
    """Fetch the DataCite metadata record for a DOI.

    Args:
        pid: Identifier from which a DOI is extracted with ``doi_from_url``.
        **kwargs: Extra keyword arguments forwarded unchanged to ``httpx.get``.

    Returns:
        The ``data.attributes`` object of the JSON response merged with
        ``{"via": "datacite"}``. ``{"state": "not_found"}`` is returned when no
        DOI can be extracted from ``pid`` or the response status is not 200,
        and ``{"state": "timeout"}`` when the request times out.
    """
    doi = doi_from_url(pid)
    if doi is None:
        return {"state": "not_found"}
    url = datacite_api_url(doi)
    try:
        response = httpx.get(url, timeout=10, **kwargs)
    except httpx.TimeoutException:
        # TimeoutException is the common base of ReadTimeout, ConnectTimeout,
        # WriteTimeout and PoolTimeout, so every kind of timeout is reported
        # the same way instead of only read timeouts.
        return {"state": "timeout"}
    if response.status_code != 200:
        return {"state": "not_found"}
    return py_.get(response.json(), "data.attributes", {}) | {"via": "datacite"}
|
44
|
+
|
45
|
+
|
46
|
+
def read_datacite(data: dict, **kwargs) -> Commonmeta:
    """Convert DataCite JSON metadata into a Commonmeta-style dict.

    Args:
        data: DataCite metadata attributes, or None.
        **kwargs: Read options. They are merged over the generated result, so
            a key passed here replaces the generated value of the same name.

    Returns:
        A dict with the keys assembled at the end of this function, or
        ``{"state": "not_found"}`` when ``data`` is None.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data

    read_options = kwargs or {}

    _id = doi_as_url(meta.get("doi", None))
    resource__typegeneral = py_.get(meta, "types.resourceTypeGeneral")
    resource_type = py_.get(meta, "types.resourceType")
    _type = DC_TO_CM_TRANSLATIONS.get(resource__typegeneral, "Other")
    additional_type = DC_TO_CM_TRANSLATIONS.get(resource_type, None)
    # if resource_type is one of the new resource__typegeneral types introduced in schema 4.3, use it
    if additional_type:
        _type = additional_type
        additional_type = None
    else:
        additional_type = resource_type
    titles = get_titles(wrap(meta.get("titles", None)))

    # creators come first, followed by any additional contributors
    contributors = get_authors(wrap(meta.get("creators", None)))
    contrib = get_authors(wrap(meta.get("contributors", None)))
    if contrib:
        contributors = contributors + contrib

    # publisher may be a plain string or a dict
    publisher = meta.get("publisher", None)
    if isinstance(publisher, str):
        publisher = {"name": publisher}
    elif isinstance(publisher, dict):
        publisher = get_publisher(publisher)
    date = get_dates(wrap(meta.get("dates", None)), meta.get("publicationYear", None))
    container = get_container(meta.get("container", None))

    # Only the first rights entry is used. wrap() makes a null or missing
    # rightsList an empty list (and a single dict a one-element list), so a
    # record with "rightsList": null no longer fails on len(None).
    rights_list = wrap(meta.get("rightsList", None))
    license_ = None
    if len(rights_list) > 0:
        license_url = normalize_cc_url(rights_list[0].get("rightsUri", None))
        license_ = dict_to_spdx({"url": license_url}) if license_url else None

    files = [get_file(i) for i in wrap(meta.get("content_url"))]

    # alternate identifiers plus the DOI of the record itself
    identifiers = get_identifiers(wrap(meta.get("alternateIdentifiers", None)))
    identifiers.append(
        compact(
            {
                "identifier": normalize_doi(_id),
                "identifierType": "DOI",
            }
        )
    )

    # relatedItems take precedence over relatedIdentifiers for references
    references = get_references(
        wrap(meta.get("relatedItems", None) or meta.get("relatedIdentifiers", None))
    )
    relations = get_relations(wrap(meta.get("relatedIdentifiers", None)))
    descriptions = get_descriptions(wrap(meta.get("descriptions", None)))
    geo_locations = get_geolocation(wrap(meta.get("geoLocations", None)))

    def format_subject(subject):
        """Keep only the subject text and its language."""
        return compact(
            {
                "subject": subject.get("subject", None),
                "language": subject.get("lang", None),
            }
        )

    subjects = py_.uniq([format_subject(i) for i in wrap(meta.get("subjects", None))])

    return {
        # required properties
        "id": _id,
        "type": _type,
        # recommended and optional properties
        "additionalType": additional_type,
        "container": presence(container),
        "contributors": presence(contributors),
        "date": compact(date),
        "descriptions": presence(descriptions),
        "files": presence(files),
        "fundingReferences": presence(meta.get("fundingReferences", None)),
        "geoLocations": presence(geo_locations),
        "identifiers": presence(identifiers),
        "language": meta.get("language", None),
        "license": presence(license_),
        "provider": "DataCite",
        "publisher": publisher,
        "references": presence(references),
        "relations": presence(relations),
        "subjects": presence(subjects),
        "titles": presence(titles),
        "url": normalize_url(meta.get("url", None)),
        "version": meta.get("version", None),
    } | read_options
|
139
|
+
|
140
|
+
|
141
|
+
def get_identifiers(identifiers: list) -> list:
    """Map alternate identifiers to identifier/identifierType entries.

    An entry whose ``identifierType`` is not one of the supported types is
    reported with the type "Other".
    """
    supported_types = (
        "ARK",
        "arXiv",
        "Bibcode",
        "DOI",
        "Handle",
        "ISBN",
        "ISSN",
        "PMID",
        "PMCID",
        "PURL",
        "URL",
        "URN",
        "Other",
    )

    def to_entry(item):
        """Build one output entry from an alternate identifier."""
        kind = item.get("identifierType", None)
        return compact(
            {
                "identifier": item.get("alternateIdentifier", None),
                "identifierType": kind if kind in supported_types else "Other",
            }
        )

    return list(map(to_entry, wrap(identifiers)))
|
177
|
+
|
178
|
+
|
179
|
+
def get_references(references: list) -> list:
    """Build the reference list from related identifiers.

    Only entries whose ``relationType`` is "Cites" or "References" are kept.
    The ``key`` of each kept entry is derived from its position in the input
    list, so skipped entries leave gaps in the numbering.
    """
    collected = []
    for position, entry in enumerate(references):
        if entry.get("relationType", None) not in ("Cites", "References"):
            continue
        raw = entry.get("relatedIdentifier", None)
        kind = entry.get("relatedIdentifierType", None)
        # DOIs and URLs are normalized, everything else is passed through
        if kind == "DOI":
            target = normalize_doi(raw)
        elif kind == "URL":
            target = normalize_url(raw)
        else:
            target = raw
        collected.append(compact({"key": f"ref{position + 1}", "id": target}))
    return collected
|
206
|
+
|
207
|
+
|
208
|
+
def get_relations(relations: list) -> list:
    """Build the relation list from related identifiers.

    Only entries with one of the relation types listed below are kept. The
    identifier is passed through ``normalize_doi``; when that yields nothing
    the raw identifier is used instead.
    """
    kept_types = (
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
    )

    mapped = []
    for entry in relations:
        relation_type = entry.get("relationType", None)
        if relation_type not in kept_types:
            continue
        raw = entry.get("relatedIdentifier", None)
        mapped.append(
            compact(
                {
                    "id": normalize_doi(raw) or raw,
                    "type": relation_type,
                }
            )
        )
    return mapped
|
246
|
+
|
247
|
+
|
248
|
+
def get_file(file: str) -> dict:
    """Wrap a content URL in a file entry, passed through ``compact``."""
    entry = {"url": file}
    return compact(entry)
|
251
|
+
|
252
|
+
|
253
|
+
def get_dates(dates: list, publication_year) -> dict:
    """Turn the list of dates into a dict keyed by date type.

    When no "Issued" date is present and a publication year is given, the
    year (as a string) is used as the "Issued" date. The collected dict is
    then handed to ``normalize_date_dict``.
    """
    by_type: dict = defaultdict(list)
    for entry in dates:
        # a later entry of the same date type overwrites an earlier one
        by_type[entry.get("dateType", None)] = entry.get("date", None)

    issued_missing = by_type.get("Issued", None) is None
    if issued_missing and publication_year is not None:
        by_type["Issued"] = str(publication_year)

    return normalize_date_dict(by_type)
|
261
|
+
|
262
|
+
|
263
|
+
def get_descriptions(descriptions: list) -> list:
    """Map descriptions to description/type/language entries.

    Entries without a ``description`` value are skipped. A description type
    other than "Abstract", "Methods", "TechnicalInfo" or "Other" is reported
    as "Other".
    """
    known_types = ("Abstract", "Methods", "TechnicalInfo", "Other")

    result = []
    for entry in descriptions:
        text = entry.get("description", None)
        if text is None:
            continue
        kind = entry.get("descriptionType", None)
        if kind not in known_types:
            kind = "Other"
        result.append(
            compact(
                {
                    "description": text,
                    "type": kind,
                    "language": entry.get("lang", None),
                }
            )
        )
    return result
|
284
|
+
|
285
|
+
|
286
|
+
def get_titles(titles: list) -> list:
    """Map titles to title/type/language entries.

    Entries without a ``title`` value are skipped. The type is kept only when
    it is "AlternativeTitle", "Subtitle" or "TranslatedTitle".
    """
    typed_titles = ("AlternativeTitle", "Subtitle", "TranslatedTitle")

    result = []
    for entry in titles:
        text = entry.get("title", None)
        if text is None:
            continue
        kind = entry.get("titleType", None)
        if kind not in typed_titles:
            kind = None
        result.append(
            compact(
                {
                    "title": text,
                    "type": kind,
                    "language": entry.get("lang", None),
                }
            )
        )
    return result
|
303
|
+
|
304
|
+
|
305
|
+
def get_publisher(publisher: dict) -> dict:
    """Build the publisher entry from a publisher dict.

    The id comes from ``format_name_identifier`` and the name from the
    ``name`` key; the result is passed through ``compact``.
    """
    publisher_id = format_name_identifier(publisher)
    publisher_name = publisher.get("name", None)
    return compact({"id": publisher_id, "name": publisher_name})
|
310
|
+
|
311
|
+
|
312
|
+
def get_geolocation(geolocations: list) -> list:
    """Map geolocations to entries with float coordinates.

    Each entry may carry ``geoLocationPoint``, ``geoLocationBox`` and
    ``geoLocationPlace``. Coordinates inside a point or box are converted to
    float; a coordinate that is missing is reported as None.
    """

    def to_float(value):
        """Convert a coordinate to float, or return None when it is missing."""
        # A numeric zero is a valid coordinate (equator / prime meridian) and
        # must not be treated as missing just because it is falsy.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        # Other values keep the truthiness rule: None and "" count as missing.
        return float(value) if value else None

    def geo_location_point(point: dict):
        """geo_location_point, convert lat and long to float"""
        return {
            "pointLatitude": to_float(point.get("pointLatitude", None)),
            "pointLongitude": to_float(point.get("pointLongitude", None)),
        }

    def geo_location_box(box: dict):
        """geo_location_box, convert lat and long to float"""
        return {
            "eastBoundLongitude": to_float(box.get("eastBoundLongitude", None)),
            "northBoundLatitude": to_float(box.get("northBoundLatitude", None)),
            "southBoundLatitude": to_float(box.get("southBoundLatitude", None)),
            "westBoundLongitude": to_float(box.get("westBoundLongitude", None)),
        }

    return [
        compact(
            {
                "geoLocationPoint": geo_location_point(location.get("geoLocationPoint"))
                if location.get("geoLocationPoint", None)
                else None,
                "geoLocationBox": geo_location_box(location.get("geoLocationBox"))
                if location.get("geoLocationBox", None)
                else None,
                "geoLocationPlace": location.get("geoLocationPlace", None),
            }
        )
        for location in geolocations
    ]
|
357
|
+
|
358
|
+
|
359
|
+
def get_container(container: Optional[dict]) -> Optional[dict]:
    """Build the container entry from a container dict.

    Args:
        container: Container dict, or None.

    Returns:
        None when ``container`` is None. Otherwise a dict built from the
        container's ``identifier``, translated ``type`` and ``title``, passed
        through ``compact``.
    """
    # The return annotation was ``dict or None``, which Python evaluates to
    # plain ``dict``; Optional[dict] states the real contract.
    if container is None:
        return None

    # The type is translated only when one is present.
    _type = (
        DC_TO_CM_CONTAINER_TRANSLATIONS.get(container.get("type"), None)
        if container.get("type", None)
        else None
    )

    return compact(
        {
            "id": container.get("identifier", None),
            "type": _type,
            "title": container.get("title", None),
        }
    )
|
376
|
+
|
377
|
+
|
378
|
+
def get_random_datacite_id(number: int = 1) -> list:
    """Get random DOIs from DataCite.

    Args:
        number: How many ids to request; values above 20 are capped at 20.

    Returns:
        The ``id`` of each item in the response's ``data`` list. An empty list
        is returned when the request times out, the response status is not
        200, or the response carries no ``data``.
    """
    number = min(number, 20)
    url = datacite_api_sample_url(number)
    try:
        response = httpx.get(url, timeout=60)
    except httpx.TimeoutException:
        # base class of ReadTimeout, ConnectTimeout, WriteTimeout, PoolTimeout
        return []
    if response.status_code != 200:
        return []

    # a missing or null "data" value yields an empty result instead of
    # failing when iterating over None
    items = py_.get(response.json(), "data") or []
    return [i.get("id") for i in items]
|