commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
- commonmeta_py-0.24.dist-info/RECORD +75 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
- commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
- commonmeta_py-0.23.dist-info/RECORD +0 -5
- /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,359 @@
|
|
1
|
+
"""datacite_xml reader for Commonmeta"""
|
2
|
+
|
3
|
+
from collections import defaultdict
|
4
|
+
import httpx
|
5
|
+
from pydash import py_
|
6
|
+
|
7
|
+
from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
|
8
|
+
from ..author_utils import get_authors
|
9
|
+
from ..date_utils import strip_milliseconds, normalize_date_dict
|
10
|
+
from ..doi_utils import doi_from_url, doi_as_url, datacite_api_url, normalize_doi
|
11
|
+
from ..utils import normalize_url, normalize_cc_url, dict_to_spdx
|
12
|
+
from ..constants import DC_TO_CM_TRANSLATIONS, Commonmeta
|
13
|
+
|
14
|
+
|
15
|
+
def get_datacite_xml(pid: str, **kwargs) -> dict:
    """Fetch the DataCite API record for ``pid``.

    Args:
        pid: DOI, or URL containing a DOI, of the record to fetch.
        **kwargs: forwarded unchanged to ``httpx.get``.

    Returns:
        The ``data.attributes`` object of the JSON response with
        ``"via": "datacite_xml"`` merged in, or ``{"state": "not_found"}``
        when no DOI can be extracted from ``pid`` or the API does not answer
        with status 200.
    """
    not_found = {"state": "not_found"}

    doi = doi_from_url(pid)
    if doi is None:
        return not_found

    response = httpx.get(datacite_api_url(doi), timeout=10, **kwargs)
    if response.status_code != 200:
        return not_found

    attributes = py_.get(response.json(), "data.attributes", {})
    return attributes | {"via": "datacite_xml"}
|
25
|
+
|
26
|
+
|
27
|
+
def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
    """Map a parsed DataCite XML document to a commonmeta dict.

    Args:
        data: dict whose ``"resource"`` key holds the parsed DataCite XML root
            element. Element text is read from ``"#text"`` keys, so the input
            is presumably the output of an XML-to-dict parser -- TODO confirm
            against the caller.
        **kwargs: read options. They are merged over the result with ``|``,
            so a read option overrides any key computed here.

    Returns:
        The commonmeta dict, or ``{"state": "not_found"}`` when ``data`` is
        ``None``.
    """
    if data is None:
        return {"state": "not_found"}

    read_options = kwargs or {}

    # everything below reads from the <resource> root element
    meta = data.get("resource", {})

    doi = parse_attributes(meta.get("identifier", None))
    _id = doi_as_url(doi) if doi else None

    # unknown or missing resourceTypeGeneral values fall back to "Other"
    resource__typegeneral = py_.get(meta, "resourceType.resourceTypeGeneral")
    _type = DC_TO_CM_TRANSLATIONS.get(resource__typegeneral, "Other")
    additional_type = py_.get(meta, "resourceType.#text")

    identifiers = wrap(py_.get(meta, "alternateIdentifiers.alternateIdentifier"))
    identifiers = get_xml_identifiers(identifiers)

    def format_title(title):
        """Turn one title entry (plain string or attribute dict) into a title dict.

        Returns None for any other input type.
        """
        if isinstance(title, str):
            return {"title": title}
        if isinstance(title, dict):
            return {
                "title": title.get("#text", None),
                "titleType": title.get("titleType", None),
                "lang": title.get("xml:lang", None),
            }
        return None

    titles = [format_title(i) for i in wrap(py_.get(meta, "titles.title"))]

    contributors = get_authors(wrap(py_.get(meta, "creators.creator")))
    # NOTE(review): creators are read from "creators.creator" but contributors
    # from the "contributors" container itself -- confirm that get_authors
    # copes with that shape or whether "contributors.contributor" was intended.
    contrib = get_authors(wrap(meta.get("contributors", None)))
    if contrib:
        contributors = contributors + contrib
    publisher = {"name": py_.get(meta, "publisher")}
    date = get_dates(
        wrap(py_.get(meta, "dates.date")), meta.get("publicationYear", None)
    )

    def format_description(description):
        """Turn one description entry into a description dict.

        A plain string is treated as an abstract; a dict keeps its own
        descriptionType (defaulting to "Abstract"). Returns None for any
        other input type.
        """
        if isinstance(description, str):
            return {"description": description, "type": "Abstract"}
        if isinstance(description, dict):
            return compact(
                {
                    "description": sanitize(description.get("#text", None)),
                    "type": description.get("descriptionType", "Abstract"),
                    "language": description.get("xml:lang", None),
                }
            )
        return None

    descriptions = [
        format_description(i) for i in wrap(py_.get(meta, "descriptions.description"))
    ]

    def format_subject(subject):
        """Turn one subject entry into a subject dict.

        Returns None for input that is neither a string nor a dict.
        """
        if isinstance(subject, str):
            # the scheme is the literal string "None" here, not the None object
            return {"subject": subject, "subjectScheme": "None"}
        if isinstance(subject, dict):
            return compact(
                {
                    "subject": subject.get("#text", None),
                    "subjectScheme": subject.get("subjectScheme", None),
                    "language": subject.get("xml:lang", None),
                }
            )
        return None

    # falsy subject entries are skipped before formatting
    subjects = [format_subject(i) for i in wrap(py_.get(meta, "subjects.subject")) if i]

    def format_geo_location(geo_location):
        """Turn one geoLocation entry into a geoLocation dict.

        Not called at present: the comprehension that used it is commented
        out below.
        """
        # NOTE(review): the dotted keys below are passed to dict.get, which
        # looks them up as literal keys instead of walking nested dicts the
        # way py_.get does -- verify before re-enabling this helper.
        if isinstance(geo_location, str):
            return {"geoLocationPlace": geo_location}
        if isinstance(geo_location, dict):
            return compact(
                {
                    "geoLocationPoint": compact(
                        {
                            "pointLatitude": compact(
                                geo_location.get("geoLocationPoint.pointLatitude", None)
                            ),
                            "pointLongitude": compact(
                                geo_location.get(
                                    "geoLocationPoint.pointLongitude", None
                                )
                            ),
                        }
                    ),
                    "geoLocationBox": compact(
                        {
                            "westBoundLongitude": compact(
                                geo_location.get(
                                    "geoLocationBox.westBoundLongitude", None
                                )
                            ),
                            "eastBoundLongitude": compact(
                                geo_location.get(
                                    "geoLocationBox.eastBoundLongitude", None
                                )
                            ),
                            "southBoundLatitude": compact(
                                geo_location.get(
                                    "geoLocationBox.southBoundLatitude", None
                                )
                            ),
                            "northBoundLatitude": compact(
                                geo_location.get(
                                    "geoLocationBox.northBoundLatitude", None
                                )
                            ),
                        }
                    ),
                    "geoLocationPolygon": {
                        "polygonPoint": compact(
                            {
                                "pointLatitude": geo_location.get(
                                    "geoLocationPolygon.polygonPoint.pointLatitude",
                                    None,
                                ),
                                "pointLongitude": geo_location.get(
                                    "geoLocationPolygon.polygonPoint.pointLongitude",
                                    None,
                                ),
                            }
                        )
                    },
                    "geoLocationPlace": geo_location.get("geoLocationPlace", None),
                }
            )
        return None

    # geo locations are currently not mapped; the result is always empty
    geo_locations = []  # [format_geo_location(i) for i in wrap(py_.get(meta, "geoLocations.geoLocation")) if i]

    def map_rights(rights):
        """Turn one rights entry into a rights dict.

        Defined but not called anywhere in this function.
        """
        return compact(
            {
                "rights": rights.get("#text", None),
                "url": rights.get("rightsURI", None),
                "lang": rights.get("xml:lang", None),
            }
        )

    # only the first rights entry is considered; its rightsURI is normalized
    # and then converted with dict_to_spdx
    license_ = wrap(py_.get(meta, "rightsList.rights"))
    if len(license_) > 0:
        license_ = normalize_cc_url(license_[0].get("rightsURI", None))
        license_ = dict_to_spdx({"url": license_}) if license_ else None

    # the same relatedIdentifier list feeds both references and relations;
    # each helper applies its own relationType filter
    references = get_xml_references(
        wrap(py_.get(meta, "relatedIdentifiers.relatedIdentifier"))
    )
    relations = get_xml_relations(
        wrap(py_.get(meta, "relatedIdentifiers.relatedIdentifier"))
    )

    def map_funding_reference(funding_reference):
        """Copy the funder and award fields of one fundingReference entry.

        Not called at present: the comprehension that used it is commented
        out below.
        """
        return {
            "funderName": funding_reference.get("funderName", None),
            "funderIdentifier": funding_reference.get("funderIdentifier", None),
            "funderIdentifierType": funding_reference.get("funderIdentifierType", None),
            "awardNumber": funding_reference.get("awardNumber", None),
            "awardTitle": funding_reference.get("awardTitle", None),
        }

    # funding references are currently not mapped; the result is always empty
    funding_references = []  # [map_funding_reference(i) for i in wrap(py_.get(meta, "fundingReferences.fundingReference"))]

    files = meta.get("contentUrl", None)
    # passing any read option marks the record as findable even without an id
    state = "findable" if _id or read_options else "not_found"

    return {
        # required properties
        "id": _id,
        "type": _type,
        "doi": doi_from_url(_id),
        "url": normalize_url(meta.get("url", None)),
        "contributors": presence(contributors),
        "titles": compact(titles),
        "publisher": publisher,
        "date": date,
        # recommended and optional properties
        "additionalType": presence(additional_type),
        "subjects": presence(subjects),
        "language": meta.get("language", None),
        "identifiers": identifiers,
        "version": meta.get("version", None),
        "license": presence(license_),
        "descriptions": presence(descriptions),
        "geoLocations": presence(geo_locations),
        "fundingReferences": presence(funding_references),
        "references": presence(references),
        "relations": presence(relations),
        # other properties
        "date_created": strip_milliseconds(meta.get("created", None)),
        "date_registered": strip_milliseconds(meta.get("registered", None)),
        "date_published": strip_milliseconds(meta.get("published", None)),
        "date_updated": strip_milliseconds(meta.get("updated", None)),
        "files": presence(files),
        "container": presence(meta.get("container", None)),
        "provider": "DataCite",
        "state": state,
        "schema_version": meta.get("xmlns", None),
    } | read_options
|
237
|
+
|
238
|
+
|
239
|
+
def get_xml_identifiers(identifiers: list) -> list:
    """Convert alternateIdentifier entries to identifier dicts.

    Args:
        identifiers: list of parsed ``alternateIdentifier`` entries. An entry
            is either a dict carrying the value under ``"#text"`` and the type
            under ``"alternateIdentifierType"``, or a bare string (an element
            parsed without attributes).

    Returns:
        One dict per entry with the keys ``identifier`` and
        ``identifierType``. Types outside the supported list, and bare
        strings, are reported as ``"Other"``.
    """
    supported_types = (
        "ARK",
        "arXiv",
        "Bibcode",
        "DOI",
        "Handle",
        "ISBN",
        "ISSN",
        "PMID",
        "PMCID",
        "PURL",
        "URL",
        "URN",
        "Other",
    )

    def format_identifier(identifier):
        """Format a single entry, accepting both str and dict input."""
        if isinstance(identifier, str):
            # A bare string has no type attribute; calling .get() on it would
            # raise AttributeError, so map it like an unsupported type.
            return {"identifier": identifier, "identifierType": "Other"}

        type_ = identifier.get("alternateIdentifierType", None)
        if type_ not in supported_types:
            type_ = "Other"

        return compact(
            {
                "identifier": identifier.get("#text", None),
                "identifierType": type_,
            }
        )

    return [format_identifier(i) for i in identifiers]
|
276
|
+
|
277
|
+
|
278
|
+
def get_xml_references(references: list) -> list:
    """Extract cited works from a list of relatedIdentifier dicts.

    An entry counts as a reference when its ``relationType`` is ``Cites`` or
    ``References`` and its ``relatedIdentifierType`` is ``DOI`` or ``URL``.

    Args:
        references: list of relatedIdentifier dicts. The dicts are not
            modified; earlier versions wrote ``doi``/``url`` keys back into
            them, which leaked into later readers of the same list.

    Returns:
        One new dict per reference: the entry without its DataCite
        bookkeeping keys, plus ``doi`` or ``url`` holding the normalized
        identifier when one is present.

    NOTE(review): the identifier is read from the ``relatedIdentifier`` key,
    whereas alternate identifiers in this module are read from ``#text`` --
    confirm which key the parsed XML actually carries.
    """
    dropped_keys = (
        "relationType",
        "relatedIdentifier",
        "relatedIdentifierType",
        "resourceTypeGeneral",
        "schemeType",
        "schemeUri",
        "relatedMetadataScheme",
    )

    def is_reference(reference):
        """Return True for Cites/References relations to a DOI or URL."""
        return reference.get("relationType", None) in [
            "Cites",
            "References",
        ] and reference.get("relatedIdentifierType", None) in ["DOI", "URL"]

    def map_reference(reference):
        """Build the output dict for one reference without touching the input."""
        identifier = reference.get("relatedIdentifier", None)
        identifier_type = reference.get("relatedIdentifierType", None)
        # Work on a filtered copy so the caller's dict stays unchanged.
        mapped = {
            key: value for key, value in reference.items() if key not in dropped_keys
        }
        if identifier and identifier_type == "DOI":
            mapped["doi"] = normalize_doi(identifier)
        elif identifier and identifier_type == "URL":
            mapped["url"] = normalize_url(identifier)
        return mapped

    return [map_reference(i) for i in references if is_reference(i)]
|
311
|
+
|
312
|
+
|
313
|
+
def get_xml_relations(relations: list) -> list:
    """Extract typed relations from a list of relatedIdentifier dicts.

    Only entries whose ``relationType`` is one of the supported relation
    types listed below are kept.

    Args:
        relations: list of relatedIdentifier dicts. The dicts are not
            modified.

    Returns:
        One ``{"id": ..., "type": ...}`` dict per kept entry. ``id`` is the
        related identifier, normalized with ``normalize_doi`` /
        ``normalize_url`` for DOI and URL identifiers and passed through
        unchanged otherwise. ``type`` is the entry's ``relationType``.
    """
    relation_types = (
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
    )

    def is_relation(relation):
        """Return True when the entry has a supported relationType."""
        return relation.get("relationType", None) in relation_types

    def map_relation(relation):
        """Build the output dict for one relation."""
        identifier = relation.get("relatedIdentifier", None)
        identifier_type = relation.get("relatedIdentifierType", None)
        # The normalized value used to be written into the input dict and then
        # discarded; use it as the id instead.
        if identifier and identifier_type == "DOI":
            identifier = normalize_doi(identifier)
        elif identifier and identifier_type == "URL":
            identifier = normalize_url(identifier)
        return {
            "id": identifier,
            # the relation type, not the identifier type ("DOI", "URL", ...)
            "type": relation.get("relationType", None),
        }

    return [map_relation(i) for i in relations if is_relation(i)]
|
350
|
+
|
351
|
+
|
352
|
+
def get_dates(dates: list, publication_year) -> dict:
    """Collapse a list of date entries into a dict keyed by date type.

    Each entry contributes its ``#text`` value under its ``dateType``; a later
    entry with the same type replaces an earlier one. When no ``Issued`` date
    results and ``publication_year`` is given, the year (as a string) is used
    for ``Issued``. The dict is then handed to ``normalize_date_dict``, whose
    result is returned.
    """
    by_type: dict = defaultdict(list)
    by_type.update(
        (entry.get("dateType", None), entry.get("#text", None)) for entry in dates
    )

    issued_missing = by_type.get("Issued", None) is None
    if issued_missing and publication_year is not None:
        by_type["Issued"] = str(publication_year)

    return normalize_date_dict(by_type)
|
@@ -0,0 +1,218 @@
|
|
1
|
+
"""InvenioRDM reader for Commonmeta"""
|
2
|
+
import httpx
|
3
|
+
from pydash import py_
|
4
|
+
from furl import furl
|
5
|
+
|
6
|
+
from ..utils import (
|
7
|
+
normalize_url,
|
8
|
+
normalize_doi,
|
9
|
+
dict_to_spdx,
|
10
|
+
name_to_fos,
|
11
|
+
from_inveniordm,
|
12
|
+
get_language,
|
13
|
+
)
|
14
|
+
from ..base_utils import compact, wrap, presence, sanitize
|
15
|
+
from ..author_utils import get_authors
|
16
|
+
from ..date_utils import strip_milliseconds
|
17
|
+
from ..doi_utils import doi_as_url, doi_from_url
|
18
|
+
from ..constants import (
|
19
|
+
INVENIORDM_TO_CM_TRANSLATIONS,
|
20
|
+
COMMONMETA_RELATION_TYPES,
|
21
|
+
Commonmeta,
|
22
|
+
)
|
23
|
+
|
24
|
+
|
25
|
+
def get_inveniordm(pid: str, **kwargs) -> dict:
    """Fetch an InvenioRDM record as JSON.

    Args:
        pid: URL of the record; it is passed through ``normalize_url`` before
            the request.
        **kwargs: forwarded unchanged to ``httpx.get``.

    Returns:
        The decoded JSON body, or ``{"state": "not_found"}`` when ``pid`` is
        ``None`` or the response status is not 200. Redirects are followed.
    """
    if pid is not None:
        response = httpx.get(
            normalize_url(pid), timeout=10, follow_redirects=True, **kwargs
        )
        if response.status_code == 200:
            return response.json()
    return {"state": "not_found"}
|
34
|
+
|
35
|
+
|
36
|
+
def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
    """Map an InvenioRDM record to a commonmeta dict.

    Args:
        data: the record as a dict, with the descriptive fields under
            ``"metadata"`` and the landing page under ``links.self_html``.
        **kwargs: read options. They are merged over the result with ``|``,
            so a read option overrides any key computed here.

    Returns:
        The commonmeta dict, or ``{"state": "not_found"}`` when ``data`` is
        ``None``.
    """
    if data is None:
        return {"state": "not_found"}

    meta = data
    read_options = kwargs or {}

    url = normalize_url(py_.get(meta, "links.self_html"))
    # prefer the DOI as identifier, fall back to the landing page URL
    _id = doi_as_url(meta.get("doi", None)) or url
    resource_type = py_.get(meta, "metadata.resource_type.type") or py_.get(
        meta, "metadata.resource_type.id"
    )
    # Keep only the part before the first hyphen; a record without a resource
    # type falls through to "Other" instead of raising AttributeError.
    if resource_type:
        resource_type = resource_type.split("-")[0]
    _type = INVENIORDM_TO_CM_TRANSLATIONS.get(resource_type, "Other")

    contributors = get_authors(
        from_inveniordm(wrap(py_.get(meta, "metadata.creators"))),
    )
    publisher = {
        "name": meta.get("publisher", None)
        or py_.get(meta, "metadata.publisher")
        or "Zenodo"
    }

    title = py_.get(meta, "metadata.title")
    titles = [{"title": sanitize(title)}] if title else None
    # TODO: metadata.additional_titles is not mapped yet

    date: dict = {}
    date["published"] = py_.get(meta, "metadata.publication_date")
    if date["published"]:
        # keep only the part before the first "/" (presumably the start of a
        # date interval -- confirm against the InvenioRDM date format)
        date["published"] = date["published"].split("/")[0]
    date["updated"] = strip_milliseconds(meta.get("updated", None))

    # the container is derived from the host of the landing page
    f = furl(url)
    if f.host == "zenodo.org":
        container = compact(
            {
                "id": "https://www.re3data.org/repository/r3d100010468",
                "type": "DataRepository" if _type == "Dataset" else "Repository",
                "title": "Zenodo",
            }
        )
    elif f.host in [
        "rogue-scholar.org",
        "beta.rogue-scholar.org",
        "demo.front-matter.io",
    ]:
        container = compact(
            {
                "type": "Repository",
                "title": "Rogue Scholar",
            }
        )
    else:
        container = None

    license_ = py_.get(meta, "metadata.license.id")
    if license_:
        license_ = dict_to_spdx({"id": license_})

    # description becomes the abstract, notes an additional description
    descriptions = format_descriptions(
        [
            py_.get(meta, "metadata.description"),
            py_.get(meta, "metadata.notes"),
        ]
    )
    language = py_.get(meta, "metadata.language") or py_.get(
        meta, "metadata.languages[0].id"
    )
    subjects = [name_to_fos(i) for i in wrap(py_.get(meta, "metadata.keywords"))]

    references = get_references(wrap(py_.get(meta, "metadata.related_identifiers")))
    relations = get_relations(wrap(py_.get(meta, "metadata.related_identifiers")))
    if meta.get("conceptdoi", None):
        # the concept DOI is recorded as an IsVersionOf relation
        relations.append(
            {
                "id": doi_as_url(meta.get("conceptdoi")),
                "type": "IsVersionOf",
            }
        )
    files = [get_file(i) for i in wrap(meta.get("files"))]

    return {
        # required properties
        "id": _id,
        "type": _type,
        "doi": doi_from_url(_id),
        "url": url,
        "contributors": presence(contributors),
        "titles": titles,
        "publisher": publisher,
        "date": compact(date),
        # recommended and optional properties
        # "additional_type": additional_type,
        "subjects": presence(subjects),
        "language": get_language(language),
        "version": py_.get(meta, "metadata.version"),
        "license": presence(license_),
        "descriptions": descriptions,
        "geoLocations": None,
        # "funding_references": presence(meta.get("fundingReferences", None)),
        "references": presence(references),
        "relations": presence(relations),
        # other properties
        "files": files,
        "container": container,
        "provider": "DataCite",
    } | read_options
|
135
|
+
|
136
|
+
|
137
|
+
def get_references(references: list) -> list:
    """Extract cited works from a list of related-identifier dicts.

    Two key styles are understood:

    * DataCite style: ``relationType`` / ``relatedIdentifier`` /
      ``relatedIdentifierType``. This was the only style handled before.
    * InvenioRDM style: ``relation`` or ``relation_type`` / ``identifier`` /
      ``scheme``. These are the keys ``get_relations`` reads from the same
      list, so without them no InvenioRDM entry could ever match.

    An entry is a reference when its relation type is ``Cites`` or
    ``References`` (the first letter of an InvenioRDM-style value is
    upper-cased before comparing).

    Args:
        references: list of related-identifier dicts. The dicts are not
            modified.

    Returns:
        One new dict per reference: the entry without its relation and
        identifier keys, plus ``id`` holding the normalized identifier when
        the identifier is a DOI or URL.
    """
    reference_types = ("Cites", "References")
    dropped_keys = (
        # DataCite-style keys
        "relationType",
        "relatedIdentifier",
        "relatedIdentifierType",
        "resourceTypeGeneral",
        "schemeType",
        "schemeUri",
        "relatedMetadataScheme",
        # InvenioRDM-style keys
        "relation",
        "relation_type",
        "identifier",
        "scheme",
        "resource_type",
    )

    def relation_type_of(reference):
        """Return the relation type of an entry in DataCite spelling, or None."""
        relation_type = reference.get("relationType", None)
        if relation_type is not None:
            return relation_type
        relation_type = reference.get("relation", None) or reference.get(
            "relation_type", None
        )
        if isinstance(relation_type, dict):
            # presumably a vocabulary object with the value under "id" -- TODO confirm
            relation_type = relation_type.get("id", None)
        if isinstance(relation_type, str):
            return relation_type[:1].upper() + relation_type[1:]
        return None

    def is_reference(reference):
        """Return True for Cites/References relations."""
        return relation_type_of(reference) in reference_types

    def map_reference(reference):
        """Build the output dict for one reference without touching the input."""
        identifier = reference.get("relatedIdentifier", None) or reference.get(
            "identifier", None
        )
        identifier_type = reference.get("relatedIdentifierType", None)
        if identifier_type is None:
            # InvenioRDM schemes are lower-case ("doi", "url")
            scheme = reference.get("scheme", None)
            identifier_type = scheme.upper() if isinstance(scheme, str) else None
        # Work on a filtered copy so the caller's dict stays unchanged.
        mapped = {
            key: value for key, value in reference.items() if key not in dropped_keys
        }
        if identifier and identifier_type == "DOI":
            mapped["id"] = normalize_doi(identifier)
        elif identifier and identifier_type == "URL":
            mapped["id"] = normalize_url(identifier)
        return mapped

    return [map_reference(i) for i in references if is_reference(i)]
|
167
|
+
|
168
|
+
|
169
|
+
def get_file(file: dict) -> dict:
    """Map one file entry of a record to a file dict.

    Args:
        file: file entry with optional ``bucket``, ``key``, ``checksum``,
            ``size`` and ``type`` keys and the download link under
            ``links.self``.

    Returns:
        The selected fields passed through ``compact``. ``mimeType`` is
        ``"application/"`` followed by the entry's ``type`` and is only set
        when a type is present.
    """
    _type = file.get("type", None)
    return compact(
        {
            "bucket": file.get("bucket", None),
            "key": file.get("key", None),
            "checksum": file.get("checksum", None),
            "url": py_.get(file, "links.self"),
            "size": file.get("size", None),
            # NOTE(review): every type is mapped to application/<type>;
            # confirm this is intended for non-application types.
            "mimeType": "application/" + _type if _type else None,
        }
    )
|
182
|
+
|
183
|
+
|
184
|
+
def get_relations(relations: list) -> list:
    """Convert related identifiers to ``{"id", "type"}`` relation dicts.

    Every entry is mapped first; afterwards only those whose type (first
    letter upper-cased) appears in ``COMMONMETA_RELATION_TYPES`` are returned.
    """

    def map_relation(relation: dict) -> dict:
        """Map one entry: normalize the identifier and capitalize the type."""
        target = relation.get("identifier", None)
        raw_type = relation.get("relation", None) or relation.get(
            "relation_type", None
        )
        # DOIs become DOI URLs, everything else goes through normalize_url
        if relation.get("scheme", None) == "doi":
            target = doi_as_url(target)
        else:
            target = normalize_url(target)
        type_ = py_.capitalize(raw_type, False) if raw_type else None
        return {"id": target, "type": type_}

    candidates = list(map(map_relation, relations))
    return [
        candidate
        for candidate in candidates
        if py_.upper_first(candidate["type"]) in COMMONMETA_RELATION_TYPES
    ]
|
207
|
+
|
208
|
+
|
209
|
+
def format_descriptions(descriptions: list) -> list:
    """Build description dicts from a list of raw description texts.

    Falsy entries are skipped. The entry at position 0 of the input list is
    typed ``Abstract``, every other position ``Other``; positions refer to
    the input list, so skipping an entry does not shift the types.
    """
    formatted = []
    for position, text in enumerate(descriptions):
        if not text:
            continue
        kind = "Other" if position else "Abstract"
        formatted.append({"description": sanitize(text), "type": kind})
    return formatted
|