commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/bibtex_reader.py +0 -0
  13. commonmeta/readers/cff_reader.py +199 -0
  14. commonmeta/readers/codemeta_reader.py +112 -0
  15. commonmeta/readers/commonmeta_reader.py +13 -0
  16. commonmeta/readers/crossref_reader.py +409 -0
  17. commonmeta/readers/crossref_xml_reader.py +505 -0
  18. commonmeta/readers/csl_reader.py +98 -0
  19. commonmeta/readers/datacite_reader.py +390 -0
  20. commonmeta/readers/datacite_xml_reader.py +359 -0
  21. commonmeta/readers/inveniordm_reader.py +218 -0
  22. commonmeta/readers/json_feed_reader.py +420 -0
  23. commonmeta/readers/kbase_reader.py +205 -0
  24. commonmeta/readers/ris_reader.py +103 -0
  25. commonmeta/readers/schema_org_reader.py +506 -0
  26. commonmeta/resources/cff_v1.2.0.json +1827 -0
  27. commonmeta/resources/commonmeta_v0.12.json +601 -0
  28. commonmeta/resources/commonmeta_v0.13.json +559 -0
  29. commonmeta/resources/commonmeta_v0.14.json +573 -0
  30. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  36. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  37. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  38. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  39. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  40. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  41. commonmeta/resources/crossref/fundref.xsd +49 -0
  42. commonmeta/resources/crossref/module-ali.xsd +39 -0
  43. commonmeta/resources/crossref/relations.xsd +444 -0
  44. commonmeta/resources/crossref-v0.2.json +60 -0
  45. commonmeta/resources/csl-data.json +538 -0
  46. commonmeta/resources/datacite-v4.5.json +829 -0
  47. commonmeta/resources/datacite-v4.5pr.json +608 -0
  48. commonmeta/resources/ietf-bcp-47.json +3025 -0
  49. commonmeta/resources/iso-8601.json +3182 -0
  50. commonmeta/resources/spdx/licenses.json +4851 -0
  51. commonmeta/resources/spdx-schema..json +903 -0
  52. commonmeta/resources/styles/apa.csl +1697 -0
  53. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  54. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  55. commonmeta/resources/styles/ieee.csl +468 -0
  56. commonmeta/resources/styles/modern-language-association.csl +341 -0
  57. commonmeta/resources/styles/vancouver.csl +376 -0
  58. commonmeta/schema_utils.py +27 -0
  59. commonmeta/translators.py +47 -0
  60. commonmeta/utils.py +1108 -0
  61. commonmeta/writers/__init__.py +1 -0
  62. commonmeta/writers/bibtex_writer.py +149 -0
  63. commonmeta/writers/citation_writer.py +70 -0
  64. commonmeta/writers/commonmeta_writer.py +68 -0
  65. commonmeta/writers/crossref_xml_writer.py +17 -0
  66. commonmeta/writers/csl_writer.py +79 -0
  67. commonmeta/writers/datacite_writer.py +193 -0
  68. commonmeta/writers/inveniordm_writer.py +94 -0
  69. commonmeta/writers/ris_writer.py +58 -0
  70. commonmeta/writers/schema_org_writer.py +146 -0
  71. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  72. commonmeta_py-0.24.dist-info/RECORD +75 -0
  73. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  74. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  75. commonmeta_py/__init__.py +0 -2
  76. commonmeta_py-0.22.dist-info/RECORD +0 -5
  77. {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,359 @@
1
+ """datacite_xml reader for Commonmeta"""
2
+
3
+ from collections import defaultdict
4
+ import httpx
5
+ from pydash import py_
6
+
7
+ from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
8
+ from ..author_utils import get_authors
9
+ from ..date_utils import strip_milliseconds, normalize_date_dict
10
+ from ..doi_utils import doi_from_url, doi_as_url, datacite_api_url, normalize_doi
11
+ from ..utils import normalize_url, normalize_cc_url, dict_to_spdx
12
+ from ..constants import DC_TO_CM_TRANSLATIONS, Commonmeta
13
+
14
+
15
def get_datacite_xml(pid: str, **kwargs) -> dict:
    """Fetch the DataCite record attributes for a DOI.

    A DOI is extracted from ``pid`` and the URL built by ``datacite_api_url``
    is requested with a 10 second timeout. Extra keyword arguments are passed
    through to ``httpx.get``.

    Returns the ``data.attributes`` part of the JSON response with a ``via``
    marker merged in, or ``{"state": "not_found"}`` when no DOI can be
    extracted or the response status is not 200.
    """
    not_found = {"state": "not_found"}

    doi = doi_from_url(pid)
    if doi is None:
        return not_found

    response = httpx.get(datacite_api_url(doi), timeout=10, **kwargs)
    if response.status_code != 200:
        return not_found

    attributes = py_.get(response.json(), "data.attributes", {})
    return attributes | {"via": "datacite_xml"}
25
+
26
+
27
def read_datacite_xml(data: dict, **kwargs) -> Commonmeta:
    """Convert a parsed DataCite XML record into a commonmeta dict.

    Args:
        data: dict holding the record under the ``resource`` key.
        **kwargs: read options; they are merged over the result and also make
            the record count as "findable" when no DOI is present.

    Returns:
        The commonmeta dict, or ``{"state": "not_found"}`` when ``data`` is
        None.
    """
    if data is None:
        return {"state": "not_found"}

    read_options = kwargs or {}

    # a "resource" key that is present but empty is treated like a missing one
    meta = data.get("resource", None) or {}

    doi = parse_attributes(meta.get("identifier", None))
    _id = doi_as_url(doi) if doi else None

    resource__typegeneral = py_.get(meta, "resourceType.resourceTypeGeneral")
    _type = DC_TO_CM_TRANSLATIONS.get(resource__typegeneral, "Other")
    additional_type = py_.get(meta, "resourceType.#text")

    identifiers = get_xml_identifiers(
        wrap(py_.get(meta, "alternateIdentifiers.alternateIdentifier"))
    )

    def format_title(title):
        """Return a title dict for a plain string or an attribute dict."""
        if isinstance(title, str):
            return {"title": title}
        if isinstance(title, dict):
            return {
                "title": title.get("#text", None),
                "titleType": title.get("titleType", None),
                "lang": title.get("xml:lang", None),
            }
        return None

    titles = [format_title(i) for i in wrap(py_.get(meta, "titles.title"))]

    contributors = get_authors(wrap(py_.get(meta, "creators.creator")))
    contrib = get_authors(wrap(meta.get("contributors", None)))
    if contrib:
        contributors = contributors + contrib
    publisher = {"name": py_.get(meta, "publisher")}
    date = get_dates(
        wrap(py_.get(meta, "dates.date")), meta.get("publicationYear", None)
    )

    def format_description(description):
        """Return a description dict; the type defaults to "Abstract"."""
        if isinstance(description, str):
            return {"description": description, "type": "Abstract"}
        if isinstance(description, dict):
            return compact(
                {
                    "description": sanitize(description.get("#text", None)),
                    "type": description.get("descriptionType", "Abstract"),
                    "language": description.get("xml:lang", None),
                }
            )
        return None

    descriptions = [
        format_description(i) for i in wrap(py_.get(meta, "descriptions.description"))
    ]

    def format_subject(subject):
        """Return a subject dict for a plain string or an attribute dict."""
        if isinstance(subject, str):
            return {"subject": subject, "subjectScheme": "None"}
        if isinstance(subject, dict):
            return compact(
                {
                    "subject": subject.get("#text", None),
                    "subjectScheme": subject.get("subjectScheme", None),
                    "language": subject.get("xml:lang", None),
                }
            )
        return None

    subjects = [format_subject(i) for i in wrap(py_.get(meta, "subjects.subject")) if i]

    # TODO: geoLocations and fundingReferences are not mapped yet; both are
    # always reported as absent.
    geo_locations = []
    funding_references = []

    # only the first rights entry is used to derive the license
    rights_list = wrap(py_.get(meta, "rightsList.rights"))
    license_ = None
    if rights_list:
        first_rights = rights_list[0]
        # NOTE(review): a rights element without attributes is presumably
        # parsed as a plain string, which carries no rightsURI - confirm
        # against the XML parser output.
        rights_uri = (
            first_rights.get("rightsURI", None)
            if isinstance(first_rights, dict)
            else None
        )
        license_url = normalize_cc_url(rights_uri)
        license_ = dict_to_spdx({"url": license_url}) if license_url else None

    # references and relations are two views of the same related identifiers
    related_identifiers = wrap(py_.get(meta, "relatedIdentifiers.relatedIdentifier"))
    references = get_xml_references(related_identifiers)
    relations = get_xml_relations(related_identifiers)

    files = meta.get("contentUrl", None)
    state = "findable" if _id or read_options else "not_found"

    return {
        # required properties
        "id": _id,
        "type": _type,
        "doi": doi_from_url(_id),
        "url": normalize_url(meta.get("url", None)),
        "contributors": presence(contributors),
        "titles": compact(titles),
        "publisher": publisher,
        "date": date,
        # recommended and optional properties
        "additionalType": presence(additional_type),
        "subjects": presence(subjects),
        "language": meta.get("language", None),
        "identifiers": identifiers,
        "version": meta.get("version", None),
        "license": presence(license_),
        "descriptions": presence(descriptions),
        "geoLocations": presence(geo_locations),
        "fundingReferences": presence(funding_references),
        "references": presence(references),
        "relations": presence(relations),
        # other properties
        "date_created": strip_milliseconds(meta.get("created", None)),
        "date_registered": strip_milliseconds(meta.get("registered", None)),
        "date_published": strip_milliseconds(meta.get("published", None)),
        "date_updated": strip_milliseconds(meta.get("updated", None)),
        "files": presence(files),
        "container": presence(meta.get("container", None)),
        "provider": "DataCite",
        "state": state,
        "schema_version": meta.get("xmlns", None),
    } | read_options
237
+
238
+
239
def get_xml_identifiers(identifiers: list) -> list:
    """Map alternate identifiers to identifier/identifierType dicts.

    An ``alternateIdentifierType`` outside the supported set is reported as
    "Other". The identifier value is read from the ``#text`` key.
    """
    supported_types = (
        "ARK",
        "arXiv",
        "Bibcode",
        "DOI",
        "Handle",
        "ISBN",
        "ISSN",
        "PMID",
        "PMCID",
        "PURL",
        "URL",
        "URN",
        "Other",
    )

    def to_identifier(item):
        """Build the output dict for one alternate identifier."""
        declared_type = item.get("alternateIdentifierType", None)
        return compact(
            {
                "identifier": item.get("#text", None),
                "identifierType": (
                    declared_type if declared_type in supported_types else "Other"
                ),
            }
        )

    return list(map(to_identifier, identifiers))
276
+
277
+
278
def get_xml_references(references: list) -> list:
    """Select related identifiers that are references and map them.

    An entry counts as a reference when its ``relationType`` is "Cites" or
    "References" and its ``relatedIdentifierType`` is "DOI" or "URL".

    Args:
        references: list of related identifier dicts; they are not modified.

    Returns:
        One new dict per reference: the DataCite bookkeeping keys are dropped
        and a normalized ``doi`` or ``url`` key is added when an identifier is
        present.
    """
    dropped_keys = (
        "relationType",
        "relatedIdentifier",
        "relatedIdentifierType",
        "resourceTypeGeneral",
        "schemeType",
        "schemeUri",
        "relatedMetadataScheme",
    )

    def is_reference(reference):
        """Return True for Cites/References entries identified by DOI or URL."""
        return reference.get("relationType", None) in [
            "Cites",
            "References",
        ] and reference.get("relatedIdentifierType", None) in ["DOI", "URL"]

    def map_reference(reference):
        """Build the output dict without touching the caller's dict."""
        identifier = reference.get("relatedIdentifier", None)
        identifier_type = reference.get("relatedIdentifierType", None)
        # copy first so the normalized value is not written into the input
        mapped = {
            key: value for key, value in reference.items() if key not in dropped_keys
        }
        if identifier and identifier_type == "DOI":
            mapped["doi"] = normalize_doi(identifier)
        elif identifier and identifier_type == "URL":
            mapped["url"] = normalize_url(identifier)
        return mapped

    return [map_reference(i) for i in references if is_reference(i)]
311
+
312
+
313
def get_xml_relations(relations: list) -> list:
    """Select related identifiers with a supported relation type.

    Args:
        relations: list of related identifier dicts; they are not modified.

    Returns:
        One ``{"id", "type"}`` dict per supported entry, where ``id`` is the
        ``relatedIdentifier`` as given and ``type`` is the
        ``relatedIdentifierType``.
    """
    supported_relation_types = (
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
    )

    def is_relation(relation):
        """Return True when the relationType is one of the supported types."""
        return relation.get("relationType", None) in supported_relation_types

    def map_relation(relation):
        """Build the output dict for one related identifier."""
        # The result never carried a normalized doi/url value, so none is
        # computed here and the caller's dict is left untouched.
        # NOTE(review): "type" holds the identifier type (e.g. "DOI"), not the
        # relation type - confirm this is what consumers expect.
        return {
            "id": relation.get("relatedIdentifier", None),
            "type": relation.get("relatedIdentifierType", None),
        }

    return [map_relation(i) for i in relations if is_relation(i)]
350
+
351
+
352
def get_dates(dates: list, publication_year) -> dict:
    """Collapse a list of date entries into a dict keyed by date type.

    Each entry contributes its ``#text`` value under its ``dateType``; a later
    entry of the same type replaces an earlier one. When no "Issued" date is
    present, ``publication_year`` (if given) is used for it as a string. The
    collected dict is handed to ``normalize_date_dict`` for the final result.
    """
    by_type: dict = defaultdict(list)
    by_type.update(
        (entry.get("dateType", None), entry.get("#text", None)) for entry in dates
    )

    issued_missing = by_type.get("Issued", None) is None
    if issued_missing and publication_year is not None:
        by_type["Issued"] = str(publication_year)

    return normalize_date_dict(by_type)
@@ -0,0 +1,218 @@
1
+ """InvenioRDM reader for Commonmeta"""
2
+ import httpx
3
+ from pydash import py_
4
+ from furl import furl
5
+
6
+ from ..utils import (
7
+ normalize_url,
8
+ normalize_doi,
9
+ dict_to_spdx,
10
+ name_to_fos,
11
+ from_inveniordm,
12
+ get_language,
13
+ )
14
+ from ..base_utils import compact, wrap, presence, sanitize
15
+ from ..author_utils import get_authors
16
+ from ..date_utils import strip_milliseconds
17
+ from ..doi_utils import doi_as_url, doi_from_url
18
+ from ..constants import (
19
+ INVENIORDM_TO_CM_TRANSLATIONS,
20
+ COMMONMETA_RELATION_TYPES,
21
+ Commonmeta,
22
+ )
23
+
24
+
25
def get_inveniordm(pid: str, **kwargs) -> dict:
    """Fetch an InvenioRDM record as JSON.

    The normalized ``pid`` is requested with a 10 second timeout, following
    redirects. Extra keyword arguments are passed through to ``httpx.get``.

    Returns the decoded JSON body, or ``{"state": "not_found"}`` when ``pid``
    is None or the response status is not 200.
    """
    if pid is None:
        return {"state": "not_found"}

    response = httpx.get(
        normalize_url(pid), timeout=10, follow_redirects=True, **kwargs
    )
    if response.status_code == 200:
        return response.json()
    return {"state": "not_found"}
34
+
35
+
36
def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
    """Convert an InvenioRDM record into a commonmeta dict.

    Args:
        data: the record as a dict (for example the JSON of a record API
            response).
        **kwargs: read options; they are merged over the result.

    Returns:
        The commonmeta dict, or ``{"state": "not_found"}`` when ``data`` is
        None.
    """
    if data is None:
        return {"state": "not_found"}

    meta = data
    read_options = kwargs or {}

    url = normalize_url(py_.get(meta, "links.self_html"))
    _id = doi_as_url(meta.get("doi", None)) or url

    resource_type = py_.get(meta, "metadata.resource_type.type") or py_.get(
        meta, "metadata.resource_type.id"
    )
    # only the part before the first "-" is looked up; a record without a
    # usable resource type falls back to "Other" instead of raising
    resource_type = (
        resource_type.split("-")[0] if isinstance(resource_type, str) else None
    )
    _type = INVENIORDM_TO_CM_TRANSLATIONS.get(resource_type, "Other")

    contributors = get_authors(
        from_inveniordm(wrap(py_.get(meta, "metadata.creators"))),
    )
    publisher = {
        "name": meta.get("publisher", None)
        or py_.get(meta, "metadata.publisher")
        or "Zenodo"
    }

    # TODO: metadata.additional_titles is not mapped yet
    title = py_.get(meta, "metadata.title")
    titles = [{"title": sanitize(title)}] if title else None

    date: dict = {}
    date["published"] = py_.get(meta, "metadata.publication_date")
    if date["published"]:
        # keep only the part before the first "/"
        date["published"] = date["published"].split("/")[0]
    date["updated"] = strip_milliseconds(meta.get("updated", None))

    # the container is derived from the host of the record's landing page
    f = furl(url)
    if f.host == "zenodo.org":
        container = compact(
            {
                "id": "https://www.re3data.org/repository/r3d100010468",
                "type": "DataRepository" if _type == "Dataset" else "Repository",
                "title": "Zenodo",
            }
        )
    elif f.host in [
        "rogue-scholar.org",
        "beta.rogue-scholar.org",
        "demo.front-matter.io",
    ]:
        container = compact(
            {
                "type": "Repository",
                "title": "Rogue Scholar",
            }
        )
    else:
        container = None

    license_ = py_.get(meta, "metadata.license.id")
    if license_:
        license_ = dict_to_spdx({"id": license_})

    # the description becomes the abstract, the notes an "Other" description
    descriptions = format_descriptions(
        [
            py_.get(meta, "metadata.description"),
            py_.get(meta, "metadata.notes"),
        ]
    )
    language = py_.get(meta, "metadata.language") or py_.get(
        meta, "metadata.languages[0].id"
    )
    subjects = [name_to_fos(i) for i in wrap(py_.get(meta, "metadata.keywords"))]

    related_identifiers = wrap(py_.get(meta, "metadata.related_identifiers"))
    references = get_references(related_identifiers)
    relations = get_relations(related_identifiers)
    if meta.get("conceptdoi", None):
        relations.append(
            {
                "id": doi_as_url(meta.get("conceptdoi")),
                "type": "IsVersionOf",
            }
        )
    files = [get_file(i) for i in wrap(meta.get("files"))]

    return {
        # required properties
        "id": _id,
        "type": _type,
        "doi": doi_from_url(_id),
        "url": url,
        "contributors": presence(contributors),
        "titles": titles,
        "publisher": publisher,
        "date": compact(date),
        # recommended and optional properties
        # TODO: additional type and funding references are not mapped yet
        "subjects": presence(subjects),
        "language": get_language(language),
        "version": py_.get(meta, "metadata.version"),
        "license": presence(license_),
        "descriptions": descriptions,
        "geoLocations": None,
        "references": presence(references),
        "relations": presence(relations),
        # other properties
        "files": files,
        "container": container,
        "provider": "DataCite",
    } | read_options
135
+
136
+
137
def get_references(references: list) -> list:
    """Select related identifiers that are references and map them.

    An entry counts as a reference when its ``relationType`` is "Cites" or
    "References".

    Args:
        references: list of related identifier dicts; they are not modified.

    Returns:
        One new dict per reference: the DataCite bookkeeping keys are dropped
        and a normalized ``id`` is added for DOI and URL identifiers.
    """
    # NOTE(review): get_relations in this file reads "identifier", "scheme"
    # and "relation" from the same input list, while this function reads
    # DataCite-style keys - confirm which shape InvenioRDM records use.
    dropped_keys = (
        "relationType",
        "relatedIdentifier",
        "relatedIdentifierType",
        "resourceTypeGeneral",
        "schemeType",
        "schemeUri",
        "relatedMetadataScheme",
    )

    def is_reference(reference):
        """Return True for Cites/References entries."""
        return reference.get("relationType", None) in ["Cites", "References"]

    def map_reference(reference):
        """Build the output dict without touching the caller's dict."""
        identifier = reference.get("relatedIdentifier", None)
        identifier_type = reference.get("relatedIdentifierType", None)
        # copy first so the normalized value is not written into the input
        mapped = {
            key: value for key, value in reference.items() if key not in dropped_keys
        }
        if identifier and identifier_type == "DOI":
            mapped["id"] = normalize_doi(identifier)
        elif identifier and identifier_type == "URL":
            mapped["id"] = normalize_url(identifier)
        return mapped

    return [map_reference(i) for i in references if is_reference(i)]
167
+
168
+
169
def get_file(file: dict) -> dict:
    """Map one file entry of a record to a file dict.

    Args:
        file: file entry with optional ``bucket``, ``key``, ``checksum``,
            ``size``, ``type`` and ``links.self`` values.

    Returns:
        The result of ``compact`` over the collected values. ``mimeType`` is
        "application/" followed by the entry's ``type`` when a type is given.
    """
    file_type = file.get("type", None)
    mime_type = "application/" + file_type if file_type else None
    return compact(
        {
            "bucket": file.get("bucket", None),
            "key": file.get("key", None),
            "checksum": file.get("checksum", None),
            "url": py_.get(file, "links.self"),
            "size": file.get("size", None),
            "mimeType": mime_type,
        }
    )
182
+
183
+
184
def get_relations(relations: list) -> list:
    """Map related identifiers to id/type dicts and keep the supported ones.

    Every entry is mapped first; afterwards only the results whose type is
    listed in ``COMMONMETA_RELATION_TYPES`` are returned.
    """

    def map_relation(relation: dict) -> dict:
        """Build the id/type dict for one related identifier."""
        raw_identifier = relation.get("identifier", None)
        scheme = relation.get("scheme", None)
        relation_type = relation.get("relation", None) or relation.get(
            "relation_type", None
        )
        # DOIs are expanded to URLs, everything else is treated as a URL
        to_id = doi_as_url if scheme == "doi" else normalize_url
        mapped_type = None
        if relation_type:
            mapped_type = py_.capitalize(relation_type, False)
        return {"id": to_id(raw_identifier), "type": mapped_type}

    mapped = [map_relation(entry) for entry in relations]
    supported = []
    for candidate in mapped:
        if py_.upper_first(candidate["type"]) in COMMONMETA_RELATION_TYPES:
            supported.append(candidate)
    return supported
207
+
208
+
209
def format_descriptions(descriptions: list) -> list:
    """Turn a list of description texts into description dicts.

    Empty entries are skipped. The entry at position 0 of the input is typed
    "Abstract", every other entry "Other"; positions refer to the input list,
    so skipping an empty first entry does not promote the next one.
    """
    formatted = []
    for position, text in enumerate(descriptions):
        if not text:
            continue
        kind = "Abstract" if position == 0 else "Other"
        formatted.append({"description": sanitize(text), "type": kind})
    return formatted