commonmeta-py 0.23-py3-none-any.whl → 0.25-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries, and reflects the changes between those versions as they appear in their public registries. It is provided for informational purposes only.
Files changed (76)
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/cff_reader.py +199 -0
  13. commonmeta/readers/codemeta_reader.py +112 -0
  14. commonmeta/readers/commonmeta_reader.py +13 -0
  15. commonmeta/readers/crossref_reader.py +409 -0
  16. commonmeta/readers/crossref_xml_reader.py +505 -0
  17. commonmeta/readers/csl_reader.py +98 -0
  18. commonmeta/readers/datacite_reader.py +390 -0
  19. commonmeta/readers/datacite_xml_reader.py +359 -0
  20. commonmeta/readers/inveniordm_reader.py +218 -0
  21. commonmeta/readers/json_feed_reader.py +420 -0
  22. commonmeta/readers/kbase_reader.py +205 -0
  23. commonmeta/readers/ris_reader.py +103 -0
  24. commonmeta/readers/schema_org_reader.py +506 -0
  25. commonmeta/resources/cff_v1.2.0.json +1827 -0
  26. commonmeta/resources/commonmeta_v0.12.json +601 -0
  27. commonmeta/resources/commonmeta_v0.13.json +559 -0
  28. commonmeta/resources/commonmeta_v0.14.json +573 -0
  29. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  30. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  36. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  37. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  38. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  39. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  40. commonmeta/resources/crossref/fundref.xsd +49 -0
  41. commonmeta/resources/crossref/module-ali.xsd +39 -0
  42. commonmeta/resources/crossref/relations.xsd +444 -0
  43. commonmeta/resources/crossref-v0.2.json +60 -0
  44. commonmeta/resources/csl-data.json +538 -0
  45. commonmeta/resources/datacite-v4.5.json +829 -0
  46. commonmeta/resources/datacite-v4.5pr.json +608 -0
  47. commonmeta/resources/ietf-bcp-47.json +3025 -0
  48. commonmeta/resources/iso-8601.json +3182 -0
  49. commonmeta/resources/spdx/licenses.json +4851 -0
  50. commonmeta/resources/spdx-schema..json +903 -0
  51. commonmeta/resources/styles/apa.csl +1697 -0
  52. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  53. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  54. commonmeta/resources/styles/ieee.csl +468 -0
  55. commonmeta/resources/styles/modern-language-association.csl +341 -0
  56. commonmeta/resources/styles/vancouver.csl +376 -0
  57. commonmeta/schema_utils.py +27 -0
  58. commonmeta/translators.py +47 -0
  59. commonmeta/utils.py +1108 -0
  60. commonmeta/writers/__init__.py +1 -0
  61. commonmeta/writers/bibtex_writer.py +149 -0
  62. commonmeta/writers/citation_writer.py +70 -0
  63. commonmeta/writers/commonmeta_writer.py +68 -0
  64. commonmeta/writers/crossref_xml_writer.py +17 -0
  65. commonmeta/writers/csl_writer.py +79 -0
  66. commonmeta/writers/datacite_writer.py +193 -0
  67. commonmeta/writers/inveniordm_writer.py +94 -0
  68. commonmeta/writers/ris_writer.py +58 -0
  69. commonmeta/writers/schema_org_writer.py +146 -0
  70. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/METADATA +56 -45
  71. commonmeta_py-0.25.dist-info/RECORD +75 -0
  72. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/WHEEL +1 -1
  73. commonmeta_py-0.25.dist-info/entry_points.txt +3 -0
  74. commonmeta_py-0.23.dist-info/RECORD +0 -5
  75. /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
  76. {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.25.dist-info}/LICENSE +0 -0
commonmeta/readers/json_feed_reader.py
@@ -0,0 +1,420 @@
+"""JSON Feed reader for commonmeta-py"""
+
+from typing import Optional
+import httpx
+from pydash import py_
+from furl import furl
+
+from ..utils import (
+    compact,
+    normalize_url,
+    from_json_feed,
+    wrap,
+    dict_to_spdx,
+    name_to_fos,
+    validate_url,
+    validate_ror,
+    encode_doi,
+    issn_as_url,
+)
+from ..author_utils import get_authors
+from ..base_utils import presence, sanitize, parse_attributes
+from ..date_utils import get_date_from_unix_timestamp
+from ..doi_utils import (
+    normalize_doi,
+    validate_prefix,
+    validate_doi,
+    doi_from_url,
+    is_rogue_scholar_doi,
+)
+from ..constants import Commonmeta
+
+
+def get_json_feed_item(pid: str, **kwargs) -> dict:
+    """get_json_feed_item"""
+    if pid is None:
+        return {"state": "not_found"}
+    url = normalize_url(pid)
+    response = httpx.get(url, timeout=10, follow_redirects=True, **kwargs)
+    if response.status_code != 200:
+        return {"state": "not_found"}
+    return response.json() | {"via": "json_feed_item"}
+
+
+def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
+    """read_json_feed_item"""
+    if data is None:
+        return {"state": "not_found"}
+    meta = data
+    read_options = kwargs or {}
+
+    url = normalize_url(meta.get("url", None))
+    _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None)) or url
+    _type = "Article"
+
+    # optionally generate a DOI if missing but a DOI prefix is provided
+    prefix = read_options.get("prefix", None) or py_.get(meta, "blog.prefix", None)
+    if doi_from_url(_id) is None and prefix is not None:
+        _id = encode_doi(prefix)
+
+    if meta.get("authors", None):
+        contributors = get_authors(from_json_feed(wrap(meta.get("authors"))))
+    else:
+        contributors = None
+
+    title = parse_attributes(meta.get("title", None))
+    titles = [{"title": sanitize(title)}] if title else None
+
+    publisher = py_.get(meta, "blog.title", None)
+    if publisher is not None:
+        publisher = {"name": publisher}
+
+    date: dict = {}
+    date["published"] = (
+        get_date_from_unix_timestamp(meta.get("published_at", None))
+        if meta.get("published_at", None)
+        else None
+    )
+    date["updated"] = (
+        get_date_from_unix_timestamp(meta.get("updated_at", None))
+        if meta.get("updated_at", None)
+        else None
+    )
+
+    license_ = py_.get(meta, "blog.license", None)
+    if license_ is not None:
+        license_ = dict_to_spdx({"url": license_})
+    issn = py_.get(meta, "blog.issn", None)
+    blog_url = (
+        f"https://rogue-scholar.org/blogs/{meta.get('blog_slug')}"
+        if meta.get("blog_slug", None)
+        else None
+    )
+    container = compact(
+        {
+            "type": "Periodical",
+            "title": py_.get(meta, "blog.title", None),
+            "identifier": issn or blog_url,
+            "identifierType": "ISSN" if issn else "URL",
+        }
+    )
+
+    description = meta.get("summary", None)
+    if description is not None:
+        descriptions = [{"description": sanitize(description), "type": "Abstract"}]
+    else:
+        descriptions = None
+    category = py_.get(meta, "blog.category", None)
+    if category is not None:
+        subjects = [name_to_fos(py_.human_case(category))]
+    else:
+        subjects = None
+    references = get_references(wrap(meta.get("reference", None)))
+    funding_references = get_funding_references(meta)
+    relations = get_relations(wrap(meta.get("relationships", None)))
+    if issn is not None:
+        relations.append(
+            {
+                "id": issn_as_url(issn),
+                "type": "IsPartOf",
+            }
+        )
+    identifiers = [{"identifier": meta.get("id"), "identifierType": "UUID"}]
+    files = get_files(_id)
+    state = "findable" if meta or read_options else "not_found"
+
+    return {
+        # required properties
+        "id": _id,
+        "type": _type,
+        "url": url,
+        "contributors": presence(contributors),
+        "titles": presence(titles),
+        "publisher": publisher,
+        "date": compact(date),
+        # recommended and optional properties
+        "additional_type": None,
+        "subjects": presence(subjects),
+        "language": meta.get("language", None),
+        "identifiers": identifiers,
+        "version": None,
+        "license": license_,
+        "descriptions": descriptions,
+        "geoLocations": None,
+        "fundingReferences": presence(funding_references),
+        "references": presence(references),
+        "relations": presence(relations),
+        "files": files,
+        # other properties
+        "container": presence(container),
+        "provider": "Crossref" if is_rogue_scholar_doi(_id) else None,
+        "state": state,
+        "schema_version": None,
+    } | read_options
+
+
+def get_references(references: list) -> list:
+    """get json feed references."""
+
+    def get_reference(reference: dict) -> Optional[dict]:
+        if reference is None or not isinstance(reference, dict):
+            return None
+        try:
+            if reference.get("doi", None) and validate_doi(reference.get("doi")):
+                id_ = normalize_doi(reference.get("doi"))
+                return compact(
+                    {
+                        "id": id_,
+                        "title": reference.get("title", None),
+                        "publicationYear": reference.get("publicationYear", None),
+                    }
+                )
+
+            elif (
+                reference.get("url", None)
+                and validate_url(reference.get("url")) == "URL"
+            ):
+                response = httpx.head(reference.get("url", None), timeout=10)
+                # check that URL resolves.
+                # TODO: check for redirects
+                if response.status_code in [404]:
+                    return None
+                return {
+                    "id": reference.get("url"),
+                }
+        except Exception as error:
+            print(error)
+            return None
+
+    def number_reference(reference: dict, index: int) -> dict:
+        """number reference"""
+        reference["key"] = f"ref{index + 1}"
+        return reference
+
+    references = [get_reference(i) for i in references]
+    return [
+        number_reference(i, index)
+        for index, i in enumerate(references)
+        if i is not None
+    ]
+
+
+def get_funding_references(meta: Optional[dict]) -> Optional[list]:
+    """get json feed funding references.
+    Check that relationships resolve and have type "HasAward" or
+    funding is provided by blog metadata"""
+
+    if meta is None or not isinstance(meta, dict):
+        return None
+
+    def format_funding(urls: list) -> list:
+        """format funding. URLs can either be a list of grant IDs or a funder identifier
+        (Open Funder Registry ID or ROR), followed by a grant URL"""
+        # Prefix 10.3030 means grant ID from funder is European Commission.
+        # CORDIS is the grants portal of the European Commission.
+        if len(urls) == 1 and (
+            validate_prefix(urls[0]) == "10.3030"
+            or furl(urls[0]).host == "cordis.europa.eu"
+        ):
+            return [
+                {
+                    "funderName": "European Commission",
+                    "funderIdentifier": "https://doi.org/10.13039/501100000780",
+                    "funderIdentifierType": "Crossref Funder ID",
+                    "award_uri": urls[0],
+                    "awardNumber": urls[0].split("/")[-1],
+                }
+            ]
+        # Prefix 10.13039 means funder ID from Open Funder registry.
+        elif len(urls) == 2 and validate_prefix(urls[0]) == "10.13039":
+            if urls[0] == "https://doi.org/10.13039/100000001":
+                funder_name = "National Science Foundation"
+            else:
+                funder_name = None
+            f = furl(urls[1])
+            # url is for NSF grant
+            if f.args["awd_id"] is not None:
+                award_number = f.args["awd_id"]
+            else:
+                award_number = f.path.segments[-1]
+            return [
+                {
+                    "funderName": funder_name,
+                    "funderIdentifier": urls[0],
+                    "funderIdentifierType": "Crossref Funder ID",
+                    "award_uri": urls[1],
+                    "awardNumber": award_number,
+                }
+            ]
+        # URL is ROR ID for funder. Need to transform to Crossref Funder ID
+        # until Crossref production service supports ROR IDs.
+        elif len(urls) == 2 and validate_ror(urls[0]):
+            f = furl(urls[0])
+            _id = f.path.segments[-1]
+            response = httpx.get(f"https://api.ror.org/organizations/{_id}", timeout=10)
+            ror = response.json()
+            funder_name = ror.get("name", None)
+            funder_identifier = py_.get(ror, "external_ids.FUNDREF.all.0")
+            if funder_identifier is not None:
+                funder_identifier = f"https://doi.org/{funder_identifier}"
+                funder_identifier_type = "Crossref Funder ID"
+            else:
+                funder_identifier = urls[0]
+                funder_identifier_type = "ROR"
+            f = furl(urls[1])
+            # url is for NSF grant
+            if f.args["awd_id"] is not None:
+                award_number = f.args["awd_id"]
+            else:
+                award_number = f.path.segments[-1]
+            return [
+                compact(
+                    {
+                        "funderName": funder_name,
+                        "funderIdentifier": funder_identifier,
+                        "funderIdentifierType": funder_identifier_type,
+                        "award_uri": urls[1],
+                        "awardNumber": award_number,
+                    }
+                )
+            ]
+
+    awards = py_.flatten(
+        [
+            format_funding(i.get("urls"))
+            for i in wrap(meta.get("relationships", None))
+            if i.get("type", None) == "HasAward"
+        ]
+    )
+    funding = py_.get(meta, "blog.funding", None)
+    if funding is not None:
+        awards += [
+            {
+                "funderName": funding.get("funder_name", None),
+                "funderIdentifier": funding.get("funder_id", None),
+                "funderIdentifierType": "Crossref Funder ID",
+                "awardTitle": funding.get("award", None),
+                "awardNumber": funding.get("award_number", None),
+            }
+        ]
+    return awards
+
+
+def get_relations(relations: Optional[list]) -> Optional[list]:
+    """get json feed related relations.
+    Check that relations resolve and have a supported type"""
+    supported_types = [
+        "IsNewVersionOf",
+        "IsPreviousVersionOf",
+        "IsVersionOf",
+        "HasVersion",
+        "IsPartOf",
+        "HasPart",
+        "IsVariantFormOf",
+        "IsOriginalFormOf",
+        "IsIdenticalTo",
+        "IsTranslationOf",
+        "IsReviewedBy",
+        "Reviews",
+        "IsPreprintOf",
+        "HasPreprint",
+        "IsSupplementTo",
+    ]
+
+    def format_relationship(relation: dict) -> dict:
+        """format relationship"""
+        _id = relation.get("url", None) or relation.get("urls", None)
+        if isinstance(_id, list):
+            relations = []
+            for url in _id:
+                relations.append({"id": url, "type": relation.get("type", None)})
+            return relations
+        return {
+            "id": _id,
+            "type": relation.get("type", None),
+        }
+
+    return py_.flatten(
+        [
+            format_relationship(i)
+            for i in relations
+            if i.get("type", None) in supported_types
+        ]
+    )
+
+
+def get_files(pid: str) -> Optional[list]:
+    """get json feed file links"""
+    doi = doi_from_url(pid)
+    if not is_rogue_scholar_doi(doi):
+        return None
+    return [
+        {
+            "mimeType": "text/markdown",
+            "url": f"https://api.rogue-scholar.org/posts/{doi}.md",
+        },
+        {
+            "mimeType": "application/pdf",
+            "url": f"https://api.rogue-scholar.org/posts/{doi}.pdf",
+        },
+        {
+            "mimeType": "application/epub+zip",
+            "url": f"https://api.rogue-scholar.org/posts/{doi}.epub",
+        },
+        {
+            "mimeType": "application/xml",
+            "url": f"https://api.rogue-scholar.org/posts/{doi}.xml",
+        },
+    ]
+
+
+def get_json_feed_item_uuid(id: str):
+    """get JSON Feed item by uuid"""
+    if id is None:
+        return None
+    url = f"https://api.rogue-scholar.org/posts/{id}"
+    response = httpx.get(url, timeout=10)
+    if response.status_code != 200:
+        return response.json()
+    post = response.json()
+    return py_.pick(
+        post,
+        [
+            "id",
+            "guid",
+            "url",
+            "doi",
+            "title",
+            "blog.slug",
+            "blog.issn",
+            "blog.prefix",
+            "blog.status",
+            "published_at",
+            "updated_at",
+            "indexed_at",
+        ],
+    )
+
+
+def get_json_feed_blog_slug(id: str):
+    """get JSON Feed item by id and return blog slug"""
+    if id is None:
+        return None
+    url = f"https://api.rogue-scholar.org/posts/{id}"
+    response = httpx.get(url, timeout=10)
+    if response.status_code != 200:
+        return response.json()
+    post = response.json()
+    return py_.get(post, "blog.slug", None)
+
+
+def get_json_feed_blog_slug(id: str):
+    """get JSON Feed item by id and return blog slug"""
+    if id is None:
+        return None
+    url = f"https://api.rogue-scholar.org/posts/#{id}"
+    response = httpx.get(url, timeout=10)
+    if response.status_code != 200:
+        return None
+    post = response.json()
+    return py_.get(post, "blog.slug", None)
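
The functions added above can be exercised directly from the new module. The following is a minimal sketch (not part of the diff, and not necessarily the package's public API) of fetching a Rogue Scholar post as a JSON Feed item and converting it to commonmeta metadata; the post URL is a hypothetical placeholder.

# sketch only; the post URL below is a hypothetical placeholder
from commonmeta.readers.json_feed_reader import (
    get_json_feed_item,
    read_json_feed_item,
)

item = get_json_feed_item("https://api.rogue-scholar.org/posts/<uuid>")
metadata = read_json_feed_item(item)
print(metadata.get("id"), metadata.get("type"), metadata.get("state"))
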
commonmeta/readers/kbase_reader.py
@@ -0,0 +1,205 @@
+"""kbase reader for Commonmeta"""
+from pydash import py_
+
+from ..utils import normalize_url, normalize_doi, from_curie, from_kbase
+from ..base_utils import compact, wrap, presence, sanitize
+from ..author_utils import get_authors
+from ..date_utils import normalize_date_dict
+from ..doi_utils import doi_from_url, validate_doi
+from ..constants import (
+    COMMONMETA_RELATION_TYPES,
+    Commonmeta,
+)
+
+
+def read_kbase(data: dict, **kwargs) -> Commonmeta:
+    """read_kbase"""
+    meta = data.get("credit_metadata", {})
+    read_options = kwargs or {}
+
+    _id = from_curie(meta.get("identifier", None))
+    _type = "Dataset"
+    contributors = get_authors(from_kbase(wrap(meta.get("contributors", None))))
+
+    publisher = meta.get("publisher", None)
+    if publisher is not None:
+        publisher = {
+            "id": from_curie(publisher.get("organization_id", None)),
+            "name": publisher.get("organization_name", None),
+        }
+    titles = [format_title(i) for i in wrap(meta.get("titles", None))]
+
+    date: dict = {}
+
+    # convert date list to dict
+    for sub in wrap(meta.get("dates", None)):
+        data_type = sub.get("event", None)
+        date[data_type.capitalize() if data_type else None] = sub.get("date", None)
+    date = normalize_date_dict(date)
+
+    container = compact(
+        {
+            "id": "https://www.re3data.org/repository/r3d100012864",
+            "type": "DataRepository",
+            "title": "KBase",
+        }
+    )
+    license_ = meta.get("license", None)
+    if license_:
+        license_ = license_[0]
+    descriptions = meta.get("descriptions", None)
+    for des in wrap(descriptions):
+        des["description"] = sanitize(des["description_text"])
+        des["type"] = (
+            des["description_type"]
+            if des["description_type"] in ["Abstract", "Description", "Summary"]
+            else None
+        )
+        py_.omit(des, ["description_text", "description_type"])
+    language = meta.get("language", None)
+
+    # subjects = [name_to_fos(i) for i in wrap(py_.get(meta, "metadata.keywords"))]
+
+    version = meta.get("version", None)
+    references = get_references(wrap(meta.get("related_identifiers")))
+    relations = get_relations(wrap(meta.get("related_identifiers")))
+    funding_references = get_funding_references(wrap(meta.get("funding", None)))
+    files = [get_file(i) for i in wrap(meta.get("content_url"))]
+
+    state = "findable" if meta or read_options else "not_found"
+
+    return {
+        # required properties
+        "id": _id,
+        "type": _type,
+        "doi": doi_from_url(_id),
+        "url": normalize_url(meta.get("url", None)),
+        "contributors": presence(contributors),
+        "titles": titles,
+        "publisher": publisher,
+        "date": compact(date),
+        # recommended and optional properties
+        "additional_type": None,
+        "subjects": None,
+        "language": language,
+        "identifiers": None,
+        "version": py_.get(meta, "metadata.version"),
+        "license": presence(license_),
+        "descriptions": descriptions,
+        "geo_locations": None,
+        "fundingReferences": presence(funding_references),
+        "references": presence(references),
+        "relations": presence(relations),
+        # other properties
+        "files": presence(files),
+        "container": container,
+        "provider": "DataCite",
+    } | read_options
+
+
+def format_title(title: dict) -> dict:
+    """format_title"""
+    _type = title.get("title_type", None)
+    return compact(
+        {
+            "title": title.get("title", None),
+            "type": _type
+            if _type in ["AlternativeTitle", "Subtitle", "TranslatedTitle"]
+            else None,
+        }
+    )
+
+
+def get_references(references: list) -> list:
+    """get_references"""
+
+    def is_reference(reference):
+        """is_reference"""
+        return reference.get("relationship_type", None) in [
+            "DataCite:Cites",
+            "DataCite:References",
+            "DataCite:IsSupplementedBy",
+        ]
+
+    def map_reference(reference):
+        """map_reference"""
+        identifier = from_curie(reference.get("id", None))
+        identifier_type = "DOI" if validate_doi(identifier) else "URL"
+        if identifier and identifier_type == "DOI":
+            reference["doi"] = normalize_doi(identifier)
+        elif identifier and identifier_type == "URL":
+            reference["url"] = normalize_url(identifier)
+        reference = py_.omit(
+            reference,
+            [
+                "id",
+                "relationship_type",
+            ],
+        )
+        return reference
+
+    return [map_reference(i) for i in references if is_reference(i)]
+
+
+def get_file(file: str) -> dict:
+    """get_file"""
+    return compact({"url": file})
+
+
+def get_relations(relations: list) -> list:
+    """get_relations"""
+
+    def map_relation(relation: dict) -> dict:
+        """map_relation"""
+        identifier = from_curie(relation.get("id", None))
+        _type = relation.get("relationship_type", None)
+        # remove DataCite: and Crossref: prefixes
+        _type = _type.split(":")[1] if _type else None
+        if normalize_url(identifier):
+            identifier = normalize_url(identifier)
+            # TODO: resolvable url for other identifier types
+        else:
+            identifier = None
+        return {
+            "id": identifier,
+            "type": _type,
+        }
+
+    identifiers = [map_relation(i) for i in relations]
+    return [i for i in identifiers if i["type"] in COMMONMETA_RELATION_TYPES]
+
+
+def get_funding_references(funding_references: list) -> list:
+    """get_funding_references"""
+
+    def map_funding_reference(funding_reference: dict) -> dict:
+        """map_funding_reference"""
+        funder_identifier = py_.get(funding_reference, "funder.organization_id", None)
+        funder_identifier_type = (
+            funder_identifier.split(":")[0] if funder_identifier else None
+        )
+        return compact(
+            {
+                "funderIdentifier": from_curie(funder_identifier),
+                "funderIdentifierType": funder_identifier_type,
+                "funderName": py_.get(
+                    funding_reference, "funder.organization_name", None
+                ),
+                "awardNumber": funding_reference.get("grant_id", None),
+                "award_uri": funding_reference.get("grant_url", None),
+            }
+        )
+
+    return [map_funding_reference(i) for i in funding_references]
+
+
+def format_descriptions(descriptions: list) -> list:
+    """format_descriptions"""
+    return [
+        {
+            "description": sanitize(i),
+            "type": "Abstract" if index == 0 else "Other",
+        }
+        for index, i in enumerate(descriptions)
+        if i
+    ]
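
In the same spirit, a minimal sketch (not part of the diff) of driving the new KBase reader with a hypothetical, heavily trimmed credit_metadata record; the field names follow the keys the reader above looks up, and the identifier and title values are invented.

# sketch only; the record below is a hypothetical fragment, not real KBase data
from commonmeta.readers.kbase_reader import read_kbase

record = {
    "credit_metadata": {
        "identifier": "DOI:10.12345/example",  # hypothetical CURIE
        "titles": [{"title": "Example KBase dataset"}],
        "dates": [{"event": "updated", "date": "2023-01-01"}],
    }
}
metadata = read_kbase(record)
print(metadata.get("type"), metadata.get("provider"))  # "Dataset", "DataCite"
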