commonmeta-py 0.23__py3-none-any.whl → 0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/cff_reader.py +199 -0
  13. commonmeta/readers/codemeta_reader.py +112 -0
  14. commonmeta/readers/commonmeta_reader.py +13 -0
  15. commonmeta/readers/crossref_reader.py +409 -0
  16. commonmeta/readers/crossref_xml_reader.py +505 -0
  17. commonmeta/readers/csl_reader.py +98 -0
  18. commonmeta/readers/datacite_reader.py +390 -0
  19. commonmeta/readers/datacite_xml_reader.py +359 -0
  20. commonmeta/readers/inveniordm_reader.py +218 -0
  21. commonmeta/readers/json_feed_reader.py +420 -0
  22. commonmeta/readers/kbase_reader.py +205 -0
  23. commonmeta/readers/ris_reader.py +103 -0
  24. commonmeta/readers/schema_org_reader.py +506 -0
  25. commonmeta/resources/cff_v1.2.0.json +1827 -0
  26. commonmeta/resources/commonmeta_v0.12.json +601 -0
  27. commonmeta/resources/commonmeta_v0.13.json +559 -0
  28. commonmeta/resources/commonmeta_v0.14.json +573 -0
  29. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  30. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  36. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  37. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  38. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  39. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  40. commonmeta/resources/crossref/fundref.xsd +49 -0
  41. commonmeta/resources/crossref/module-ali.xsd +39 -0
  42. commonmeta/resources/crossref/relations.xsd +444 -0
  43. commonmeta/resources/crossref-v0.2.json +60 -0
  44. commonmeta/resources/csl-data.json +538 -0
  45. commonmeta/resources/datacite-v4.5.json +829 -0
  46. commonmeta/resources/datacite-v4.5pr.json +608 -0
  47. commonmeta/resources/ietf-bcp-47.json +3025 -0
  48. commonmeta/resources/iso-8601.json +3182 -0
  49. commonmeta/resources/spdx/licenses.json +4851 -0
  50. commonmeta/resources/spdx-schema..json +903 -0
  51. commonmeta/resources/styles/apa.csl +1697 -0
  52. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  53. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  54. commonmeta/resources/styles/ieee.csl +468 -0
  55. commonmeta/resources/styles/modern-language-association.csl +341 -0
  56. commonmeta/resources/styles/vancouver.csl +376 -0
  57. commonmeta/schema_utils.py +27 -0
  58. commonmeta/translators.py +47 -0
  59. commonmeta/utils.py +1108 -0
  60. commonmeta/writers/__init__.py +1 -0
  61. commonmeta/writers/bibtex_writer.py +149 -0
  62. commonmeta/writers/citation_writer.py +70 -0
  63. commonmeta/writers/commonmeta_writer.py +68 -0
  64. commonmeta/writers/crossref_xml_writer.py +17 -0
  65. commonmeta/writers/csl_writer.py +79 -0
  66. commonmeta/writers/datacite_writer.py +193 -0
  67. commonmeta/writers/inveniordm_writer.py +94 -0
  68. commonmeta/writers/ris_writer.py +58 -0
  69. commonmeta/writers/schema_org_writer.py +146 -0
  70. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/METADATA +56 -45
  71. commonmeta_py-0.25.dist-info/RECORD +75 -0
  72. {commonmeta_py-0.23.dist-info → commonmeta_py-0.25.dist-info}/WHEEL +1 -1
  73. commonmeta_py-0.25.dist-info/entry_points.txt +3 -0
  74. commonmeta_py-0.23.dist-info/RECORD +0 -5
  75. /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
  76. {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.25.dist-info}/LICENSE +0 -0
commonmeta/utils.py ADDED
@@ -0,0 +1,1108 @@
1
+ """Utils module for commonmeta-py"""
2
+
3
+ import os
4
+ import orjson as json
5
+ import re
6
+ import time
7
+ from typing import Optional
8
+ from urllib.parse import urlparse
9
+ import yaml
10
+ from furl import furl
11
+ import bibtexparser
12
+ from bs4 import BeautifulSoup
13
+ from pydash import py_
14
+ import base32_lib as base32
15
+ import pycountry
16
+
17
+ from .base_utils import wrap, compact, parse_attributes
18
+ from .doi_utils import normalize_doi, doi_from_url, get_doi_ra, validate_doi, doi_as_url
19
+ from .constants import DATACITE_CONTRIBUTOR_TYPES
20
+
21
+
22
# Maps a Creative Commons license URL (without trailing slash) to the URL of
# its legal code. The jurisdiction-specific "/3.0/us" variants are mapped to
# the generic 3.0 legal code.
NORMALIZED_LICENSES = {
    "https://creativecommons.org/licenses/by/1.0": "https://creativecommons.org/licenses/by/1.0/legalcode",
    "https://creativecommons.org/licenses/by/2.0": "https://creativecommons.org/licenses/by/2.0/legalcode",
    "https://creativecommons.org/licenses/by/2.5": "https://creativecommons.org/licenses/by/2.5/legalcode",
    "https://creativecommons.org/licenses/by/3.0": "https://creativecommons.org/licenses/by/3.0/legalcode",
    "https://creativecommons.org/licenses/by/3.0/us": "https://creativecommons.org/licenses/by/3.0/legalcode",
    "https://creativecommons.org/licenses/by/4.0": "https://creativecommons.org/licenses/by/4.0/legalcode",
    "https://creativecommons.org/licenses/by-nc/1.0": "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
    "https://creativecommons.org/licenses/by-nc/2.0": "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
    "https://creativecommons.org/licenses/by-nc/2.5": "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
    "https://creativecommons.org/licenses/by-nc/3.0": "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
    "https://creativecommons.org/licenses/by-nc/4.0": "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
    "https://creativecommons.org/licenses/by-nd-nc/1.0": "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
    "https://creativecommons.org/licenses/by-nd-nc/2.0": "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
    "https://creativecommons.org/licenses/by-nd-nc/2.5": "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
    "https://creativecommons.org/licenses/by-nd-nc/3.0": "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
    "https://creativecommons.org/licenses/by-nd-nc/4.0": "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/1.0": "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/2.0": "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/2.5": "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/3.0": "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/3.0/us": "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-sa/4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
    "https://creativecommons.org/licenses/by-nd/1.0": "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
    "https://creativecommons.org/licenses/by-nd/2.0": "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
    "https://creativecommons.org/licenses/by-nd/2.5": "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
    "https://creativecommons.org/licenses/by-nd/3.0": "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
    # was mapped to the 2.0 legal code, which silently downgraded the license version
    "https://creativecommons.org/licenses/by-nd/4.0": "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
    "https://creativecommons.org/licenses/by-sa/1.0": "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
    "https://creativecommons.org/licenses/by-sa/2.0": "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
    "https://creativecommons.org/licenses/by-sa/2.5": "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
    "https://creativecommons.org/licenses/by-sa/3.0": "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
    "https://creativecommons.org/licenses/by-sa/4.0": "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-nd/1.0": "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-nd/2.0": "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-nd/2.5": "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
    "https://creativecommons.org/licenses/by-nc-nd/3.0": "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
    "https://creativecommons.org/licenses/by-nc-nd/4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
    "https://creativecommons.org/licenses/publicdomain": "https://creativecommons.org/licenses/publicdomain/",
    "https://creativecommons.org/publicdomain/zero/1.0": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
}
63
+
64
# Reserved codes (each starting with ":") that stand in for a missing or
# unknown value, mapped to a short human-readable explanation of the code.
UNKNOWN_INFORMATION = {
    ":unac": "temporarily inaccessible",
    ":unal": "unallowed, suppressed intentionally",
    ":unap": "not applicable, makes no sense",
    ":unas": "value unassigned (e.g., Untitled)",
    ":unav": "value unavailable, possibly unknown",
    ":unkn": "known to be unknown (e.g., Anonymous, Inconnue)",
    ":none": "never had a value, never will",
    ":null": "explicitly and meaningfully empty",
    ":tba": "to be assigned or announced later",
    ":etal": "too numerous to list (et alia)",
}

# URL scheme prefixes used by the normalize_* helpers when upgrading
# http URLs to https.
HTTP_SCHEME = "http://"
HTTPS_SCHEME = "https://"
79
+
80
+
81
def normalize_id(pid: Optional[str], **kwargs) -> Optional[str]:
    """Normalize an identifier to a DOI or an HTTPS URL.

    Args:
        pid: identifier as str (bytes and bytearray are decoded first).
        **kwargs: passed through unchanged to normalize_doi.

    Returns:
        The result of normalize_doi when it is not None; otherwise the
        identifier as an https URL without trailing slashes; None when the
        identifier is None or is not an http(s) URL with a host.
    """
    if pid is None:
        return None

    # identifiers may arrive as raw bytes
    if isinstance(pid, (bytes, bytearray)):
        pid = pid.decode()

    # a valid DOI takes precedence over plain URL handling
    doi = normalize_doi(pid, **kwargs)
    if doi is not None:
        return doi

    uri = urlparse(pid)
    if not uri.netloc or uri.scheme not in ["http", "https"]:
        return None
    if uri.scheme == "http":
        # Upgrade only the leading scheme. A blanket str.replace would also
        # rewrite "http://" occurrences embedded in the path or query.
        _, separator, remainder = pid.partition("://")
        if separator:
            pid = HTTPS_SCHEME + remainder

    # remove trailing slash(es) only
    return pid.rstrip("/")
107
+
108
+
109
def normalize_ids(ids: list, relation_type=None) -> list:
    """Normalize a list of identifier dicts into related-identifier dicts.

    Args:
        ids: dicts that may carry an "id" key.
        relation_type: value stored under "relationType" in every result.

    Returns:
        One entry per input item, in order: a dict with "relatedIdentifier",
        "relationType" and "relatedIdentifierType" ("DOI" or "URL") passed
        through compact, or None for items without a truthy "id".
    """

    def format_id(i):
        if i.get("id", None):
            idn = normalize_id(i["id"])
            doi = doi_from_url(idn)
            related_identifier_type = "DOI" if doi is not None else "URL"
            idn = doi or idn
            # The former unused "_type" local was removed: it indexed
            # wrap(i.get("type"))[0] and could fail for items without "type".
            return compact(
                {
                    "relatedIdentifier": idn,
                    "relationType": relation_type,
                    "relatedIdentifierType": related_identifier_type,
                }
            )
        return None

    return [format_id(i) for i in ids]
133
+
134
+
135
def normalize_url(url: Optional[str], secure=False, lower=False) -> Optional[str]:
    """Normalize a URL string.

    Args:
        url: the URL; anything that is not a str yields None.
        secure: when True, upgrade a leading "http://" to "https://".
        lower: when True, return the result lowercased.

    Returns:
        The URL without trailing slashes, optionally upgraded and lowercased,
        or None for non-string input.
    """
    if not isinstance(url, str):
        return None
    # only trailing slashes are removed; str.strip would also eat leading ones
    url = url.rstrip("/")
    if secure is True and url.startswith(HTTP_SCHEME):
        # swap the leading scheme only, leaving any URL embedded in the
        # path or query untouched
        url = HTTPS_SCHEME + url[len(HTTP_SCHEME):]
    if lower is True:
        return url.lower()
    return url
146
+
147
+
148
def normalize_cc_url(url: Optional[str]):
    """Return the legal-code URL for a Creative Commons license URL.

    The URL is first normalized with normalize_url(secure=True) and then
    looked up in NORMALIZED_LICENSES; URLs without an entry are returned in
    their normalized form. Non-string input yields None.
    """
    if not isinstance(url, str):
        return None
    secure_url = normalize_url(url, secure=True)
    return NORMALIZED_LICENSES.get(secure_url, secure_url)
154
+
155
+
156
def normalize_ror(ror: Optional[str]) -> Optional[str]:
    """Return the ROR ID as a https://ror.org/ URL, or None if it is not valid."""
    ror_id = validate_ror(ror)
    # prefix the bare ROR ID with the resolver URL
    return None if ror_id is None else "https://ror.org/" + ror_id
164
+
165
+
166
def validate_ror(ror: Optional[str]) -> Optional[str]:
    """Extract the bare ROR ID from a ROR ID or ror.org URL.

    Returns the ID (seven characters from 0-9/a-z followed by two digits)
    or None when the input is not a string or does not match.
    """
    if not isinstance(ror, str):
        return None
    found = re.match(
        r"\A(?:(?:http|https)://ror\.org/)?([0-9a-z]{7}\d{2})\Z",
        ror,
    )
    if found is None:
        return None
    return found.group(1).replace(" ", "-")
178
+
179
+
180
def validate_url(url: str) -> Optional[str]:
    """Classify an identifier string as "DOI", "URL" or "ISSN".

    Checks run in that order: validate_doi, then an http/https scheme as
    parsed by furl, then the "ISSN xxxx-xxxx" / "eISSN xxxx-xxxx" form.
    Returns None when url is None or nothing matches.
    """
    if url is None:
        return None
    if validate_doi(url):
        return "DOI"
    parsed = furl(url)
    if parsed and parsed.scheme in ["http", "https"]:
        return "URL"
    is_issn = (
        re.search(
            r"\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\Z",
            url,
        )
        is not None
    )
    return "ISSN" if is_issn else None
195
+
196
+
197
def normalize_orcid(orcid: Optional[str]) -> Optional[str]:
    """Return the ORCID as a https://orcid.org/ URL, or None if it is not valid."""
    if not isinstance(orcid, str):
        return None
    orcid_id = validate_orcid(orcid)
    return None if orcid_id is None else "https://orcid.org/" + orcid_id
205
+
206
+
207
def validate_orcid(orcid: Optional[str]) -> Optional[str]:
    """Extract the bare ORCID from an ORCID or an orcid.org URL.

    Accepts the identifier with or without an http(s) URL prefix on
    orcid.org, www.orcid.org or sandbox.orcid.org, with blocks separated by
    hyphens or spaces.

    Returns:
        The identifier as four hyphen-separated blocks, or None when the
        input is not a string or does not match.
    """
    if orcid is None or not isinstance(orcid, str):
        return None
    # The final block is exactly three digits plus one check character
    # (digit or X). The previous "[0-9X]+" also accepted over-long values.
    match = re.search(
        r"\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{3}[0-9X])\Z",
        orcid,
    )
    if match is None:
        return None
    # space-separated blocks are rewritten to the hyphenated form
    return match.group(1).replace(" ", "-")
219
+
220
+
221
def validate_isni(isni: Optional[str]) -> Optional[str]:
    """Extract the bare ISNI from an ISNI or an isni.org URL.

    Accepts the identifier with or without an http(s)://isni.org/isni/
    prefix; the four blocks may optionally be separated by a space or hyphen.

    Returns:
        The identifier with spaces removed, or None when the input is not a
        string or does not match.
    """
    if isni is None or not isinstance(isni, str):
        return None
    # The final block is exactly three digits plus one check character
    # (digit or X). The previous "[0-9X]+" also accepted over-long values.
    match = re.search(
        r"\A(?:(?:http|https)://isni\.org/isni/)?(\d{4}([ -])?\d{4}([ -])?\d{4}([ -])?\d{3}[0-9X])\Z",
        isni,
    )
    if match is None:
        return None
    return match.group(1).replace(" ", "")
233
+
234
+
235
def normalize_isni(isni: Optional[str]) -> Optional[str]:
    """Return the ISNI as a https://isni.org/isni/ URL, or None if it is not valid."""
    if not isinstance(isni, str):
        return None
    isni_id = validate_isni(isni)
    return None if isni_id is None else "https://isni.org/isni/" + isni_id
243
+
244
+
245
def normalize_name_identifier(ni: Optional[str]) -> Optional[str]:
    """Normalize a name identifier given as a dict or a list.

    Args:
        ni: a name-identifier dict, or a list of items accepted by
            format_name_identifier.

    Returns:
        For a dict, the result of format_name_identifier. For a list, the
        first non-None result of format_name_identifier over its items.
        None for None, for strings and for any other type.
    """
    if ni is None:
        return None
    if isinstance(ni, str):
        # NOTE(review): strings have always produced None here even though
        # format_name_identifier can handle them; kept for compatibility --
        # confirm whether this is intended.
        return None
    if isinstance(ni, dict):
        return format_name_identifier(ni)
    if isinstance(ni, list):
        # The previous code called ni.get(...) on the list itself, which
        # always raised AttributeError; iterate over the items instead.
        return next(
            (
                formatted
                for formatted in map(format_name_identifier, ni)
                if formatted is not None
            ),
            None,
        )
    return None
259
+
260
+
261
def format_name_identifier(ni):
    """Turn a name identifier (str or dict) into a normalized URL.

    A string is tried as ORCID, then ROR, then ISNI. For a dict the
    identifier is read from "nameIdentifier", "identifier" or
    "publisherIdentifier" and the scheme from "nameIdentifierScheme",
    "scheme" or "publisherIdentifierScheme". ORCID, ISNI and ROR schemes are
    normalized with their helpers; otherwise an identifier that is already a
    URL is returned as is, and a string identifier is appended to
    "schemeURI"/"schemeUri" when present. Returns None when nothing applies.
    """
    if ni is None:
        return None
    if isinstance(ni, str):
        return normalize_orcid(ni) or normalize_ror(ni) or normalize_isni(ni)

    identifier = (
        ni.get("nameIdentifier", None)
        or ni.get("identifier", None)
        or ni.get("publisherIdentifier", None)
    )
    scheme = (
        ni.get("nameIdentifierScheme", None)
        or ni.get("scheme", None)
        or ni.get("publisherIdentifierScheme", None)
    )
    base_uri = ni.get("schemeURI", None) or ni.get("schemeUri", None)

    if identifier is None:
        return None
    if scheme in ["ORCID", "orcid"]:
        return normalize_orcid(identifier)
    if scheme == "ISNI":
        return normalize_isni(identifier)
    if scheme == "ROR":
        return normalize_ror(identifier)
    if validate_url(identifier) == "URL":
        return identifier
    if isinstance(identifier, str) and base_uri is not None:
        return base_uri + identifier
    return None
291
+
292
+
293
def normalize_issn(string, **kwargs):
    """Normalize an ISSN to the form xxxx-xxxx.

    Args:
        string: the ISSN as a str, a dict holding it under the content key,
            or a list of such dicts, from which the entry with
            media_type "electronic" is picked.
        **kwargs: "content" names the dict key that holds the ISSN
            (default "#text").

    Returns:
        The ISSN unchanged when it is 9 characters long, with a hyphen
        inserted when it is 8 characters long, otherwise None. Unsupported
        input types and non-string values also yield None.
    """
    content = kwargs.get("content", "#text")
    if isinstance(string, str):
        issn = string
    elif isinstance(string, dict):
        issn = string.get(content, None)
    elif isinstance(string, list):
        # skip items that are not dicts instead of failing on .get
        electronic = next(
            (
                i
                for i in string
                if isinstance(i, dict) and i.get("media_type", None) == "electronic"
            ),
            {},
        )
        issn = electronic.get(content, None)
    else:
        # None and unsupported types; previously "issn" stayed unbound here
        issn = None
    if not isinstance(issn, str):
        return None
    if len(issn) == 9:
        return issn
    if len(issn) == 8:
        return issn[0:4] + "-" + issn[4:8]
    return None
315
+
316
+
317
def dict_to_spdx(dct: dict) -> dict:
    """Match a license dict against the bundled SPDX license list.

    The "url" of the input is normalized with normalize_cc_url, then the
    licenses in resources/spdx/licenses.json are searched for an entry whose
    licenseId equals the input "id" (case-insensitively) or whose first
    seeAlso URL equals the normalized url.

    Args:
        dct: dict with optional "id" and "url" keys. It is not modified.

    Returns:
        compact({"id", "url"}) built from the matching SPDX entry, or
        compact() of the input with its url normalized when nothing matches.
    """
    # work on a copy so the caller's dict is left untouched
    dct = {**dct, "url": normalize_cc_url(dct.get("url", None))}
    file_path = os.path.join(
        os.path.dirname(__file__), "resources", "spdx", "licenses.json"
    )
    with open(file_path, encoding="utf-8") as file:
        spdx = json.loads(file.read()).get("licenses")

    # "or" also covers an explicit {"id": None}, which used to crash on .casefold()
    wanted_id = (dct.get("id", None) or "").casefold()
    wanted_url = dct.get("url", None)

    def first_see_also(lic):
        """Return the first seeAlso URL of an SPDX entry, or None if it has none."""
        see_also = lic.get("seeAlso", None)
        return see_also[0] if see_also else None

    license_ = next(
        (
            lic
            for lic in spdx
            if lic["licenseId"].casefold() == wanted_id
            or (wanted_url is not None and first_see_also(lic) == wanted_url)
        ),
        None,
    )
    if license_ is None:
        return compact(dct)
    return compact(
        {
            "id": license_["licenseId"],
            "url": first_see_also(license_),
        }
    )
346
+
347
+ # else
348
+ # {
349
+ # 'rights': hsh['#text'] || hsh['rights'],
350
+ # 'rightsUri': hsh['rightsUri'] || hsh['rightsUri'],
351
+ # 'rightsIdentifier': hsh['rightsIdentifier'].present? ? hsh['rightsIdentifier'].downcase : None,
352
+ # 'rightsIdentifierScheme': hsh['rightsIdentifierScheme'],
353
+ # 'schemeUri': hsh['schemeUri'],
354
+ # 'lang': hsh['lang']
355
+ # }.compact
356
+ # end
357
+ # end
358
+
359
+
360
def from_json_feed(elements: list) -> list:
    """Convert JSON Feed elements by renaming their "url" key to "id".

    Dict elements are changed in place and returned; anything that is not a
    dict becomes None in the result.
    """

    def rename_url(item):
        """Rename "url" to "id" on a single element."""
        if not isinstance(item, dict):
            return None
        if item.get("url", None) is not None:
            item["id"] = item.pop("url")
        return item

    return list(map(rename_url, elements))
374
+
375
+
376
def from_inveniordm(elements: list) -> list:
    """Convert InvenioRDM elements by renaming their "orcid" key to "ORCID".

    An element wrapping its data under "person_or_org" is unwrapped first.
    Dict elements are changed in place and returned; anything that is not a
    dict (before or after unwrapping) becomes None in the result.
    """

    def format_element(element):
        """format element"""
        # check the type before touching dict methods; previously a
        # non-dict element raised AttributeError on .keys()
        if not isinstance(element, dict):
            return None
        if "person_or_org" in element:
            element = element["person_or_org"]
            if not isinstance(element, dict):
                return None
        mapping = {"orcid": "ORCID"}
        for key, value in mapping.items():
            if element.get(key, None) is not None:
                element[value] = element.pop(key)
        return element

    return [format_element(i) for i in elements]
393
+
394
+
395
def to_inveniordm(elements: list) -> list:
    """Convert elements to InvenioRDM.

    Copies familyName, givenName, name, type and ORCID from every element
    into a new dict and passes it through compact.
    """
    copied_keys = ("familyName", "givenName", "name", "type", "ORCID")
    return [
        compact({key: item.get(key, None) for key in copied_keys})
        for item in elements
    ]
409
+
410
+
411
def from_crossref_xml(elements: list) -> list:
    """Convert Crossref XML contributor elements.

    An element with a "name" is typed "Organization"; otherwise it is a
    "Person" whose givenName/familyName come from "given_name"/"surname".
    "contributorType" is the capitalized "contributor_role" (default
    "author") and an "ORCID" is run through parse_attributes and
    normalize_orcid. The Crossref-specific keys given_name, surname,
    sequence and contributor_role are omitted and the result is passed
    through compact.
    """

    def format_element(element):
        """format element"""
        if element.get("name", None) is not None:
            element["type"] = "Organization"
        else:
            element["type"] = "Person"
            element["givenName"] = element.get("given_name", None)
            element["familyName"] = element.get("surname", None)
        # an explicit None role falls back to the default as well, instead of
        # failing on None.capitalize()
        role = element.get("contributor_role", None)
        if role is None:
            role = "author"
        element["contributorType"] = role.capitalize()
        if element.get("ORCID", None) is not None:
            orcid = parse_attributes(element.get("ORCID"))
            element["ORCID"] = normalize_orcid(orcid)
        element = py_.omit(
            element, "given_name", "surname", "sequence", "contributor_role"
        )
        return compact(element)

    return [format_element(i) for i in elements]
439
+
440
+
441
def from_kbase(elements: list) -> list:
    """Convert KBase contributor elements.

    "contributor_id" is converted with from_curie and stored as "ORCID",
    every entry of "contributor_roles" is mapped with map_contributor_role,
    "contributor_id" is omitted and the result is passed through compact.
    Elements that are not dicts become None.
    """

    def map_contributor_role(role):
        """Map a "<vocabulary>:<term>" role to its term.

        CRediT terms are pascal-cased, DataCite terms are looked up in
        DATACITE_CONTRIBUTOR_TYPES (default "Other"), terms of any other
        vocabulary are returned as is. A role without ":" is returned
        unchanged instead of raising IndexError.
        """
        parts = role.split(":")
        if len(parts) < 2:
            return role
        vocabulary, term = parts[0], parts[1]
        if vocabulary == "CRediT":
            return py_.pascal_case(term)
        if vocabulary == "DataCite":
            return DATACITE_CONTRIBUTOR_TYPES.get(term, "Other")
        return term

    def format_element(element):
        """format element"""
        if not isinstance(element, dict):
            return None
        if element.get("contributor_id", None) is not None:
            # presumably a CURIE holding an ORCID -- verify against from_curie
            element["ORCID"] = from_curie(element["contributor_id"])
        element["contributor_roles"] = [
            map_contributor_role(i)
            for i in wrap(element.get("contributor_roles", None))
        ]
        element = py_.omit(element, "contributor_id")
        return compact(element)

    return [format_element(i) for i in elements]
466
+
467
+
468
def from_csl(elements: list) -> list:
    """Convert CSL name elements.

    An element with "literal" or "name" is typed "Organization" (with
    "literal" copied to "name"); otherwise it is a "Person" whose name is
    built from "given" and "family". givenName, familyName and affiliation
    are copied over, the CSL keys given, family, literal and sequence are
    omitted, and the result is passed through compact.
    """

    def format_element(element):
        """format element"""
        if element.get("literal", None) is not None:
            element["type"] = "Organization"
            element["name"] = element["literal"]
        elif element.get("name", None) is not None:
            element["type"] = "Organization"
        else:
            element["type"] = "Person"
            # join only the parts that are present: avoids names with a
            # leading/trailing space and a TypeError on a None part
            name_parts = [
                part
                for part in (element.get("given", None), element.get("family", None))
                if part
            ]
            element["name"] = " ".join(name_parts) if name_parts else None
        element["givenName"] = element.get("given", None)
        element["familyName"] = element.get("family", None)
        element["affiliation"] = element.get("affiliation", None)
        element = py_.omit(element, "given", "family", "literal", "sequence")
        return compact(element)

    return [format_element(i) for i in elements]
491
+
492
+
493
def to_csl(elements: list) -> list:
    """Convert elements to CSL-JSON names.

    familyName and givenName become "family" and "given"; "literal" carries
    the element's name only when there is no familyName. Each result is
    passed through compact.
    """

    def as_csl_name(contributor):
        """Build the CSL name dict for one contributor."""
        family = contributor.get("familyName", None)
        literal = contributor.get("name", None) if family is None else None
        return compact(
            {
                "family": family,
                "given": contributor.get("givenName", None),
                "literal": literal,
            }
        )

    return [as_csl_name(contributor) for contributor in elements]
507
+
508
+
509
def to_ris(elements: Optional[list]) -> list:
    """Convert contributor elements to RIS name strings.

    Elements with neither a name nor a familyName are skipped. The rest are
    rendered as "familyName, givenName" when both are set, otherwise as
    their name (which may be None). None input yields an empty list.
    """
    if elements is None:
        return []

    def as_ris_name(contributor):
        """Render one contributor."""
        family = contributor.get("familyName", None)
        given = contributor.get("givenName", None)
        if family and given:
            return ", ".join([family, given])
        return contributor.get("name", None)

    rendered = []
    for contributor in elements:
        if contributor.get("name", None) or contributor.get("familyName", None):
            rendered.append(as_ris_name(contributor))
    return rendered
527
+
528
+
529
def to_schema_org(element: Optional[dict]) -> Optional[dict]:
    """Rename type, id and title of a metadata element to Schema.org keys.

    "type" becomes "@type", "id" becomes "@id" and "title" becomes "name",
    each only when the value is not None. The dict is changed in place and
    returned; non-dict input yields None.
    """
    if not isinstance(element, dict):
        return None
    renames = (("type", "@type"), ("id", "@id"), ("title", "name"))
    for old_key, new_key in renames:
        if element.get(old_key, None) is not None:
            element[new_key] = element.pop(old_key)
    return element
538
+
539
+
540
def to_schema_org_creators(elements: list) -> list:
    """Convert creators to Schema.org.

    Each element gets an "@type": "Person" when it has a familyName but no
    name (the name is then built from givenName and familyName), otherwise
    its declared "type" when that is "Person" or "Organization", otherwise
    "Organization". "type" and "contributorRoles" are omitted and the result
    is passed through compact.
    """

    def format_element(element):
        """format element"""
        declared_type = element.get("type", None)
        if element.get("familyName", None) and element.get("name", None) is None:
            # join only the parts that are present; a missing givenName used
            # to raise TypeError in str.join
            name_parts = [
                part
                for part in (element.get("givenName", None), element.get("familyName"))
                if part
            ]
            element["name"] = " ".join(name_parts)
            element["@type"] = "Person"
        elif declared_type in ("Person", "Organization"):
            # Honour the declared type. Previously a slice of "type" was
            # computed and then always overwritten, so every creator that
            # already had a name became an "Organization".
            element["@type"] = declared_type
        else:
            element["@type"] = "Organization"
        element = py_.omit(element, "type", "contributorRoles")
        return compact(element)

    return [format_element(i) for i in elements]
557
+
558
+
559
def to_schema_org_container(element: Optional[dict], **kwargs) -> Optional[dict]:
    """Convert CSL container to Schema.org container.

    Args:
        element: container dict with optional "identifier" and "title".
        **kwargs: "container_title" is the fallback name; "type" set to
            "DataRepository" selects "@type" "DataCatalog" instead of
            "Periodical".

    Returns:
        compact({"@id", "@type", "name"}), or None when element is neither a
        dict nor None, or when it is None and no container_title was given.
    """
    if element is None:
        if kwargs.get("container_title", None) is None:
            return None
        # A container_title alone is enough to describe the container.
        # Previously this case fell through to the isinstance check below
        # and always returned None, making the fallback unreachable.
        element = {}
    if not isinstance(element, dict):
        return None

    return compact(
        {
            "@id": element.get("identifier", None),
            "@type": "DataCatalog"
            if kwargs.get("type", None) == "DataRepository"
            else "Periodical",
            "name": element.get("title", None) or kwargs.get("container_title", None),
        }
    )
575
+
576
+
577
def to_schema_org_identifiers(elements: list) -> list:
    """Convert identifiers to Schema.org PropertyValue dicts.

    "identifierType" becomes "propertyID" and "identifier" becomes "value";
    each result is passed through compact.
    """
    return [
        compact(
            {
                "@type": "PropertyValue",
                "propertyID": item.get("identifierType", None),
                "value": item.get("identifier", None),
            }
        )
        for item in elements
    ]
589
+
590
+
591
def to_schema_org_relations(related_items: list, relation_type=None):
    """Convert relatedItems of one relation type to Schema.org relations.

    Items are kept when their "relationType" equals relation_type;
    "References" also accepts "Cites". An ISSN item with relationType
    "IsPartOf" becomes a Periodical with its issn, every other item becomes
    {"@id": normalize_id(relatedIdentifier)}. Results go through compact.
    """

    def as_relation(item):
        """Build the Schema.org dict for one related item."""
        is_periodical = (
            item["relatedItemIdentifierType"] == "ISSN"
            and item["relationType"] == "IsPartOf"
        )
        if is_periodical:
            return compact({"@type": "Periodical", "issn": item["relatedItemIdentifier"]})
        return compact({"@id": normalize_id(item["relatedIdentifier"])})

    # consolidate different relation types
    accepted = ["References", "Cites"] if relation_type == "References" else [relation_type]

    selected = [item for item in wrap(related_items) if item["relationType"] in accepted]
    return [as_relation(item) for item in selected]
610
+
611
+
612
def find_from_format(pid=None, string=None, ext=None, dct=None, filename=None):
    """Find the reader name for the given input.

    The first applicable source wins, in this order: pid; string together
    with ext (decided by ext alone); dct; string; filename. Without any
    input the result is "datacite".
    """
    if pid is not None:
        return find_from_format_by_id(pid)
    has_string = string is not None
    if has_string and ext is not None:
        return find_from_format_by_ext(ext)
    if dct is not None:
        return find_from_format_by_dict(dct)
    if has_string:
        return find_from_format_by_string(string)
    if filename is not None:
        return find_from_format_by_filename(filename)
    return "datacite"
625
+
626
+
627
def find_from_format_by_id(pid: str) -> Optional[str]:
    """Find reader from format by id.

    A valid DOI with a registration agency (get_doi_ra) yields the
    lowercased agency name. Otherwise URL patterns are tried in order:
    GitHub CITATION.cff -> "cff", GitHub codemeta.json -> "codemeta", any
    other GitHub URL -> "cff", api.rogue-scholar.org posts ->
    "json_feed_item", .../api/records/... -> "inveniordm". Anything else is
    "schema_org".
    """
    doi = validate_doi(pid)
    if doi and (registration_agency := get_doi_ra(doi)) is not None:
        return registration_agency.lower()
    # the dots in the file names are escaped so they match literally only
    if (
        re.match(r"\A(http|https):/(/)?github\.com/(.+)/CITATION\.cff\Z", pid)
        is not None
    ):
        return "cff"
    if (
        re.match(r"\A(http|https):/(/)?github\.com/(.+)/codemeta\.json\Z", pid)
        is not None
    ):
        return "codemeta"
    if re.match(r"\A(http|https):/(/)?github\.com/(.+)\Z", pid) is not None:
        return "cff"
    if re.match(r"\Ahttps:/(/)?api\.rogue-scholar\.org/posts/(.+)\Z", pid) is not None:
        return "json_feed_item"
    if re.match(r"\Ahttps:/(/)(.+)/api/records/(.+)\Z", pid) is not None:
        return "inveniordm"
    return "schema_org"
649
+
650
+
651
def find_from_format_by_ext(ext: str) -> Optional[str]:
    """Find the reader name for a file extension (".bib" or ".ris"), else None."""
    known_extensions = ((".bib", "bibtex"), (".ris", "ris"))
    for suffix, reader in known_extensions:
        if ext == suffix:
            return reader
    return None
658
+
659
+
660
def find_from_format_by_dict(dct: dict) -> Optional[str]:
    """Find reader from format by dict.

    Checks characteristic keys in order and returns the first match:
    commonmeta ("schema_version"), schema_org and codemeta ("@context"),
    json_feed_item ("guid"), datacite ("schemaVersion"), crossref
    ("source"), csl ("issued.date-parts"), inveniordm ("conceptdoi"),
    kbase ("credit_metadata"). Returns None for non-dict input or when
    nothing matches.
    """
    if dct is None or not isinstance(dct, dict):
        return None

    def text(key):
        """Return dct[key] when it is a string, else ""."""
        # guards .startswith against None or other non-string values
        value = dct.get(key, None)
        return value if isinstance(value, str) else ""

    if text("schema_version").startswith("https://commonmeta.org"):
        return "commonmeta"
    if dct.get("@context", None) == "http://schema.org":
        return "schema_org"
    if dct.get("@context", None) in [
        "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
    ]:
        return "codemeta"
    if dct.get("guid", None) is not None:
        return "json_feed_item"
    if text("schemaVersion").startswith("http://datacite.org/schema/kernel"):
        return "datacite"
    if dct.get("source", None) == "Crossref":
        return "crossref"
    if py_.get(dct, "issued.date-parts") is not None:
        return "csl"
    if py_.get(dct, "conceptdoi") is not None:
        return "inveniordm"
    if py_.get(dct, "credit_metadata") is not None:
        return "kbase"
    return None
685
+
686
+
687
def find_from_format_by_string(string: str) -> Optional[str]:
    """Find reader from format by string.

    The string is probed in order as JSON, XML, HTML and YAML, then for a
    RIS or BibTeX prefix. The first recognized format wins; a parser that
    cannot handle the input is skipped.

    Returns:
        The reader name, or None when string is None or no format matches.
    """
    if string is None:
        return None
    try:
        data = json.loads(string)
        if not isinstance(data, dict):
            raise TypeError
        if data.get("schema", "").startswith("https://commonmeta.org"):
            return "commonmeta"
        if data.get("items", None) is not None:
            # a feed: inspect its first item
            data = data["items"][0]
        if data.get("@context", None) == "http://schema.org":
            return "schema_org"
        if data.get("@context", None) in [
            "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
        ]:
            return "codemeta"
        if data.get("guid", None) is not None:
            return "json_feed_item"
        if data.get("schemaVersion", "").startswith(
            "http://datacite.org/schema/kernel"
        ):
            return "datacite"
        if data.get("source", None) == "Crossref":
            return "crossref"
        if py_.get(data, "issued.date-parts") is not None:
            return "csl"
        if py_.get(data, "conceptdoi") is not None:
            return "inveniordm"
        if py_.get(data, "credit_metadata") is not None:
            return "kbase"
    except (TypeError, AttributeError, IndexError, KeyError, json.JSONDecodeError):
        # Not JSON, or JSON of an unexpected shape (empty "items", non-string
        # schema value, non-dict item): fall through to the other formats
        # instead of propagating the error.
        pass
    try:
        data = BeautifulSoup(string, "xml")
        if data.find("doi_record"):
            return "crossref_xml"
        if data.find("resource"):
            return "datacite_xml"
    except ValueError:
        pass
    try:
        data = BeautifulSoup(string, "html.parser")
        if (
            data.find("script", type="application/ld+json")
            or data.find("meta", {"name": "citation_doi"})
            or data.find("meta", {"name": "dc.identifier"})
        ):
            return "schema_org"
    except ValueError:
        pass
    try:
        data = yaml.safe_load(string)
        if data.get("cff-version", None):
            return "cff"
    except (yaml.YAMLError, AttributeError):
        # AttributeError: the YAML document is not a mapping
        pass

    if string.startswith("TY - "):
        return "ris"
    if any(string.startswith(f"@{t}") for t in bibtexparser.bibdatabase.STANDARD_TYPES):
        return "bibtex"

    # no format found
    return None
753
+
754
+
755
def find_from_format_by_filename(filename):
    """Return the reader name implied by a file name.

    Only ``CITATION.cff`` is recognised (mapped to ``"cff"``); any other
    value yields None.
    """
    return "cff" if filename == "CITATION.cff" else None
760
+
761
+
762
def from_schema_org(element):
    """Rename the JSON-LD ``@type``/``@id`` keys of a schema.org element.

    Args:
        element: A schema.org dict, or None.

    Returns:
        None when ``element`` is None. Otherwise the result of ``compact``
        applied to a copy of ``element`` in which ``@type`` and ``@id`` are
        omitted and their values are stored under ``type`` and ``id``.
    """
    if element is None:
        return None
    # Work on a shallow copy so the caller's dict is not modified in place.
    renamed = dict(element)
    renamed["type"] = element.get("@type", None)
    renamed["id"] = element.get("@id", None)
    return compact(py_.omit(renamed, ["@type", "@id"]))
769
+
770
+
771
def from_schema_org_creators(elements: list) -> list:
    """Convert schema.org creators to commonmeta.

    Args:
        elements: One creator or a list of creators. Each creator is either a
            plain string (taken as the name) or a schema.org dict.

    Returns:
        A list with one dict per creator. Plain strings become
        ``{"name": <string>}``; dicts are converted by ``format_element``.
    """
    # Post-nominal titles that are stripped from names of the form
    # "<name>, <title>".
    academic_titles = frozenset(
        {
            "MD",
            "PhD",
            "DVM",
            "DDS",
            "DMD",
            "JD",
            "MBA",
            "MPH",
            "MS",
            "MA",
            "MFA",
            "MSc",
            "MEd",
            "MEng",
            "MPhil",
            "MRes",
            "LLM",
            "LLB",
            "BSc",
            "BA",
            "BFA",
            "BEd",
            "BEng",
            "BPhil",
        }
    )

    def format_element(i):
        """Format a single creator without modifying the input."""
        if isinstance(i, str):
            return {"name": i}

        element = {}
        if urlparse(i.get("@id", None)).hostname == "orcid.org":
            # an ORCID identifier always denotes a person
            element["id"] = i.get("@id")
            element["type"] = "Person"
        elif isinstance(i.get("@type", None), str):
            element["type"] = i.get("@type")
        elif isinstance(i.get("@type", None), list):
            element["type"] = py_.find(
                i["@type"], lambda x: x in ["Person", "Organization"]
            )

        # A missing name is treated as an empty string rather than raising
        # KeyError, so creators that only carry givenName/familyName work.
        name = i.get("name", None)
        name_text = "" if name is None else str(name)

        # strip text after comma if suffix is an academic title
        head, separator, tail = name_text.partition(", ")
        if (tail if separator else head) in academic_titles:
            name = name_text = head

        words = name_text.split(" ")
        if i.get("givenName", None):
            element["givenName"] = i.get("givenName", None)
        if i.get("familyName", None):
            element["familyName"] = i.get("familyName", None)
            element["type"] = "Person"
        # parentheses around the last word indicate an organization
        elif len(words) > 1 and not words[-1].startswith("("):
            element["givenName"] = " ".join(words[:-1])
            # the family name is the last word as a string, not a list
            element["familyName"] = words[-1]
        if not element.get("familyName", None):
            element["creatorName"] = compact(
                {
                    "type": i.get("@type", None),
                    "#text": name,
                }
            )

        if isinstance(i.get("affiliation", None), str):
            element["affiliation"] = {"type": "Organization", "name": i["affiliation"]}
        elif urlparse(py_.get(i, "affiliation.@id", "")).hostname in [
            "ror.org",
            "isni.org",
        ]:
            element["affiliation"] = {
                "id": i["affiliation"]["@id"],
                "type": "Organization",
                "name": i["affiliation"]["name"],
            }
        return compact(element)

    return [format_element(i) for i in wrap(elements)]
851
+
852
+
853
def github_from_url(url: str) -> dict:
    """Split a GitHub url into owner, repo, release and path.

    Returns an empty dict when ``url`` is not an https url on github.com or
    raw.githubusercontent.com. Otherwise the url path is split on "/" and the
    segments are mapped by position; the result is passed through ``compact``.
    """
    pattern = r"\Ahttps://(github|raw\.githubusercontent)\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\Z"
    if re.match(pattern, url) is None:
        return {}

    segments = urlparse(url).path.lstrip("/").split("/")

    def segment(index):
        """Return the path segment at ``index`` or None when it is absent."""
        return segments[index] if len(segments) > index else None

    # Segment 2 is not used; on github.com urls it holds "tree" or "blob".
    # NOTE(review): raw.githubusercontent.com urls have no such segment, so
    # the positions may be off by one there - confirm against callers.
    remainder = "/".join(segments[4:])
    return compact(
        {
            "owner": segment(0),
            "repo": segment(1),
            "release": segment(3),
            "path": remainder if len(remainder) > 0 else None,
        }
    )
871
+
872
+
873
def github_repo_from_url(url: str) -> Optional[str]:
    """Return the repository name parsed from a GitHub url, or None."""
    parsed = github_from_url(url)
    return parsed.get("repo", None)
876
+
877
+
878
def github_release_from_url(url: str) -> Optional[str]:
    """Return the release parsed from a GitHub url, or None."""
    parsed = github_from_url(url)
    return parsed.get("release", None)
881
+
882
+
883
def github_owner_from_url(url: str) -> Optional[str]:
    """Return the owner parsed from a GitHub url, or None."""
    parsed = github_from_url(url)
    return parsed.get("owner", None)
886
+
887
+
888
def github_as_owner_url(url: str) -> Optional[str]:
    """Return the github.com url of the owner found in ``url``, or None."""
    owner = github_from_url(url).get("owner", None)
    return None if owner is None else f"https://github.com/{owner}"
894
+
895
+
896
def github_as_repo_url(url) -> Optional[str]:
    """Return the github.com url of the repository found in ``url``, or None."""
    parsed = github_from_url(url)
    repo = parsed.get("repo", None)
    if repo is None:
        return None
    owner = parsed.get("owner")
    return f"https://github.com/{owner}/{repo}"
902
+
903
+
904
def github_as_release_url(url: str) -> Optional[str]:
    """Return the github.com ``/tree/<release>`` url for ``url``, or None.

    None is returned when no release can be parsed from ``url``.
    """
    parsed = github_from_url(url)
    release = parsed.get("release", None)
    if release is None:
        return None
    owner = parsed.get("owner")
    repo = parsed.get("repo")
    return f"https://github.com/{owner}/{repo}/tree/{release}"
910
+
911
+
912
def github_as_codemeta_url(url: str) -> Optional[str]:
    """Return the raw.githubusercontent.com url of a codemeta.json file.

    If ``url`` already points at a path ending in ``codemeta.json`` the raw
    url for that release and path is returned. Otherwise, when an owner can
    be parsed, the url of ``codemeta.json`` on the ``master`` branch is
    returned. None is returned when no owner is found.
    """
    parsed = github_from_url(url)
    owner = parsed.get("owner", None)
    repo = parsed.get("repo")
    path = parsed.get("path", None)

    if path and path.endswith("codemeta.json"):
        release = parsed.get("release")
        return f"https://raw.githubusercontent.com/{owner}/{repo}/{release}/{path}"
    if owner:
        return f"https://raw.githubusercontent.com/{owner}/{repo}/master/codemeta.json"
    return None
924
+
925
+
926
def github_as_cff_url(url: str) -> Optional[str]:
    """Return the raw.githubusercontent.com url of a CITATION.cff file.

    If ``url`` already points at a path ending in ``CITATION.cff`` the raw
    url for that release and path is returned. Otherwise, when an owner can
    be parsed, the url of ``CITATION.cff`` on the ``main`` branch is
    returned. None is returned when no owner is found.
    """
    parsed = github_from_url(url)
    owner = parsed.get("owner", None)
    repo = parsed.get("repo")
    path = parsed.get("path", None)

    if path and path.endswith("CITATION.cff"):
        release = parsed.get("release")
        result = f"https://raw.githubusercontent.com/{owner}/{repo}/{release}/{path}"
    elif owner:
        result = f"https://raw.githubusercontent.com/{owner}/{repo}/main/CITATION.cff"
    else:
        result = None
    return result
937
+
938
+
939
def pages_as_string(
    container: Optional[dict], page_range_separator="-"
) -> Optional[str]:
    """Format the page information of a container, e.g. for BibTeX.

    Returns None when there is no container or no ``firstPage``, the first
    page alone when ``lastPage`` is missing, and otherwise both pages joined
    with ``page_range_separator``.
    """
    if container is None:
        return None

    first_page = container.get("firstPage", None)
    if first_page is None:
        return None

    last_page = container.get("lastPage", None)
    if last_page is None:
        return first_page

    return page_range_separator.join([first_page, last_page])
953
+
954
+
955
def subjects_as_string(subjects):
    """Convert a subject list to a comma-separated string, e.g. for BibTeX.

    Args:
        subjects: A subject dict or a list of subject dicts, or None.

    Returns:
        None when ``subjects`` is None, otherwise the ``subject`` values
        joined with ", ". Entries without a ``subject`` value are skipped.
    """
    if subjects is None:
        return None

    keywords = (subject.get("subject", None) for subject in wrap(subjects))
    # Skip missing subjects; str.join raises TypeError on None items.
    return ", ".join(keyword for keyword in keywords if keyword is not None)
964
+
965
+
966
+ # def reverse():
967
+ # return { 'citation': wrap(related_identifiers).select do |ri|
968
+ # ri['relationType'] == 'IsReferencedBy'
969
+ # end.map do |r|
970
+ # { '@id': normalize_doi(r['relatedIdentifier']),
971
+ # '@type': r['resourceTypeGeneral'] or 'ScholarlyArticle',
972
+ # 'identifier': r['relatedIdentifierType'] == 'DOI' ? nil : to_identifier(r) }.compact
973
+ # end.unwrap,
974
+ # 'isBasedOn': wrap(related_identifiers).select do |ri|
975
+ # ri['relationType'] == 'IsSupplementTo'
976
+ # end.map do |r|
977
+ # { '@id': normalize_doi(r['relatedIdentifier']),
978
+ # '@type': r['resourceTypeGeneral'] or 'ScholarlyArticle',
979
+ # 'identifier': r['relatedIdentifierType'] == 'DOI' ? nil : to_identifier(r) }.compact
980
+ # end.unwrap }.compact
981
+
982
+
983
def name_to_fos(name: str) -> Optional[dict]:
    """Convert name to Fields of Science (OECD) subject.

    The lookup against the OECD mappings is not implemented yet; the name is
    returned as a subject dict with surrounding whitespace stripped.
    """
    # TODO: port the lookup from the Ruby implementation:
    #   1. Load 'fosFields' from resources/oecd/fos-mappings.json and find
    #      the entry where fosLabel == name or 'FOS: ' + fosLabel == name.
    #   2. If not found, look in Fields of Research (Australian and New
    #      Zealand Standard Research Classification): load 'forFields' and
    #      'forDisciplines' from resources/oecd/for-mappings.json and find
    #      the entry where forLabel == name, then map to Fields of Science.
    #   3. On a match return two subjects:
    #        {'subject': sanitize(name).downcase}
    #        {'subject': 'FOS: ' + subject['fosLabel'],
    #         'subjectScheme': 'Fields of Science and Technology (FOS)',
    #         'schemeUri': 'http://www.oecd.org/science/inno/38235147.pdf'}
    #   4. Otherwise fall through to the plain subject below.
    subject = name.strip()
    return {"subject": subject}
1024
+
1025
+
1026
def encode_doi(prefix):
    """Build a https://doi.org/ url from ``prefix`` and a generated suffix.

    The suffix comes from ``base32.generate`` with length 10, split every 5
    characters, and a checksum.
    """
    generated = base32.generate(length=10, split_every=5, checksum=True)
    doi_url = f"https://doi.org/{prefix}/{generated}"
    return doi_url
1030
+
1031
+
1032
def decode_doi(doi: str) -> int:
    """Decode the suffix of a DOI to a number.

    The DOI is split on "/" at most five times and the last piece is passed
    to ``base32.decode``.
    """
    *_, suffix = doi.split("/", maxsplit=5)
    return base32.decode(suffix)
1036
+
1037
+
1038
def from_curie(id: Optional[str]) -> Optional[str]:
    """Expand a CURIE of the form ``<TYPE>:<value>``.

    Args:
        id: The CURIE, e.g. ``"ROR:02catss52"``, or None.

    Returns:
        The expanded identifier for the types DOI, ROR, ISNI, ORCID, URL and
        JDP. None is returned when ``id`` is None, has no ":" separator, or
        uses any other type.
    """
    if id is None:
        return None
    # Split on the first colon only, so values that themselves contain ":"
    # (e.g. "URL:https://example.org") are kept intact.
    _type, separator, value = id.partition(":")
    if not separator:
        return None
    if _type == "DOI":
        return doi_as_url(value)
    if _type == "ROR":
        return "https://ror.org/" + value
    if _type == "ISNI":
        return "https://isni.org/isni/" + value
    if _type == "ORCID":
        return normalize_orcid(value)
    if _type == "URL":
        return normalize_url(value)
    if _type == "JDP":
        return value
    # TODO: resolvable url for other identifier types
    return None
1057
+
1058
+
1059
def issn_as_url(issn: str) -> Optional[str]:
    """Return the portal.issn.org url for an ISSN, or None if it is None."""
    return None if issn is None else f"https://portal.issn.org/resource/ISSN/{issn}"
1064
+
1065
+
1066
def get_language(lang: str, format: str = "alpha_2") -> Optional[str]:
    """Provide a language string based on ISO 639.

    Args:
        lang: A two-letter code, a three-letter code, or a language name.
            The kind of lookup is chosen by the length of ``lang``.
        format: Output format: ``"alpha_2"`` (default), ``"alpha_3"`` or
            ``"name"``. Any other value is treated as ``"alpha_2"``.

    Returns:
        The language in the requested format, or None when ``lang`` is empty,
        the language is not found, or the language has no two-letter code.
    """
    if not lang:
        return None
    if len(lang) == 2:
        language = pycountry.languages.get(alpha_2=lang)
    elif len(lang) == 3:
        language = pycountry.languages.get(alpha_3=lang)
    else:
        language = pycountry.languages.get(name=lang)

    if language is None:
        return None
    if format == "name":
        return language.name
    if format == "alpha_3":
        return language.alpha_3
    # Not every language has a two-letter code; return None instead of
    # raising AttributeError for those.
    return getattr(language, "alpha_2", None)
1089
+
1090
+
1091
def start_case(content: str) -> str:
    """Capitalize first letter of each word without lowercasing the rest.

    Words are separated by single spaces. Empty words, which occur for an
    empty string or for consecutive spaces, are kept unchanged.
    """
    # word[:1] is "" for an empty word, whereas word[0] raises IndexError.
    return " ".join(word[:1].upper() + word[1:] for word in content.split(" "))
1096
+
1097
+
1098
def timer_func(func):
    """Decorator that prints how long each call of ``func`` takes.

    The wrapped function's return value is passed through unchanged, and its
    name and docstring are preserved on the wrapper.
    """
    from functools import wraps

    @wraps(func)
    def function_timer(*args, **kwargs):
        # perf_counter is a monotonic clock intended for measuring durations
        start = time.perf_counter()
        value = func(*args, **kwargs)
        runtime = time.perf_counter() - start
        msg = "{func} took {time} seconds to complete its execution."
        print(msg.format(func=func.__name__, time=runtime))
        return value

    return function_timer