commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/bibtex_reader.py +0 -0
  13. commonmeta/readers/cff_reader.py +199 -0
  14. commonmeta/readers/codemeta_reader.py +112 -0
  15. commonmeta/readers/commonmeta_reader.py +13 -0
  16. commonmeta/readers/crossref_reader.py +409 -0
  17. commonmeta/readers/crossref_xml_reader.py +505 -0
  18. commonmeta/readers/csl_reader.py +98 -0
  19. commonmeta/readers/datacite_reader.py +390 -0
  20. commonmeta/readers/datacite_xml_reader.py +359 -0
  21. commonmeta/readers/inveniordm_reader.py +218 -0
  22. commonmeta/readers/json_feed_reader.py +420 -0
  23. commonmeta/readers/kbase_reader.py +205 -0
  24. commonmeta/readers/ris_reader.py +103 -0
  25. commonmeta/readers/schema_org_reader.py +506 -0
  26. commonmeta/resources/cff_v1.2.0.json +1827 -0
  27. commonmeta/resources/commonmeta_v0.12.json +601 -0
  28. commonmeta/resources/commonmeta_v0.13.json +559 -0
  29. commonmeta/resources/commonmeta_v0.14.json +573 -0
  30. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  36. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  37. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  38. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  39. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  40. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  41. commonmeta/resources/crossref/fundref.xsd +49 -0
  42. commonmeta/resources/crossref/module-ali.xsd +39 -0
  43. commonmeta/resources/crossref/relations.xsd +444 -0
  44. commonmeta/resources/crossref-v0.2.json +60 -0
  45. commonmeta/resources/csl-data.json +538 -0
  46. commonmeta/resources/datacite-v4.5.json +829 -0
  47. commonmeta/resources/datacite-v4.5pr.json +608 -0
  48. commonmeta/resources/ietf-bcp-47.json +3025 -0
  49. commonmeta/resources/iso-8601.json +3182 -0
  50. commonmeta/resources/spdx/licenses.json +4851 -0
  51. commonmeta/resources/spdx-schema..json +903 -0
  52. commonmeta/resources/styles/apa.csl +1697 -0
  53. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  54. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  55. commonmeta/resources/styles/ieee.csl +468 -0
  56. commonmeta/resources/styles/modern-language-association.csl +341 -0
  57. commonmeta/resources/styles/vancouver.csl +376 -0
  58. commonmeta/schema_utils.py +27 -0
  59. commonmeta/translators.py +47 -0
  60. commonmeta/utils.py +1108 -0
  61. commonmeta/writers/__init__.py +1 -0
  62. commonmeta/writers/bibtex_writer.py +149 -0
  63. commonmeta/writers/citation_writer.py +70 -0
  64. commonmeta/writers/commonmeta_writer.py +68 -0
  65. commonmeta/writers/crossref_xml_writer.py +17 -0
  66. commonmeta/writers/csl_writer.py +79 -0
  67. commonmeta/writers/datacite_writer.py +193 -0
  68. commonmeta/writers/inveniordm_writer.py +94 -0
  69. commonmeta/writers/ris_writer.py +58 -0
  70. commonmeta/writers/schema_org_writer.py +146 -0
  71. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  72. commonmeta_py-0.24.dist-info/RECORD +75 -0
  73. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  74. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  75. commonmeta_py/__init__.py +0 -2
  76. commonmeta_py-0.22.dist-info/RECORD +0 -5
  77. {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,409 @@
1
+ """crossref reader for commonmeta-py"""
2
+
3
+ from typing import Optional
4
+ import httpx
5
+ from pydash import py_
6
+
7
+ from ..utils import (
8
+ dict_to_spdx,
9
+ normalize_cc_url,
10
+ normalize_url,
11
+ normalize_doi,
12
+ normalize_issn,
13
+ issn_as_url,
14
+ )
15
+ from ..base_utils import wrap, compact, presence, sanitize, parse_attributes
16
+ from ..author_utils import get_authors
17
+ from ..date_utils import get_date_from_date_parts
18
+ from ..doi_utils import (
19
+ doi_as_url,
20
+ doi_from_url,
21
+ crossref_api_url,
22
+ crossref_api_query_url,
23
+ crossref_api_sample_url,
24
+ )
25
+ from ..constants import (
26
+ CR_TO_CM_TRANSLATIONS,
27
+ CR_TO_CM_CONTAINER_TRANSLATIONS,
28
+ CROSSREF_CONTAINER_TYPES,
29
+ Commonmeta,
30
+ )
31
+
32
+
33
def get_crossref_list(query: dict, **kwargs) -> list[dict]:
    """Query the Crossref API and return the list of matching items.

    The request URL is built with ``crossref_api_query_url(query, **kwargs)``
    and fetched with ``httpx.get`` using a 30 second timeout.

    NOTE(review): the same ``kwargs`` are forwarded both to the URL builder
    and to ``httpx.get`` -- confirm every accepted keyword is valid for both.

    Returns:
        The ``message.items`` value of the JSON response, or an empty list
        when the status code is not 200 or those keys are absent.
    """
    request_url = crossref_api_query_url(query, **kwargs)
    result = httpx.get(request_url, timeout=30, **kwargs)
    if result.status_code == 200:
        payload = result.json()
        return payload.get("message", {}).get("items", [])
    return []
40
+
41
+
42
def get_crossref(pid: str, **kwargs) -> dict:
    """Fetch a single Crossref record for the given identifier.

    ``pid`` is converted to a DOI with ``doi_from_url``; ``kwargs`` are passed
    through to ``httpx.get`` (10 second timeout).

    Returns:
        ``{"state": "not_found"}`` when no DOI can be extracted or the response
        status is not 200; otherwise the ``message`` part of the JSON response
        merged with ``{"via": "crossref"}``.
    """
    not_found = {"state": "not_found"}

    doi = doi_from_url(pid)
    if doi is None:
        return not_found

    result = httpx.get(crossref_api_url(doi), timeout=10, **kwargs)
    if result.status_code != 200:
        return not_found

    message = result.json().get("message", {})
    return message | {"via": "crossref"}
52
+
53
+
54
def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
    """Convert a Crossref work record into a commonmeta dict.

    Args:
        data: Crossref record as a dict (keys read here include ``DOI``,
            ``type``, ``author``, ``editor``, ``title``, ``license``,
            ``relation``, ``reference``, ``funder``, ``link``), or ``None``.
        **kwargs: Extra key/value pairs merged over the result; they override
            values derived from ``data``.

    Returns:
        ``{"state": "not_found"}`` when ``data`` is ``None``, otherwise the
        commonmeta properties derived from the record.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data
    read_options = kwargs or {}

    doi = meta.get("DOI", None)
    _id = doi_as_url(doi)
    _type = CR_TO_CM_TRANSLATIONS.get(meta.get("type", None)) or "Other"

    archive_locations = wrap(meta.get("archive", None))

    if meta.get("author", None):
        contributors = get_authors(wrap(meta.get("author")), via="crossref")
    else:
        contributors = []

    # Tag editors on copies so the caller's record is not modified in place.
    editors = [
        {**i, "contributorType": "Editor"} for i in wrap(meta.get("editor", None))
    ]
    if editors:
        contributors += get_authors(editors)

    url = normalize_url(py_.get(meta, "resource.primary.URL"))
    titles = get_titles(meta)
    publisher = compact({"name": meta.get("publisher", None)})

    # Prefer the full issued timestamp, then the issued date-parts, and fall
    # back to the record creation timestamp.
    date = compact(
        {
            "published": py_.get(meta, "issued.date-time")
            or get_date_from_date_parts(meta.get("issued", None))
            or py_.get(meta, "created.date-time")
        }
    )
    identifiers = [
        compact(
            {
                "identifier": _id,
                "identifierType": "DOI",
            }
        )
    ]

    # Only the first license entry is used; an empty or missing list yields
    # no license instead of raising IndexError.
    license_ = None
    licenses = meta.get("license", None)
    if licenses:
        license_url = normalize_cc_url(licenses[0].get("URL", None))
        license_ = dict_to_spdx({"url": license_url}) if license_url else None

    issn = get_issn(meta)
    container = get_container(meta, issn=issn)
    relations = get_relations(meta.get("relation", None))
    if issn is not None:
        relations.append(
            {
                "id": issn_as_url(issn),
                "type": "IsPartOf",
            }
        )
        relations = py_.uniq(relations)
    references = py_.uniq([get_reference(i) for i in wrap(meta.get("reference", None))])
    funding_references = from_crossref_funding(wrap(meta.get("funder", None)))

    description = meta.get("abstract", None)
    if description is not None:
        descriptions = [{"description": sanitize(description), "type": "Abstract"}]
    else:
        descriptions = None

    subjects = py_.uniq(
        [
            {"subject": i}
            for i in wrap(meta.get("subject", None) or meta.get("group-title", None))
        ]
    )
    # Skip links whose content type is "unspecified"; .get() keeps links
    # that carry no "content-type" key at all instead of raising KeyError.
    files = py_.uniq(
        [
            get_file(i)
            for i in wrap(meta.get("link", None))
            if i.get("content-type", None) != "unspecified"
        ]
    )

    return {
        # required properties
        "id": _id,
        "type": _type,
        # recommended and optional properties
        "additionalType": None,
        "archiveLocations": presence(archive_locations),
        "container": presence(container),
        "contributors": presence(contributors),
        "date": presence(date),
        "descriptions": presence(descriptions),
        "files": presence(files),
        "fundingReferences": presence(funding_references),
        "geoLocations": None,
        "identifiers": identifiers,
        "language": meta.get("language", None),
        "license": license_,
        "provider": "Crossref",
        "publisher": presence(publisher),
        "references": presence(references),
        "relations": presence(relations),
        "subjects": presence(subjects),
        "titles": presence(titles),
        "url": url,
        "version": meta.get("version", None),
    } | read_options
167
+
168
+
169
def get_titles(meta):
    """Collect title entries from a Crossref record.

    Reads the ``title``, ``subtitle`` and ``original_language_title`` keys of
    ``meta`` and returns one list: plain titles first, then subtitles (tagged
    ``"Subtitle"``), then original-language titles (tagged
    ``"TranslatedTitle"``). Every title string is passed through ``sanitize``.
    """

    def values_for(key):
        return wrap(parse_attributes(meta.get(key, None)))

    main_titles = values_for("title")
    sub_titles = values_for("subtitle")
    original_titles = values_for("original_language_title")
    # No language information is read here, so "lang" is always None.
    language = None

    collected = []
    for text in main_titles:
        collected.append({"title": sanitize(text)})
    for text in sub_titles:
        entry = {"title": sanitize(text), "titleType": "Subtitle"}
        collected.append(compact(entry))
    for text in original_titles:
        entry = {
            "title": sanitize(text),
            "titleType": "TranslatedTitle",
            "lang": language,
        }
        collected.append(compact(entry))
    return collected
199
+
200
+
201
def get_reference(reference: Optional[dict]) -> Optional[dict]:
    """Map one Crossref reference entry to a commonmeta reference.

    Returns ``None`` when ``reference`` is not a dict (this includes ``None``).
    Otherwise the Crossref keys are copied to their commonmeta names, the DOI
    (if any) is normalized with ``normalize_doi``, and the result is passed
    through ``compact``.
    """
    # isinstance() is already False for None, so one check covers both cases.
    if not isinstance(reference, dict):
        return None

    doi = reference.get("DOI", None)
    # (commonmeta key, Crossref key) pairs, in output order.
    copied_fields = (
        ("contributor", "author"),
        ("title", "article-title"),
        ("publisher", "publisher"),
        ("publicationYear", "year"),
        ("volume", "volume"),
        ("issue", "issue"),
        ("firstPage", "first-page"),
        ("lastPage", "last-page"),
        ("containerTitle", "journal-title"),
    )

    entry = {
        "key": reference.get("key", None),
        "id": normalize_doi(doi) if doi else None,
    }
    for target, source in copied_fields:
        entry[target] = reference.get(source, None)
    entry["edition"] = None
    entry["unstructured"] = reference.get("unstructured", None)
    return compact(entry)
222
+
223
+
224
def get_relations(relations: list) -> list:
    """Translate the Crossref ``relation`` value into commonmeta relations.

    Despite the ``list`` annotation, a non-empty ``relations`` must provide
    ``.items()`` yielding ``(relation name, list of target dicts)`` pairs.
    Relation names are converted with ``py_.pascal_case`` and those outside
    the supported set are dropped. Targets with ``id-type`` ``"doi"`` or
    ``"issn"`` are expanded to URLs; other ids are kept unchanged.

    Returns:
        A de-duplicated list of ``{"type": ..., "id": ...}`` dicts, or ``[]``
        when ``relations`` is empty or ``None``.
    """
    supported_types = [
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "HasReview",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
        "IsSupplementedBy",
    ]

    if not relations:
        return []

    collected = []
    for name, targets in relations.items():
        relation_type = py_.pascal_case(name)
        if relation_type not in supported_types:
            continue
        for target in targets:
            id_type = target.get("id-type", None)
            raw_id = target.get("id", None)
            if id_type == "doi":
                target_id = doi_as_url(raw_id)
            elif id_type == "issn":
                target_id = issn_as_url(raw_id)
            else:
                target_id = raw_id
            collected.append({"type": relation_type, "id": target_id})

    return py_.uniq(collected)
269
+
270
+
271
def get_file(file: dict) -> dict:
    """Map a Crossref ``link`` entry to a commonmeta file dict.

    Copies ``URL`` to ``url`` and ``content-type`` to ``mimeType`` and passes
    the result through ``compact``.
    """
    location = file.get("URL", None)
    mime_type = file.get("content-type", None)
    return compact({"url": location, "mimeType": mime_type})
279
+
280
+
281
def get_issn(meta: dict) -> Optional[str]:
    """Pick one ISSN from a Crossref record.

    Search order: the first ``issn-type`` entry of type ``"electronic"``, then
    the first of type ``"print"``, then the first ``relation.is-part-of``
    entry whose ``id-type`` is ``"issn"``.

    Returns:
        The ``value`` (or ``id``) of the chosen entry passed through
        ``normalize_issn``, or ``None`` when nothing matches.
    """
    chosen = None
    for wanted in ("electronic", "print"):
        chosen = next(
            (
                entry
                for entry in wrap(meta.get("issn-type", None))
                if entry["type"] == wanted
            ),
            None,
        )
        if chosen:
            break

    if not chosen:
        chosen = next(
            (
                entry
                for entry in py_.get(meta, "relation.is-part-of", [])
                if entry["id-type"] == "issn"
            ),
            None,
        )

    if not chosen:
        return None
    return normalize_issn(chosen.get("value", None) or chosen.get("id", None))
315
+
316
+
317
def get_container(meta: dict, issn: str) -> dict:
    """Build the commonmeta container for a Crossref record.

    The container type is looked up from the record ``type`` via
    ``CROSSREF_CONTAINER_TYPES`` and ``CR_TO_CM_CONTAINER_TRANSLATIONS``.
    The identifier is ``issn`` when truthy, otherwise the electronic ISBN,
    otherwise the print ISBN from ``isbn-type``. ``page`` is split on ``"-"``
    into first and last page. The result is passed through ``compact``.
    """
    crossref_container = CROSSREF_CONTAINER_TYPES.get(meta.get("type", None))
    container_type = CR_TO_CM_CONTAINER_TRANSLATIONS.get(crossref_container, None)

    # Electronic ISBN wins over print ISBN.
    isbn = None
    for wanted in ("electronic", "print"):
        found = next(
            (
                entry
                for entry in wrap(meta.get("isbn-type", None))
                if entry["type"] == wanted
            ),
            None,
        )
        if found:
            isbn = found["value"]
            break

    container_title = parse_attributes(meta.get("container-title", None), first=True)
    volume = meta.get("volume", None)
    issue = py_.get(meta, "journal-issue.issue")

    first_page = None
    last_page = None
    page_range = meta.get("page", None)
    if page_range:
        parts = page_range.split("-")
        # Pad so a single page number gives last_page = None.
        first_page, last_page = (parts + [None])[:2]

    if issn:
        identifier_type = "ISSN"
    elif isbn:
        identifier_type = "ISBN"
    else:
        identifier_type = None

    # TODO: add support for series, location, missing in Crossref JSON

    return compact(
        {
            "type": container_type,
            "identifier": issn or isbn,
            "identifierType": identifier_type,
            "title": container_title,
            "volume": volume,
            "issue": issue,
            "firstPage": first_page,
            "lastPage": last_page,
        }
    )
366
+
367
+
368
def from_crossref_funding(funding_references: list) -> list:
    """Convert Crossref ``funder`` entries into commonmeta funding references.

    Args:
        funding_references: List of funder dicts; the keys read are ``name``,
            ``DOI`` and ``award``.

    Returns:
        A de-duplicated list of funding reference dicts. A funder that has
        both a name and awards yields one entry per award (with
        ``awardNumber``); otherwise a single entry is added if it is
        non-empty. An empty input returns ``[]``.
    """
    if not funding_references:
        return []

    formatted_funding_references = []
    for funding in funding_references:
        name = funding.get("name", None)
        # Read the DOI once; it may be missing or explicitly None.
        doi = funding.get("DOI", None)
        is_funder_id = isinstance(doi, str) and doi.startswith("10.13039")
        f = compact(
            {
                "funderName": name,
                "funderIdentifier": doi_as_url(doi) if doi is not None else None,
                "funderIdentifierType": "Crossref Funder ID"
                if is_funder_id
                else None,
            }
        )
        awards = funding.get("award", None)
        if name is not None and awards is not None:
            for award in wrap(awards):
                fund_ref = f.copy()
                fund_ref["awardNumber"] = award
                formatted_funding_references.append(fund_ref)
        elif f != {}:
            formatted_funding_references.append(f)
    return py_.uniq(formatted_funding_references)
395
+
396
+
397
def get_random_crossref_id(number: int = 1, **kwargs) -> list:
    """Return DOIs of randomly sampled Crossref works.

    Args:
        number: How many DOIs to request; values above 20 are capped at 20.
        **kwargs: Passed to ``crossref_api_sample_url`` when building the URL.

    Returns:
        The ``DOI`` value of each item in ``message.items``. An empty list is
        returned on a non-200 status, a timeout or connection error, or when
        the response carries no items.
    """
    number = min(number, 20)
    url = crossref_api_sample_url(number, **kwargs)
    try:
        response = httpx.get(url, timeout=10)
    except (httpx.TimeoutException, httpx.ConnectError):
        # Best effort: any kind of timeout or a failed connection yields no
        # ids rather than an exception.
        return []
    if response.status_code != 200:
        return []

    # "message.items" may be missing; treat that as no results.
    items = py_.get(response.json(), "message.items") or []
    return [i.get("DOI") for i in items]