commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/cff_reader.py +199 -0
  13. commonmeta/readers/codemeta_reader.py +112 -0
  14. commonmeta/readers/commonmeta_reader.py +13 -0
  15. commonmeta/readers/crossref_reader.py +409 -0
  16. commonmeta/readers/crossref_xml_reader.py +505 -0
  17. commonmeta/readers/csl_reader.py +98 -0
  18. commonmeta/readers/datacite_reader.py +390 -0
  19. commonmeta/readers/datacite_xml_reader.py +359 -0
  20. commonmeta/readers/inveniordm_reader.py +218 -0
  21. commonmeta/readers/json_feed_reader.py +420 -0
  22. commonmeta/readers/kbase_reader.py +205 -0
  23. commonmeta/readers/ris_reader.py +103 -0
  24. commonmeta/readers/schema_org_reader.py +506 -0
  25. commonmeta/resources/cff_v1.2.0.json +1827 -0
  26. commonmeta/resources/commonmeta_v0.12.json +601 -0
  27. commonmeta/resources/commonmeta_v0.13.json +559 -0
  28. commonmeta/resources/commonmeta_v0.14.json +573 -0
  29. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  30. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  36. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  37. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  38. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  39. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  40. commonmeta/resources/crossref/fundref.xsd +49 -0
  41. commonmeta/resources/crossref/module-ali.xsd +39 -0
  42. commonmeta/resources/crossref/relations.xsd +444 -0
  43. commonmeta/resources/crossref-v0.2.json +60 -0
  44. commonmeta/resources/csl-data.json +538 -0
  45. commonmeta/resources/datacite-v4.5.json +829 -0
  46. commonmeta/resources/datacite-v4.5pr.json +608 -0
  47. commonmeta/resources/ietf-bcp-47.json +3025 -0
  48. commonmeta/resources/iso-8601.json +3182 -0
  49. commonmeta/resources/spdx/licenses.json +4851 -0
  50. commonmeta/resources/spdx-schema..json +903 -0
  51. commonmeta/resources/styles/apa.csl +1697 -0
  52. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  53. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  54. commonmeta/resources/styles/ieee.csl +468 -0
  55. commonmeta/resources/styles/modern-language-association.csl +341 -0
  56. commonmeta/resources/styles/vancouver.csl +376 -0
  57. commonmeta/schema_utils.py +27 -0
  58. commonmeta/translators.py +47 -0
  59. commonmeta/utils.py +1108 -0
  60. commonmeta/writers/__init__.py +1 -0
  61. commonmeta/writers/bibtex_writer.py +149 -0
  62. commonmeta/writers/citation_writer.py +70 -0
  63. commonmeta/writers/commonmeta_writer.py +68 -0
  64. commonmeta/writers/crossref_xml_writer.py +17 -0
  65. commonmeta/writers/csl_writer.py +79 -0
  66. commonmeta/writers/datacite_writer.py +193 -0
  67. commonmeta/writers/inveniordm_writer.py +94 -0
  68. commonmeta/writers/ris_writer.py +58 -0
  69. commonmeta/writers/schema_org_writer.py +146 -0
  70. {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  71. commonmeta_py-0.24.dist-info/RECORD +75 -0
  72. {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  73. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  74. commonmeta_py-0.23.dist-info/RECORD +0 -5
  75. /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
  76. {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
@@ -0,0 +1,390 @@
1
+ """datacite reader for Commonmeta"""
2
+
3
+ from collections import defaultdict
4
+ from typing import Optional
5
+ import httpx
6
+ from pydash import py_
7
+
8
+ from ..utils import (
9
+ normalize_url,
10
+ normalize_doi,
11
+ normalize_cc_url,
12
+ dict_to_spdx,
13
+ format_name_identifier,
14
+ )
15
+ from ..base_utils import compact, wrap, presence
16
+ from ..author_utils import get_authors
17
+ from ..date_utils import normalize_date_dict
18
+ from ..doi_utils import (
19
+ doi_as_url,
20
+ doi_from_url,
21
+ datacite_api_url,
22
+ datacite_api_sample_url,
23
+ )
24
+ from ..constants import (
25
+ DC_TO_CM_TRANSLATIONS,
26
+ DC_TO_CM_CONTAINER_TRANSLATIONS,
27
+ Commonmeta,
28
+ )
29
+
30
+
31
def get_datacite(pid: str, **kwargs) -> dict:
    """Fetch the DataCite metadata attributes for a DOI.

    Args:
        pid: Identifier from which ``doi_from_url`` extracts the DOI.
        **kwargs: Extra keyword arguments forwarded to ``httpx.get``. A
            ``timeout`` supplied here replaces the 10 second default.

    Returns:
        The ``data.attributes`` object of the JSON response merged with
        ``{"via": "datacite"}``; ``{"state": "not_found"}`` when no DOI could
        be extracted or the response status is not 200; ``{"state": "timeout"}``
        when reading the response times out.
    """
    doi = doi_from_url(pid)
    if doi is None:
        return {"state": "not_found"}
    url = datacite_api_url(doi)

    # Merge the default instead of writing ``timeout=10, **kwargs``: the latter
    # raises TypeError as soon as a caller passes its own ``timeout``.
    request_options = {"timeout": 10, **kwargs}
    try:
        response = httpx.get(url, **request_options)
        if response.status_code != 200:
            return {"state": "not_found"}
        attributes = py_.get(response.json(), "data.attributes", {})
        return attributes | {"via": "datacite"}
    except httpx.ReadTimeout:
        return {"state": "timeout"}
44
+
45
+
46
def read_datacite(data: dict, **kwargs) -> Commonmeta:
    """Convert a DataCite attributes dictionary into commonmeta metadata.

    Args:
        data: Dictionary with DataCite keys such as ``doi``, ``types``,
            ``titles``, ``creators``, ``rightsList`` and ``relatedIdentifiers``.
            May be ``None``.
        **kwargs: Read options. They are merged over the generated metadata,
            so on a key clash the option value wins.

    Returns:
        The commonmeta dictionary, or ``{"state": "not_found"}`` when ``data``
        is ``None``.
    """
    if data is None:
        return {"state": "not_found"}
    meta = data

    _id = doi_as_url(meta.get("doi", None))

    resource_type_general = py_.get(meta, "types.resourceTypeGeneral")
    resource_type = py_.get(meta, "types.resourceType")
    _type = DC_TO_CM_TRANSLATIONS.get(resource_type_general, "Other")
    # If resourceType is itself one of the resourceTypeGeneral values
    # introduced in schema 4.3, it overrides the general type; otherwise it is
    # kept verbatim as additionalType.
    translated_resource_type = DC_TO_CM_TRANSLATIONS.get(resource_type, None)
    if translated_resource_type:
        _type = translated_resource_type
        additional_type = None
    else:
        additional_type = resource_type

    titles = get_titles(wrap(meta.get("titles", None)))

    # Creators come first, DataCite contributors are appended after them.
    contributors = get_authors(wrap(meta.get("creators", None)))
    contrib = get_authors(wrap(meta.get("contributors", None)))
    if contrib:
        contributors = contributors + contrib

    publisher = meta.get("publisher", None)
    if isinstance(publisher, str):
        publisher = {"name": publisher}
    elif isinstance(publisher, dict):
        publisher = get_publisher(publisher)

    date = get_dates(wrap(meta.get("dates", None)), meta.get("publicationYear", None))
    container = get_container(meta.get("container", None))

    # Only the first rights entry is used. Going through wrap(), like the
    # other list fields here, avoids a TypeError from len(None) when the
    # "rightsList" key is present but null.
    rights_list = wrap(meta.get("rightsList", None))
    license_url = (
        normalize_cc_url(rights_list[0].get("rightsUri", None)) if rights_list else None
    )
    license_ = dict_to_spdx({"url": license_url}) if license_url else None

    # NOTE(review): every other key read here is camelCase; confirm that
    # "content_url" (rather than "contentUrl") is the intended key.
    files = [get_file(i) for i in wrap(meta.get("content_url"))]

    identifiers = get_identifiers(wrap(meta.get("alternateIdentifiers", None)))
    identifiers.append(
        compact(
            {
                "identifier": normalize_doi(_id),
                "identifierType": "DOI",
            }
        )
    )

    # relatedItems takes precedence; relatedIdentifiers is the fallback source.
    references = get_references(
        wrap(meta.get("relatedItems", None) or meta.get("relatedIdentifiers", None))
    )
    relations = get_relations(wrap(meta.get("relatedIdentifiers", None)))
    descriptions = get_descriptions(wrap(meta.get("descriptions", None)))
    geo_locations = get_geolocation(wrap(meta.get("geoLocations", None)))

    def format_subject(subject):
        """Keep only the subject text and its language."""
        return compact(
            {
                "subject": subject.get("subject", None),
                "language": subject.get("lang", None),
            }
        )

    subjects = py_.uniq([format_subject(i) for i in wrap(meta.get("subjects", None))])

    return {
        # required properties
        "id": _id,
        "type": _type,
        # recommended and optional properties
        "additionalType": additional_type,
        "container": presence(container),
        "contributors": presence(contributors),
        "date": compact(date),
        "descriptions": presence(descriptions),
        "files": presence(files),
        "fundingReferences": presence(meta.get("fundingReferences", None)),
        "geoLocations": presence(geo_locations),
        "identifiers": presence(identifiers),
        "language": meta.get("language", None),
        "license": presence(license_),
        "provider": "DataCite",
        "publisher": publisher,
        "references": presence(references),
        "relations": presence(relations),
        "subjects": presence(subjects),
        "titles": presence(titles),
        "url": normalize_url(meta.get("url", None)),
        "version": meta.get("version", None),
    } | kwargs
139
+
140
+
141
def get_identifiers(identifiers: list) -> list:
    """Map alternate identifiers to ``identifier``/``identifierType`` records.

    An entry whose ``identifierType`` is not in the supported list is emitted
    with the type ``"Other"``.
    """
    supported_types = (
        "ARK",
        "arXiv",
        "Bibcode",
        "DOI",
        "Handle",
        "ISBN",
        "ISSN",
        "PMID",
        "PMCID",
        "PURL",
        "URL",
        "URN",
        "Other",
    )

    def to_record(entry):
        """Build one compacted identifier record."""
        # NOTE(review): the value is read from "alternateIdentifier" but the
        # type from "identifierType"; confirm the type key should not be
        # "alternateIdentifierType".
        declared_type = entry.get("identifierType", None)
        kind = declared_type if declared_type in supported_types else "Other"
        record = {
            "identifier": entry.get("alternateIdentifier", None),
            "identifierType": kind,
        }
        return compact(record)

    return list(map(to_record, wrap(identifiers)))
177
+
178
+
179
def get_references(references: list) -> list:
    """Select the ``Cites``/``References`` entries and map them to references.

    Each result has a ``key`` (``ref<n>``) and an ``id``. DOI identifiers go
    through ``normalize_doi``, URL identifiers through ``normalize_url``, and
    anything else is passed along unchanged.
    """
    citing_relation_types = ("Cites", "References")
    collected = []
    # The position counts every incoming entry, including skipped ones, so
    # the generated keys are not necessarily consecutive.
    for position, entry in enumerate(references):
        if entry.get("relationType", None) not in citing_relation_types:
            continue
        raw_identifier = entry.get("relatedIdentifier", None)
        scheme = entry.get("relatedIdentifierType", None)
        if scheme == "DOI":
            resolved = normalize_doi(raw_identifier)
        elif scheme == "URL":
            resolved = normalize_url(raw_identifier)
        else:
            resolved = raw_identifier
        collected.append(compact({"key": f"ref{position + 1}", "id": resolved}))
    return collected
206
+
207
+
208
def get_relations(relations: list) -> list:
    """Map related identifiers with a supported relation type to relations.

    Each result has an ``id`` (the normalized DOI, or the raw identifier when
    ``normalize_doi`` returns nothing truthy) and a ``type``.
    """
    supported_relation_types = (
        "IsNewVersionOf",
        "IsPreviousVersionOf",
        "IsVersionOf",
        "HasVersion",
        "IsPartOf",
        "HasPart",
        "IsVariantFormOf",
        "IsOriginalFormOf",
        "IsIdenticalTo",
        "IsTranslationOf",
        "IsReviewedBy",
        "Reviews",
        "IsPreprintOf",
        "HasPreprint",
        "IsSupplementTo",
    )

    mapped = []
    for entry in relations:
        kind = entry.get("relationType", None)
        if kind not in supported_relation_types:
            continue
        related = entry.get("relatedIdentifier", None)
        mapped.append(compact({"id": normalize_doi(related) or related, "type": kind}))
    return mapped
246
+
247
+
248
def get_file(file: str) -> dict:
    """Build a file record holding ``file`` under the ``url`` key."""
    record = {"url": file}
    return compact(record)
251
+
252
+
253
def get_dates(dates: list, publication_year) -> dict:
    """Turn the list of date entries into a dict keyed by ``dateType``.

    When no ``Issued`` date is present, the publication year (as a string) is
    used for it. The dict is then passed through ``normalize_date_dict``.
    """
    by_type: dict = defaultdict(list)
    # Later entries with the same dateType overwrite earlier ones.
    by_type.update(
        (entry.get("dateType", None), entry.get("date", None)) for entry in dates
    )
    issued_missing = by_type.get("Issued", None) is None
    if issued_missing and publication_year is not None:
        by_type["Issued"] = str(publication_year)
    return normalize_date_dict(by_type)
261
+
262
+
263
def get_descriptions(descriptions: list) -> list:
    """Map description entries to ``description``/``type``/``language`` records.

    Entries without a ``description`` are skipped. A ``descriptionType``
    outside the known set is reported as ``"Other"``.
    """
    known_types = ("Abstract", "Methods", "TechnicalInfo", "Other")
    mapped = []
    for entry in descriptions:
        text = entry.get("description", None)
        if text is None:
            continue
        kind = entry.get("descriptionType", None)
        if kind not in known_types:
            kind = "Other"
        mapped.append(
            compact(
                {
                    "description": text,
                    "type": kind,
                    "language": entry.get("lang", None),
                }
            )
        )
    return mapped
284
+
285
+
286
def get_titles(titles: list) -> list:
    """Map title entries to ``title``/``type``/``language`` records.

    Entries without a ``title`` are skipped. ``type`` is only set for the
    title types ``AlternativeTitle``, ``Subtitle`` and ``TranslatedTitle``.
    """
    typed_titles = ("AlternativeTitle", "Subtitle", "TranslatedTitle")
    mapped = []
    for entry in titles:
        text = entry.get("title", None)
        if text is None:
            continue
        kind = entry.get("titleType", None)
        if kind not in typed_titles:
            kind = None
        mapped.append(
            compact(
                {
                    "title": text,
                    "type": kind,
                    "language": entry.get("lang", None),
                }
            )
        )
    return mapped
303
+
304
+
305
def get_publisher(publisher: dict) -> dict:
    """Build a publisher record with ``id`` and ``name``.

    The ``id`` comes from ``format_name_identifier``, the ``name`` from the
    ``name`` key of ``publisher``.
    """
    publisher_id = format_name_identifier(publisher)
    publisher_name = publisher.get("name", None)
    return compact({"id": publisher_id, "name": publisher_name})
310
+
311
+
312
def get_geolocation(geolocations: list) -> list:
    """Map geolocation entries to records with float coordinates.

    Args:
        geolocations: List of dictionaries that may contain the keys
            ``geoLocationPoint``, ``geoLocationBox`` and ``geoLocationPlace``.

    Returns:
        One compacted dictionary per entry. Point and box coordinates are
        converted to ``float``; absent coordinates become ``None``.
    """

    def to_float(value):
        """Convert a coordinate to float, or return None when it is absent."""
        # A numeric zero is a valid coordinate (equator / prime meridian), so
        # numbers are converted unconditionally instead of being dropped by a
        # truthiness test. bool is excluded to keep False treated as absent.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        return float(value) if value else None

    def geo_location_point(point: dict):
        """Convert the latitude and longitude of a point to float."""
        return {
            "pointLatitude": to_float(point.get("pointLatitude", None)),
            "pointLongitude": to_float(point.get("pointLongitude", None)),
        }

    def geo_location_box(box: dict):
        """Convert the four bounds of a box to float."""
        return {
            "eastBoundLongitude": to_float(box.get("eastBoundLongitude", None)),
            "northBoundLatitude": to_float(box.get("northBoundLatitude", None)),
            "southBoundLatitude": to_float(box.get("southBoundLatitude", None)),
            "westBoundLongitude": to_float(box.get("westBoundLongitude", None)),
        }

    return [
        compact(
            {
                "geoLocationPoint": geo_location_point(location.get("geoLocationPoint"))
                if location.get("geoLocationPoint", None)
                else None,
                "geoLocationBox": geo_location_box(location.get("geoLocationBox"))
                if location.get("geoLocationBox", None)
                else None,
                "geoLocationPlace": location.get("geoLocationPlace", None),
            }
        )
        for location in geolocations
    ]
357
+
358
+
359
def get_container(container: Optional[dict]) -> Optional[dict]:
    """Map a DataCite container to a commonmeta container record.

    Args:
        container: Dictionary read for the keys ``identifier``, ``type`` and
            ``title``, or ``None``.

    Returns:
        ``None`` when ``container`` is ``None``; otherwise the compacted
        dictionary with ``id``, ``type`` and ``title``. ``type`` is looked up
        in ``DC_TO_CM_CONTAINER_TRANSLATIONS`` and is ``None`` when the source
        type is empty or has no translation.
    """
    if container is None:
        return None

    source_type = container.get("type", None)
    _type = (
        DC_TO_CM_CONTAINER_TRANSLATIONS.get(source_type, None) if source_type else None
    )

    return compact(
        {
            "id": container.get("identifier", None),
            "type": _type,
            "title": container.get("title", None),
        }
    )
376
+
377
+
378
def get_random_datacite_id(number: int = 1) -> list:
    """Return the ``id`` of each item from the DataCite sample endpoint.

    ``number`` is capped at 20. An empty list is returned when the response
    status is not 200 or reading the response times out.
    """
    sample_size = min(number, 20)
    url = datacite_api_sample_url(sample_size)
    try:
        response = httpx.get(url, timeout=60)
        if response.status_code == 200:
            records = py_.get(response.json(), "data")
            return [record.get("id") for record in records]
        return []
    except httpx.ReadTimeout:
        return []