commonmeta-py 0.22__py3-none-any.whl → 0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. commonmeta/__init__.py +96 -0
  2. commonmeta/api_utils.py +77 -0
  3. commonmeta/author_utils.py +260 -0
  4. commonmeta/base_utils.py +121 -0
  5. commonmeta/cli.py +200 -0
  6. commonmeta/constants.py +587 -0
  7. commonmeta/crossref_utils.py +575 -0
  8. commonmeta/date_utils.py +193 -0
  9. commonmeta/doi_utils.py +273 -0
  10. commonmeta/metadata.py +320 -0
  11. commonmeta/readers/__init__.py +1 -0
  12. commonmeta/readers/bibtex_reader.py +0 -0
  13. commonmeta/readers/cff_reader.py +199 -0
  14. commonmeta/readers/codemeta_reader.py +112 -0
  15. commonmeta/readers/commonmeta_reader.py +13 -0
  16. commonmeta/readers/crossref_reader.py +409 -0
  17. commonmeta/readers/crossref_xml_reader.py +505 -0
  18. commonmeta/readers/csl_reader.py +98 -0
  19. commonmeta/readers/datacite_reader.py +390 -0
  20. commonmeta/readers/datacite_xml_reader.py +359 -0
  21. commonmeta/readers/inveniordm_reader.py +218 -0
  22. commonmeta/readers/json_feed_reader.py +420 -0
  23. commonmeta/readers/kbase_reader.py +205 -0
  24. commonmeta/readers/ris_reader.py +103 -0
  25. commonmeta/readers/schema_org_reader.py +506 -0
  26. commonmeta/resources/cff_v1.2.0.json +1827 -0
  27. commonmeta/resources/commonmeta_v0.12.json +601 -0
  28. commonmeta/resources/commonmeta_v0.13.json +559 -0
  29. commonmeta/resources/commonmeta_v0.14.json +573 -0
  30. commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
  31. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
  32. commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
  33. commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
  34. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
  35. commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
  36. commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
  37. commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
  38. commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
  39. commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
  40. commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
  41. commonmeta/resources/crossref/fundref.xsd +49 -0
  42. commonmeta/resources/crossref/module-ali.xsd +39 -0
  43. commonmeta/resources/crossref/relations.xsd +444 -0
  44. commonmeta/resources/crossref-v0.2.json +60 -0
  45. commonmeta/resources/csl-data.json +538 -0
  46. commonmeta/resources/datacite-v4.5.json +829 -0
  47. commonmeta/resources/datacite-v4.5pr.json +608 -0
  48. commonmeta/resources/ietf-bcp-47.json +3025 -0
  49. commonmeta/resources/iso-8601.json +3182 -0
  50. commonmeta/resources/spdx/licenses.json +4851 -0
  51. commonmeta/resources/spdx-schema..json +903 -0
  52. commonmeta/resources/styles/apa.csl +1697 -0
  53. commonmeta/resources/styles/chicago-author-date.csl +684 -0
  54. commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
  55. commonmeta/resources/styles/ieee.csl +468 -0
  56. commonmeta/resources/styles/modern-language-association.csl +341 -0
  57. commonmeta/resources/styles/vancouver.csl +376 -0
  58. commonmeta/schema_utils.py +27 -0
  59. commonmeta/translators.py +47 -0
  60. commonmeta/utils.py +1108 -0
  61. commonmeta/writers/__init__.py +1 -0
  62. commonmeta/writers/bibtex_writer.py +149 -0
  63. commonmeta/writers/citation_writer.py +70 -0
  64. commonmeta/writers/commonmeta_writer.py +68 -0
  65. commonmeta/writers/crossref_xml_writer.py +17 -0
  66. commonmeta/writers/csl_writer.py +79 -0
  67. commonmeta/writers/datacite_writer.py +193 -0
  68. commonmeta/writers/inveniordm_writer.py +94 -0
  69. commonmeta/writers/ris_writer.py +58 -0
  70. commonmeta/writers/schema_org_writer.py +146 -0
  71. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
  72. commonmeta_py-0.24.dist-info/RECORD +75 -0
  73. {commonmeta_py-0.22.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
  74. commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
  75. commonmeta_py/__init__.py +0 -2
  76. commonmeta_py-0.22.dist-info/RECORD +0 -5
  77. {commonmeta_py-0.22.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
commonmeta/metadata.py ADDED
@@ -0,0 +1,320 @@
1
+ """Metadata"""
2
+
3
+ from os import path
4
+ import orjson as json
5
+ from typing import Optional, Union
6
+ import yaml
7
+ from pydash import py_
8
+
9
+ from .readers.crossref_reader import (
10
+ get_crossref,
11
+ read_crossref,
12
+ )
13
+ from .readers.datacite_reader import (
14
+ get_datacite,
15
+ read_datacite,
16
+ )
17
+ from .readers.datacite_xml_reader import read_datacite_xml
18
+ from .readers.crossref_xml_reader import (
19
+ get_crossref_xml,
20
+ read_crossref_xml,
21
+ )
22
+ from .readers.schema_org_reader import (
23
+ get_schema_org,
24
+ read_schema_org,
25
+ )
26
+ from .readers.codemeta_reader import (
27
+ get_codemeta,
28
+ read_codemeta,
29
+ )
30
+ from .readers.csl_reader import read_csl
31
+ from .readers.cff_reader import get_cff, read_cff
32
+ from .readers.json_feed_reader import get_json_feed_item, read_json_feed_item
33
+ from .readers.inveniordm_reader import (
34
+ get_inveniordm,
35
+ read_inveniordm,
36
+ )
37
+ from .readers.kbase_reader import read_kbase
38
+ from .readers.commonmeta_reader import read_commonmeta
39
+ from .readers.ris_reader import read_ris
40
+ from .writers.datacite_writer import write_datacite
41
+ from .writers.bibtex_writer import write_bibtex, write_bibtex_list
42
+ from .writers.citation_writer import write_citation, write_citation_list
43
+ from .writers.crossref_xml_writer import write_crossref_xml, write_crossref_xml_list
44
+ from .writers.csl_writer import write_csl, write_csl_list
45
+ from .writers.ris_writer import write_ris, write_ris_list
46
+ from .writers.schema_org_writer import write_schema_org
47
+ from .writers.commonmeta_writer import write_commonmeta, write_commonmeta_list
48
+ from .writers.inveniordm_writer import write_inveniordm
49
+ from .utils import normalize_id, find_from_format
50
+ from .base_utils import parse_xml, wrap
51
+ from .doi_utils import doi_from_url
52
+ from .schema_utils import json_schema_errors
53
+ from .constants import CM_TO_CR_TRANSLATIONS
54
+
55
+
56
+ # pylint: disable=R0902
57
class Metadata:
    """A single metadata record normalized to Commonmeta attributes.

    The input is either an already-parsed dict or a string. A string is
    first tried as a persistent identifier (via ``normalize_id``), then as
    a path to a local file, and otherwise treated as serialized metadata.
    The source format is taken from the ``via`` keyword argument or
    detected with ``find_from_format``.
    """

    def __init__(self, string: Optional[Union[str, dict]], **kwargs):
        """Read ``string`` and populate the Commonmeta attributes.

        Args:
            string: identifier, file path, serialized metadata, or dict.
            **kwargs: ``via`` selects the input format; all keyword
                arguments are also forwarded to ``read_metadata``.

        Raises:
            ValueError: if ``string`` is None or neither str nor dict, or
                if no reader matches the resolved format.
        """
        if string is None or not isinstance(string, (str, dict)):
            raise ValueError("No input found")
        self.via = kwargs.get("via", None)
        if isinstance(string, dict):
            data = string
        else:
            pid = normalize_id(string)
            if pid is not None and self.via is None:
                self.via = find_from_format(pid=pid)
            elif path.exists(string):
                with open(string, encoding="utf-8") as file:
                    string = file.read()
                if self.via is None:
                    self.via = find_from_format(string=string)
            if self.via is None:
                # nothing detected: fall back to the native format
                self.via = "commonmeta"
            data = self.get_metadata(pid=pid, string=string)
        meta = self.read_metadata(data=data, **kwargs)

        # required properties
        self.id = meta.get("id")  # pylint: disable=C0103
        self.type = meta.get("type")
        # recommended and optional properties
        self.additional_type = meta.get("additionalType")
        self.archive_locations = meta.get("archiveLocations")
        self.container = meta.get("container")
        self.contributors = meta.get("contributors")
        self.date = meta.get("date")
        self.descriptions = meta.get("descriptions")
        self.files = meta.get("files")
        self.funding_references = meta.get("fundingReferences")
        self.geo_locations = meta.get("geoLocations")
        self.identifiers = meta.get("identifiers")
        self.language = meta.get("language")
        self.license = meta.get("license")
        self.provider = meta.get("provider")
        self.publisher = meta.get("publisher")
        self.references = meta.get("references")
        self.relations = meta.get("relations")
        self.subjects = meta.get("subjects")
        self.titles = meta.get("titles")
        self.url = meta.get("url")
        self.version = meta.get("version")
        # other properties
        self.date_created = meta.get("date_created")
        self.date_registered = meta.get("date_registered")
        self.date_published = meta.get("date_published")
        self.date_updated = meta.get("date_updated")
        self.state = meta.get("state")

        # Catch errors in the reader, then validate against JSON schema for Commonmeta
        self.errors = meta.get("errors", None) or json_schema_errors(
            json.loads(self.write())
        )
        self.write_errors = None
        self.is_valid = (
            meta.get("state", None) not in ["not_found", "forbidden", "bad_request"]
            and self.errors is None
            and self.write_errors is None
        )

    def get_metadata(self, pid, string) -> Optional[Union[dict, str]]:
        """Fetch or parse the raw metadata for the format in ``self.via``.

        Args:
            pid: normalized identifier, or None when the input is a string.
            string: serialized metadata, used only when ``pid`` is None.

        Returns:
            The fetched or parsed data. RIS input is returned as the
            unparsed string. When ``pid`` is given but ``self.via`` has no
            fetcher, None is returned and the reader decides what to do.

        Raises:
            ValueError: for bibtex input, an unknown string format, or when
                both ``pid`` and ``string`` are None.
        """
        via = self.via
        if pid is not None:
            fetchers = {
                "schema_org": get_schema_org,
                "datacite": get_datacite,
                "crossref": get_crossref,
                "op": get_crossref,
                "crossref_xml": get_crossref_xml,
                "codemeta": get_codemeta,
                "cff": get_cff,
                "json_feed_item": get_json_feed_item,
                "inveniordm": get_inveniordm,
            }
            fetch = fetchers.get(via)
            # Formats without a fetcher keep yielding None, as before.
            return fetch(pid) if fetch is not None else None
        if string is None:
            raise ValueError("No metadata found")
        if via == "datacite_xml":
            return parse_xml(string)
        if via == "crossref_xml":
            return parse_xml(string, dialect="crossref")
        if via == "cff":
            return yaml.safe_load(string)
        if via == "bibtex":
            raise ValueError("Bibtex not supported")
        if via == "ris":
            return string
        if via in (
            "commonmeta",
            "crossref",
            "datacite",
            "schema_org",
            "csl",
            "json_feed_item",
            "codemeta",
            "kbase",
            "inveniordm",
        ):
            return json.loads(string)
        raise ValueError("No input format found")

    def read_metadata(self, data: dict, **kwargs) -> dict:
        """Run the reader for the input format over ``data``.

        A ``via`` key inside ``data`` takes precedence over ``self.via``.

        Raises:
            ValueError: if no reader exists for the resolved format.
        """
        embedded_via = data.get("via", None) if isinstance(data, dict) else None
        via = embedded_via or self.via
        if via == "commonmeta":
            return read_commonmeta(data, **kwargs)
        if via == "schema_org":
            return read_schema_org(data)
        if via == "datacite":
            return read_datacite(data)
        if via == "datacite_xml":
            return read_datacite_xml(data)
        if via in ("crossref", "op"):
            return read_crossref(data)
        if via == "crossref_xml":
            return read_crossref_xml(data)
        if via == "csl":
            return read_csl(data, **kwargs)
        if via == "codemeta":
            return read_codemeta(data)
        if via == "cff":
            return read_cff(data)
        if via == "json_feed_item":
            return read_json_feed_item(data, **kwargs)
        if via == "inveniordm":
            return read_inveniordm(data)
        if via == "kbase":
            return read_kbase(data)
        if via == "ris":
            return read_ris(data)
        raise ValueError("No input format found")

    def write(self, to: str = "commonmeta", **kwargs) -> str:
        """Convert the metadata into another format.

        Args:
            to: output format name.
            **kwargs: ``style`` and ``locale`` for ``citation``;
                ``depositor``, ``email`` and ``registrant`` for
                ``crossref_xml``.

        Raises:
            ValueError: for an unknown format, or when a serialized result
                cannot be parsed back as JSON for schema validation.
        """
        try:
            if to == "commonmeta":
                return write_commonmeta(self)
            if to == "bibtex":
                return write_bibtex(self)
            if to == "csl":
                output = write_csl(self)
                # NOTE(review): stored in ``errors`` while the datacite and
                # crossref_xml branches use ``write_errors`` - confirm intent.
                self.errors = json_schema_errors(json.loads(output), schema="csl")
                return output
            if to == "citation":
                self.style = kwargs.get("style", "apa")
                self.locale = kwargs.get("locale", "en-US")
                return write_citation(self)
            if to == "ris":
                return write_ris(self)
            if to == "schema_org":
                return write_schema_org(self)
            if to == "inveniordm":
                return write_inveniordm(self)
            if to == "datacite":
                output = write_datacite(self)
                self.write_errors = json_schema_errors(
                    json.loads(output), schema="datacite"
                )
                return output
            if to == "crossref_xml":
                # Only these three fields are validated against the schema.
                instance = {
                    "doi": doi_from_url(self.id),
                    "type": CM_TO_CR_TRANSLATIONS.get(self.type, None),
                    "url": self.url,
                }
                self.depositor = kwargs.get("depositor", None)
                self.email = kwargs.get("email", None)
                self.registrant = kwargs.get("registrant", None)
                self.write_errors = json_schema_errors(instance, schema="crossref")
                return write_crossref_xml(self)
            raise ValueError("No output format found")
        except json.JSONDecodeError as err:
            raise ValueError("Invalid JSON") from err
+
242
+
243
class MetadataList:
    """A collection of ``Metadata`` items plus list-level properties.

    The input is an already-parsed dict, or a string/bytes value that is
    either a path to a local file or serialized JSON. The ``items`` entry
    of the parsed input is turned into ``Metadata`` objects.
    """

    def __init__(
        self, dct: Optional[Union[str, dict]] = None, **kwargs
    ) -> None:
        """Parse ``dct`` and build the list of ``Metadata`` items.

        Args:
            dct: dict, file path, or serialized JSON (str or bytes).
            **kwargs: ``via`` selects the input format; ``depositor``,
                ``email``, ``registrant``, ``jsonlines`` and ``filename``
                are stored on the instance; all keyword arguments are
                forwarded to each ``Metadata`` item.

        Raises:
            ValueError: if ``dct`` is None or of an unsupported type, or
                if the input format is not supported for lists.
        """
        if dct is None or not isinstance(dct, (str, bytes, dict)):
            raise ValueError("No input found")
        # Always define ``via``: dict input used to leave it unset, which
        # made read_metadata_list fail unless ``via`` was passed explicitly.
        self.via = kwargs.get("via", None)
        if isinstance(dct, dict):
            meta = dct
        else:
            if path.exists(dct):
                with open(dct, encoding="utf-8") as file:
                    dct = file.read()
            self.via = self.via or find_from_format(string=dct)
            meta = self.get_metadata_list(dct)

        self.id = meta.get("id", None)
        self.type = meta.get("type", None)
        self.title = meta.get("title", None)
        self.description = meta.get("description", None)

        # options needed for Crossref DOI registration
        self.depositor = kwargs.get("depositor", None)
        self.email = kwargs.get("email", None)
        self.registrant = kwargs.get("registrant", None)

        self.items = self.read_metadata_list(wrap(meta.get("items", None)), **kwargs)
        self.errors = [i.errors for i in self.items if i.errors is not None]
        self.write_errors = [
            i.write_errors for i in self.items if i.write_errors is not None
        ]
        self.is_valid = all(i.is_valid for i in self.items)

        # other options
        self.jsonlines = kwargs.get("jsonlines", False)
        self.filename = kwargs.get("filename", None)

    def get_metadata_list(self, string):
        """Parse serialized list metadata as JSON.

        Returns:
            The parsed JSON value; ``__init__`` reads it with ``.get()``.

        Raises:
            ValueError: if ``string`` is not str/bytes, or ``self.via`` is
                not one of the JSON-based formats supported for lists.
        """
        if string is None or not isinstance(string, (str, bytes)):
            raise ValueError("No input found")
        if self.via in (
            "commonmeta",
            "crossref",
            "datacite",
            "schema_org",
            "csl",
            "json_feed_item",
        ):
            return json.loads(string)
        raise ValueError("No input format found")

    def read_metadata_list(self, data: list, **kwargs) -> list:
        """Build one ``Metadata`` object per entry in ``data``.

        An explicit ``via`` keyword argument wins over ``self.via``.
        """
        kwargs["via"] = kwargs.get("via", None) or self.via
        return [Metadata(i, **kwargs) for i in data]

    def write(self, to: str = "commonmeta", **kwargs) -> str:
        """Convert the metadata list into another format.

        Raises:
            ValueError: for ``schema_org`` and ``datacite``, which are not
                supported for lists, and for unknown formats.
        """
        if to == "commonmeta":
            return write_commonmeta_list(self)
        if to == "bibtex":
            return write_bibtex_list(self)
        if to == "csl":
            return write_csl_list(self)
        if to == "citation":
            return write_citation_list(self, **kwargs)
        if to == "ris":
            return write_ris_list(self)
        if to == "schema_org":
            raise ValueError("Schema.org not supported for metadata lists")
        if to == "datacite":
            raise ValueError("Datacite not supported for metadata lists")
        if to == "crossref_xml":
            return write_crossref_xml_list(self)
        raise ValueError("No output format found")
@@ -0,0 +1 @@
1
+ """Readers for different metadata formats"""
File without changes
@@ -0,0 +1,199 @@
1
+ """cff reader for commonmeta-py"""
2
+ from typing import Optional
3
+ from urllib.parse import urlparse
4
+ import httpx
5
+ import yaml
6
+
7
+ from ..utils import (
8
+ normalize_id,
9
+ name_to_fos,
10
+ dict_to_spdx,
11
+ normalize_orcid,
12
+ github_as_cff_url,
13
+ github_as_repo_url,
14
+ )
15
+ from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
16
+ from ..date_utils import get_iso8601_date
17
+ from ..constants import Commonmeta
18
+
19
+
20
def get_cff(pid: str, **kwargs) -> dict:
    """Download and parse the CITATION.cff file behind ``pid``.

    ``pid`` is converted to a download URL with ``github_as_cff_url``.
    Extra keyword arguments are passed to ``httpx.get``. Any response
    other than HTTP 200 yields ``{"state": "not_found"}``.
    """
    cff_url = github_as_cff_url(pid)
    resp = httpx.get(cff_url, timeout=10, **kwargs)
    if resp.status_code != 200:
        return {"state": "not_found"}

    body = resp.text
    repository = github_as_repo_url(cff_url)
    parsed = yaml.safe_load(body)

    # collect metadata not included in the CFF file
    if parsed.get("repository-code", None) is None:
        parsed["repository-code"] = repository
    return parsed
35
+
36
+
37
def read_cff(data: Optional[dict], **kwargs) -> Commonmeta:
    """Map a parsed CITATION.cff document onto Commonmeta keys.

    Returns ``{"state": "not_found"}`` when ``data`` is None. Keyword
    arguments are merged over the result, so they override mapped values;
    a ``doi`` keyword argument also takes precedence over the file's
    ``doi`` when building the id.
    """
    if data is None:
        return {"state": "not_found"}

    overrides = kwargs or {}

    # TODO: the CFF ``identifiers`` list is not mapped yet.
    doi = kwargs.get("doi", None) or data.get("doi", None)
    _id = normalize_id(doi)
    _type = "Software"
    url = normalize_id(data.get("repository-code", None))
    contributors = cff_contributors(wrap(data.get("authors", None)))

    titles = []
    if data.get("title", None):
        titles.append({"title": data.get("title", None)})

    published = None
    if data.get("date-released", None):
        published = get_iso8601_date(data.get("date-released"))
    date = {"published": published}

    publisher = None
    if url and urlparse(url).hostname == "github.com":
        publisher = {"name": "GitHub"}

    descriptions = []
    if data.get("abstract", None):
        descriptions.append(
            {
                "description": sanitize(data.get("abstract")),
                "type": "Abstract",
            }
        )

    subjects = list(map(name_to_fos, wrap(data.get("keywords", None))))

    license_ = data.get("license", None)
    if license_ is not None:
        license_ = dict_to_spdx({"id": data.get("license")})

    references = cff_references(wrap(data.get("references", None)))

    if data or overrides:
        state = "findable"
    else:
        state = "not_found"

    result = {
        "id": _id,
        "type": _type,
        "url": url,
        "titles": titles,
        "contributors": presence(contributors),
        "publisher": publisher,
        "references": presence(references),
        "date": date,
        "descriptions": presence(descriptions),
        "license": license_,
        "version": data.get("version", None),
        "subjects": presence(subjects),
        "provider": "DataCite" if _id else "GitHub",
        "state": state,
    }
    return result | overrides
116
+
117
+
118
def cff_contributors(contributors):
    """Convert CFF author entries into contributor dicts.

    An entry with given names, family names or an ORCID becomes a
    ``Person``; any other entry becomes an ``Organization``. Every
    contributor gets the ``Author`` role.
    """

    def _affiliation(value):
        """Return a dict for a str or dict affiliation, else None."""
        if isinstance(value, str):
            return {"name": value}
        if isinstance(value, dict):
            return compact(value)
        return None

    def _contributor(entry):
        """Build the Person or Organization dict for one author entry."""
        orcid = normalize_orcid(parse_attributes(entry.get("orcid", None)))
        _id = orcid if orcid else None

        is_person = (
            entry.get("given-names", None) or entry.get("family-names", None) or _id
        )
        if not is_person:
            return {
                "contributorRoles": ["Author"],
                "type": "Organization",
                "name": entry.get("name", None) or entry.get("#text", None),
            }

        given_name = parse_attributes(entry.get("given-names", None))
        family_name = parse_attributes(entry.get("family-names", None))
        affiliations = []
        for raw in wrap(entry.get("affiliation", None)):
            affiliations.append(_affiliation(raw))
        person = {
            "id": _id,
            "contributorRoles": ["Author"],
            "type": "Person",
            "givenName": given_name,
            "familyName": family_name,
            "affiliation": compact(affiliations),
        }
        return compact(person)

    return list(map(_contributor, contributors))
166
+
167
+
168
def cff_references(references):
    """Extract DOI-based references from CFF ``references`` entries.

    Only entries that carry an identifier of type ``doi`` in their
    ``identifiers`` list are kept; each is reduced to ``{"doi": <id>}``
    with the id passed through ``normalize_id``.

    Args:
        references: list of CFF reference mappings.

    Returns:
        One dict per reference that has a DOI identifier.
    """

    def doi_identifier(reference):
        """Return the first identifier of type ``doi``, or None."""
        # The key was previously misspelled "identifers", so no reference
        # ever matched and the result was always empty.
        return next(
            (
                item
                for item in wrap(reference.get("identifiers", None))
                if item.get("type", None) == "doi"
            ),
            None,
        )

    results = []
    for reference in references:
        identifier = doi_identifier(reference)
        if identifier is None:
            continue
        results.append(
            compact(
                {"doi": normalize_id(parse_attributes(identifier.get("value", None)))}
            )
        )
    return results
@@ -0,0 +1,112 @@
1
+ """codemeta reader for commonmeta-py"""
2
+ from typing import Optional
3
+ from collections import defaultdict
4
+ import httpx
5
+
6
+ from ..utils import (
7
+ normalize_id,
8
+ from_schema_org_creators,
9
+ name_to_fos,
10
+ dict_to_spdx,
11
+ github_as_codemeta_url,
12
+ github_as_repo_url,
13
+ doi_from_url,
14
+ )
15
+ from ..base_utils import wrap, presence, compact, sanitize
16
+ from ..author_utils import get_authors
17
+ from ..constants import (
18
+ Commonmeta,
19
+ SO_TO_CM_TRANSLATIONS,
20
+ )
21
+
22
+
23
def get_codemeta(pid: str, **kwargs) -> dict:
    """Download the codemeta JSON document behind ``pid``.

    ``pid`` is converted to a download URL with ``github_as_codemeta_url``.
    Extra keyword arguments are passed to ``httpx.get``. Any response
    other than HTTP 200 yields ``{"state": "not_found"}``.
    """
    codemeta_url = str(github_as_codemeta_url(pid))
    resp = httpx.get(codemeta_url, timeout=10, **kwargs)
    if resp.status_code != 200:
        return {"state": "not_found"}

    document = resp.json()
    # fill in the repository URL when the file does not declare one
    if document.get("codeRepository", None) is None:
        document["codeRepository"] = github_as_repo_url(codemeta_url)
    return document
34
+
35
+
36
def read_codemeta(data: Optional[dict], **kwargs) -> Commonmeta:
    """Map a parsed codemeta document onto Commonmeta keys.

    Returns ``{"state": "not_found"}`` when ``data`` is None. Keyword
    arguments are merged over the result, so they override mapped values.
    """
    if data is None:
        return {"state": "not_found"}

    overrides = kwargs or {}

    # TODO: the ``identifier`` entries are not mapped to identifiers yet.
    _id = normalize_id(data.get("id", None) or data.get("identifier", None))
    _type = SO_TO_CM_TRANSLATIONS.get(data.get("@type", "Software"))

    # ``agents`` takes precedence over ``authors`` when present
    agents = data.get("agents", None)
    creators = data.get("authors", None) if agents is None else agents
    contributors = get_authors(from_schema_org_creators(wrap(creators)))
    editors = get_authors(from_schema_org_creators(wrap(data.get("editor", None))))
    if editors:
        contributors += editors

    date: dict = defaultdict(
        list,
        created=data.get("dateCreated", None),
        published=data.get("datePublished", None),
        updated=data.get("dateModified", None),
    )

    # NOTE(review): yields {"name": None} when no publisher is given - confirm.
    publisher = {"name": data.get("publisher", None)}

    descriptions = None
    if data.get("description", None):
        # NOTE(review): read_cff uses the key "type" here - confirm which is intended.
        descriptions = [
            {
                "description": sanitize(str(data.get("description"))),
                "descriptionType": "Abstract",
            }
        ]

    subjects = list(map(name_to_fos, wrap(data.get("keywords", None))))

    # fall back to ``name`` when there is no ``title``
    title = data.get("title", None)
    titles = [{"title": data.get("name", None) if title is None else title}]

    license_ = data.get("licenseId", None)
    if license_:
        license_ = dict_to_spdx({"id": data.get("licenseId")})

    if doi_from_url(_id):
        provider = "DataCite"
    else:
        provider = "GitHub"

    if data or overrides:
        state = "findable"
    else:
        state = "not_found"

    result = {
        "id": _id,
        "type": _type,
        "url": normalize_id(data.get("codeRepository", None)),
        "identifiers": None,
        "titles": titles,
        "contributors": presence(contributors),
        "publisher": publisher,
        "date": compact(date),
        "descriptions": descriptions,
        "license": license_,
        "version": data.get("version", None),
        "subjects": presence(subjects),
        "provider": provider,
        "state": state,
    }
    return result | overrides
@@ -0,0 +1,13 @@
1
+ """Commonmeta reader for commonmeta-py"""
2
+ from ..constants import Commonmeta
3
+
4
+
5
def read_commonmeta(data: dict, **kwargs) -> Commonmeta:
    """Return Commonmeta input as-is, with keyword arguments merged over it.

    Returns ``{"state": "not_found"}`` when ``data`` is None.
    """
    if data is None:
        return {"state": "not_found"}

    overrides = kwargs if kwargs else {}
    merged = data | overrides
    return merged