commonmeta-py 0.23__py3-none-any.whl → 0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +96 -0
- commonmeta/api_utils.py +77 -0
- commonmeta/author_utils.py +260 -0
- commonmeta/base_utils.py +121 -0
- commonmeta/cli.py +200 -0
- commonmeta/constants.py +587 -0
- commonmeta/crossref_utils.py +575 -0
- commonmeta/date_utils.py +193 -0
- commonmeta/doi_utils.py +273 -0
- commonmeta/metadata.py +320 -0
- commonmeta/readers/__init__.py +1 -0
- commonmeta/readers/cff_reader.py +199 -0
- commonmeta/readers/codemeta_reader.py +112 -0
- commonmeta/readers/commonmeta_reader.py +13 -0
- commonmeta/readers/crossref_reader.py +409 -0
- commonmeta/readers/crossref_xml_reader.py +505 -0
- commonmeta/readers/csl_reader.py +98 -0
- commonmeta/readers/datacite_reader.py +390 -0
- commonmeta/readers/datacite_xml_reader.py +359 -0
- commonmeta/readers/inveniordm_reader.py +218 -0
- commonmeta/readers/json_feed_reader.py +420 -0
- commonmeta/readers/kbase_reader.py +205 -0
- commonmeta/readers/ris_reader.py +103 -0
- commonmeta/readers/schema_org_reader.py +506 -0
- commonmeta/resources/cff_v1.2.0.json +1827 -0
- commonmeta/resources/commonmeta_v0.12.json +601 -0
- commonmeta/resources/commonmeta_v0.13.json +559 -0
- commonmeta/resources/commonmeta_v0.14.json +573 -0
- commonmeta/resources/crossref/AccessIndicators.xsd +47 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +10130 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +48 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +8705 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +8608 -0
- commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +49 -0
- commonmeta/resources/crossref/JATS-journalpublishing1.xsd +6176 -0
- commonmeta/resources/crossref/clinicaltrials.xsd +61 -0
- commonmeta/resources/crossref/common5.3.1.xsd +1538 -0
- commonmeta/resources/crossref/crossref5.3.1.xsd +1949 -0
- commonmeta/resources/crossref/crossref_query_output3.0.xsd +1097 -0
- commonmeta/resources/crossref/fundref.xsd +49 -0
- commonmeta/resources/crossref/module-ali.xsd +39 -0
- commonmeta/resources/crossref/relations.xsd +444 -0
- commonmeta/resources/crossref-v0.2.json +60 -0
- commonmeta/resources/csl-data.json +538 -0
- commonmeta/resources/datacite-v4.5.json +829 -0
- commonmeta/resources/datacite-v4.5pr.json +608 -0
- commonmeta/resources/ietf-bcp-47.json +3025 -0
- commonmeta/resources/iso-8601.json +3182 -0
- commonmeta/resources/spdx/licenses.json +4851 -0
- commonmeta/resources/spdx-schema..json +903 -0
- commonmeta/resources/styles/apa.csl +1697 -0
- commonmeta/resources/styles/chicago-author-date.csl +684 -0
- commonmeta/resources/styles/harvard-cite-them-right.csl +321 -0
- commonmeta/resources/styles/ieee.csl +468 -0
- commonmeta/resources/styles/modern-language-association.csl +341 -0
- commonmeta/resources/styles/vancouver.csl +376 -0
- commonmeta/schema_utils.py +27 -0
- commonmeta/translators.py +47 -0
- commonmeta/utils.py +1108 -0
- commonmeta/writers/__init__.py +1 -0
- commonmeta/writers/bibtex_writer.py +149 -0
- commonmeta/writers/citation_writer.py +70 -0
- commonmeta/writers/commonmeta_writer.py +68 -0
- commonmeta/writers/crossref_xml_writer.py +17 -0
- commonmeta/writers/csl_writer.py +79 -0
- commonmeta/writers/datacite_writer.py +193 -0
- commonmeta/writers/inveniordm_writer.py +94 -0
- commonmeta/writers/ris_writer.py +58 -0
- commonmeta/writers/schema_org_writer.py +146 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/METADATA +56 -45
- commonmeta_py-0.24.dist-info/RECORD +75 -0
- {commonmeta_py-0.23.dist-info → commonmeta_py-0.24.dist-info}/WHEEL +1 -1
- commonmeta_py-0.24.dist-info/entry_points.txt +3 -0
- commonmeta_py-0.23.dist-info/RECORD +0 -5
- /commonmeta_py/__init__.py → /commonmeta/readers/bibtex_reader.py +0 -0
- {commonmeta_py-0.23.dist-info/licenses → commonmeta_py-0.24.dist-info}/LICENSE +0 -0
commonmeta/metadata.py
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
"""Metadata"""
|
2
|
+
|
3
|
+
from os import path
|
4
|
+
import orjson as json
|
5
|
+
from typing import Optional, Union
|
6
|
+
import yaml
|
7
|
+
from pydash import py_
|
8
|
+
|
9
|
+
from .readers.crossref_reader import (
|
10
|
+
get_crossref,
|
11
|
+
read_crossref,
|
12
|
+
)
|
13
|
+
from .readers.datacite_reader import (
|
14
|
+
get_datacite,
|
15
|
+
read_datacite,
|
16
|
+
)
|
17
|
+
from .readers.datacite_xml_reader import read_datacite_xml
|
18
|
+
from .readers.crossref_xml_reader import (
|
19
|
+
get_crossref_xml,
|
20
|
+
read_crossref_xml,
|
21
|
+
)
|
22
|
+
from .readers.schema_org_reader import (
|
23
|
+
get_schema_org,
|
24
|
+
read_schema_org,
|
25
|
+
)
|
26
|
+
from .readers.codemeta_reader import (
|
27
|
+
get_codemeta,
|
28
|
+
read_codemeta,
|
29
|
+
)
|
30
|
+
from .readers.csl_reader import read_csl
|
31
|
+
from .readers.cff_reader import get_cff, read_cff
|
32
|
+
from .readers.json_feed_reader import get_json_feed_item, read_json_feed_item
|
33
|
+
from .readers.inveniordm_reader import (
|
34
|
+
get_inveniordm,
|
35
|
+
read_inveniordm,
|
36
|
+
)
|
37
|
+
from .readers.kbase_reader import read_kbase
|
38
|
+
from .readers.commonmeta_reader import read_commonmeta
|
39
|
+
from .readers.ris_reader import read_ris
|
40
|
+
from .writers.datacite_writer import write_datacite
|
41
|
+
from .writers.bibtex_writer import write_bibtex, write_bibtex_list
|
42
|
+
from .writers.citation_writer import write_citation, write_citation_list
|
43
|
+
from .writers.crossref_xml_writer import write_crossref_xml, write_crossref_xml_list
|
44
|
+
from .writers.csl_writer import write_csl, write_csl_list
|
45
|
+
from .writers.ris_writer import write_ris, write_ris_list
|
46
|
+
from .writers.schema_org_writer import write_schema_org
|
47
|
+
from .writers.commonmeta_writer import write_commonmeta, write_commonmeta_list
|
48
|
+
from .writers.inveniordm_writer import write_inveniordm
|
49
|
+
from .utils import normalize_id, find_from_format
|
50
|
+
from .base_utils import parse_xml, wrap
|
51
|
+
from .doi_utils import doi_from_url
|
52
|
+
from .schema_utils import json_schema_errors
|
53
|
+
from .constants import CM_TO_CR_TRANSLATIONS
|
54
|
+
|
55
|
+
|
56
|
+
# pylint: disable=R0902
|
57
|
+
class Metadata:
    """A single metadata record, read from one supported format and writable to others.

    The constructor loads the input, converts it with the reader selected by
    ``via`` and copies the resulting values onto instance attributes.

    ``errors`` holds errors reported by the reader or, failing that, the result
    of validating the commonmeta output against its JSON Schema. ``write_errors``
    holds the schema validation result of the latest ``write()`` to "datacite"
    or "crossref_xml".
    """

    def __init__(self, string: Optional[Union[str, dict]], **kwargs):
        """Load and read a metadata record.

        Args:
            string: Either a dict of already-loaded data, or a str. A str is
                first passed to ``normalize_id``; if that yields an identifier
                the record is fetched, otherwise an existing file path is read
                from disk, otherwise the str itself is treated as the content.
            **kwargs: ``via`` names the input format. All keyword arguments are
                forwarded to ``read_metadata``.

        Raises:
            ValueError: If ``string`` is None or neither str nor dict, or if no
                loader/reader matches the input format.
        """
        if string is None or not isinstance(string, (str, dict)):
            raise ValueError("No input found")
        self.via = kwargs.get("via", None)
        if isinstance(string, dict):
            data = string
        else:
            pid = normalize_id(string)
            if pid is not None and self.via is None:
                # identifier without explicit format: detect format from the identifier
                self.via = find_from_format(pid=pid)
            elif path.exists(string):
                with open(string, encoding="utf-8") as file:
                    string = file.read()
                if self.via is None:
                    self.via = find_from_format(string=string)
            if self.via is None:
                self.via = "commonmeta"
            data = self.get_metadata(pid=pid, string=string)
        meta = self.read_metadata(data=data, **kwargs)

        # required properties
        self.id = meta.get("id")  # pylint: disable=C0103
        self.type = meta.get("type")
        # recommended and optional properties
        self.additional_type = meta.get("additionalType")
        self.archive_locations = meta.get("archiveLocations")
        self.container = meta.get("container")
        self.contributors = meta.get("contributors")
        self.date = meta.get("date")
        self.descriptions = meta.get("descriptions")
        self.files = meta.get("files")
        self.funding_references = meta.get("fundingReferences")
        self.geo_locations = meta.get("geoLocations")
        self.identifiers = meta.get("identifiers")
        self.language = meta.get("language")
        self.license = meta.get("license")
        self.provider = meta.get("provider")
        self.publisher = meta.get("publisher")
        self.references = meta.get("references")
        self.relations = meta.get("relations")
        self.subjects = meta.get("subjects")
        self.titles = meta.get("titles")
        self.url = meta.get("url")
        self.version = meta.get("version")
        # other properties
        self.date_created = meta.get("date_created")
        self.date_registered = meta.get("date_registered")
        self.date_published = meta.get("date_published")
        self.date_updated = meta.get("date_updated")
        self.state = meta.get("state")

        # Catch errors in the reader, then validate against JSON schema for Commonmeta
        self.errors = meta.get("errors", None) or json_schema_errors(
            json.loads(self.write())
        )
        self.write_errors = None
        self.is_valid = (
            meta.get("state", None) not in ["not_found", "forbidden", "bad_request"]
            and self.errors is None
            and self.write_errors is None
        )

    def get_metadata(self, pid, string) -> dict:
        """Load raw metadata for the format in ``self.via``.

        Args:
            pid: Normalized identifier, or None. When given, the record is
                retrieved with the ``get_*`` function of the format.
            string: Raw content, used only when ``pid`` is None.

        Returns:
            The loaded data: the result of the format's ``get_*`` function,
            parsed XML/YAML/JSON, or the unchanged string for "ris". None when
            ``pid`` is given but the format has no ``get_*`` function.

        Raises:
            ValueError: If both ``pid`` and ``string`` are None, if the format
                is "bibtex", or if the format of ``string`` is not supported.
        """
        via = self.via
        if pid is not None:
            if via == "schema_org":
                return get_schema_org(pid)
            elif via == "datacite":
                return get_datacite(pid)
            elif via in ["crossref", "op"]:
                return get_crossref(pid)
            elif via == "crossref_xml":
                return get_crossref_xml(pid)
            elif via == "codemeta":
                return get_codemeta(pid)
            elif via == "cff":
                return get_cff(pid)
            elif via == "json_feed_item":
                return get_json_feed_item(pid)
            elif via == "inveniordm":
                return get_inveniordm(pid)
            # no fetcher for this format: callers receive None, as before
            return None
        if string is None:
            raise ValueError("No metadata found")
        if via == "datacite_xml":
            return parse_xml(string)
        elif via == "crossref_xml":
            return parse_xml(string, dialect="crossref")
        elif via == "cff":
            return yaml.safe_load(string)
        elif via == "bibtex":
            raise ValueError("Bibtex not supported")
        elif via == "ris":
            return string
        elif via in [
            "commonmeta",
            "crossref",
            "datacite",
            "schema_org",
            "csl",
            "json_feed_item",
            "codemeta",
            "kbase",
            "inveniordm",
        ]:
            return json.loads(string)
        raise ValueError("No input format found")

    def read_metadata(self, data: dict, **kwargs) -> dict:
        """Convert loaded data with the reader for its format.

        The format is the truthy ``"via"`` value of ``data`` when ``data`` is a
        dict, otherwise ``self.via``.

        Args:
            data: Data as returned by ``get_metadata`` or passed to the constructor.
            **kwargs: Forwarded to the commonmeta, csl and json_feed_item readers.

        Returns:
            The dict produced by the selected ``read_*`` function.

        Raises:
            ValueError: If no reader exists for the format.
        """
        embedded_via = data.get("via", None) if isinstance(data, dict) else None
        via = embedded_via or self.via
        if via == "commonmeta":
            return read_commonmeta(data, **kwargs)
        elif via == "schema_org":
            return read_schema_org(data)
        elif via == "datacite":
            return read_datacite(data)
        elif via == "datacite_xml":
            return read_datacite_xml(data)
        elif via in ["crossref", "op"]:
            return read_crossref(data)
        elif via == "crossref_xml":
            return read_crossref_xml(data)
        elif via == "csl":
            return read_csl(data, **kwargs)
        elif via == "codemeta":
            return read_codemeta(data)
        elif via == "cff":
            return read_cff(data)
        elif via == "json_feed_item":
            return read_json_feed_item(data, **kwargs)
        elif via == "inveniordm":
            return read_inveniordm(data)
        elif via == "kbase":
            return read_kbase(data)
        elif via == "ris":
            return read_ris(data)
        raise ValueError("No input format found")

    def write(self, to: str = "commonmeta", **kwargs) -> str:
        """Convert the record into another format.

        Args:
            to: Output format: "commonmeta", "bibtex", "csl", "citation", "ris",
                "schema_org", "inveniordm", "datacite" or "crossref_xml".
            **kwargs: ``style`` and ``locale`` for "citation" (defaults "apa"
                and "en-US"); ``depositor``, ``email`` and ``registrant`` for
                "crossref_xml". The values are stored on the instance.

        Returns:
            The output of the matching ``write_*`` function.

        Raises:
            ValueError: If ``to`` is not a supported format, or if writer output
                that is validated against a schema is not valid JSON.
        """
        try:
            if to == "commonmeta":
                return write_commonmeta(self)
            elif to == "bibtex":
                return write_bibtex(self)
            elif to == "csl":
                # serialize once, validate the parsed copy, return the same output
                output = write_csl(self)
                self.errors = json_schema_errors(json.loads(output), schema="csl")
                return output
            elif to == "citation":
                self.style = kwargs.get("style", "apa")
                self.locale = kwargs.get("locale", "en-US")
                return write_citation(self)
            elif to == "ris":
                return write_ris(self)
            elif to == "schema_org":
                return write_schema_org(self)
            elif to == "inveniordm":
                return write_inveniordm(self)
            elif to == "datacite":
                output = write_datacite(self)
                self.write_errors = json_schema_errors(
                    json.loads(output), schema="datacite"
                )
                return output
            elif to == "crossref_xml":
                # only DOI, translated type and URL are validated before writing
                instance = {
                    "doi": doi_from_url(self.id),
                    "type": CM_TO_CR_TRANSLATIONS.get(self.type, None),
                    "url": self.url,
                }
                self.depositor = kwargs.get("depositor", None)
                self.email = kwargs.get("email", None)
                self.registrant = kwargs.get("registrant", None)
                self.write_errors = json_schema_errors(instance, schema="crossref")
                return write_crossref_xml(self)
            else:
                raise ValueError("No output format found")
        except json.JSONDecodeError as error:
            raise ValueError("Invalid JSON") from error
|
241
|
+
|
242
|
+
|
243
|
+
class MetadataList:
    """A list of metadata records with shared list-level properties.

    The input supplies ``id``, ``type``, ``title``, ``description`` and
    ``items``; every item is turned into a ``Metadata`` instance. ``errors``,
    ``write_errors`` and ``is_valid`` are aggregated from the items.
    """

    def __init__(
        self, dct: Optional[Union[str, dict]] = None, **kwargs
    ) -> None:
        """Load a metadata list.

        Args:
            dct: A dict of already-loaded data, or a str/bytes that is either
                the path of an existing file or the serialized content itself.
            **kwargs: ``via`` names the input format; ``depositor``, ``email``
                and ``registrant`` are stored for Crossref DOI registration;
                ``jsonlines`` and ``filename`` are stored as output options.
                All keyword arguments are also passed to each ``Metadata`` item.

        Raises:
            ValueError: If ``dct`` is None or of an unsupported type, or if the
                format of serialized input is not supported.
        """
        if dct is None or not isinstance(dct, (str, bytes, dict)):
            raise ValueError("No input found")
        if isinstance(dct, dict):
            # Always define self.via: read_metadata_list reads it, and without
            # this assignment dict input failed with AttributeError unless a
            # ``via`` keyword was supplied.
            self.via = kwargs.get("via", None)
            meta = dct
        else:
            if path.exists(dct):
                with open(dct, encoding="utf-8") as file:
                    dct = file.read()
            self.via = kwargs.get("via", None) or find_from_format(string=dct)
            meta = self.get_metadata_list(dct)

        self.id = meta.get("id", None)
        self.type = meta.get("type", None)
        self.title = meta.get("title", None)
        self.description = meta.get("description", None)

        # options needed for Crossref DOI registration
        self.depositor = kwargs.get("depositor", None)
        self.email = kwargs.get("email", None)
        self.registrant = kwargs.get("registrant", None)

        self.items = self.read_metadata_list(wrap(meta.get("items", None)), **kwargs)
        self.errors = [i.errors for i in self.items if i.errors is not None]
        self.write_errors = [
            i.write_errors for i in self.items if i.write_errors is not None
        ]
        self.is_valid = all(i.is_valid for i in self.items)

        # other options
        self.jsonlines = kwargs.get("jsonlines", False)
        self.filename = kwargs.get("filename", None)

    def get_metadata_list(self, string) -> dict:
        """Parse serialized list input as JSON.

        Args:
            string: Serialized content (str or bytes).

        Returns:
            The parsed JSON document.

        Raises:
            ValueError: If ``string`` is None or not str/bytes, or if
                ``self.via`` is not one of the JSON-based formats listed below.
        """
        if string is None or not isinstance(string, (str, bytes)):
            raise ValueError("No input found")
        if self.via in [
            "commonmeta",
            "crossref",
            "datacite",
            "schema_org",
            "csl",
            "json_feed_item",
        ]:
            return json.loads(string)
        raise ValueError("No input format found")

    def read_metadata_list(self, data: list, **kwargs) -> list:
        """Build a ``Metadata`` instance for every entry of ``data``.

        The ``via`` keyword passed to each item is the caller's ``via`` or,
        when that is missing or falsy, ``self.via``.
        """
        kwargs["via"] = kwargs.get("via", None) or self.via
        return [Metadata(i, **kwargs) for i in data]

    def write(self, to: str = "commonmeta", **kwargs) -> str:
        """Convert the metadata list into another format.

        Args:
            to: Output format: "commonmeta", "bibtex", "csl", "citation", "ris"
                or "crossref_xml".
            **kwargs: Forwarded to the citation list writer only.

        Raises:
            ValueError: For "schema_org" and "datacite", which are not supported
                for lists, and for any unknown format.
        """
        if to == "commonmeta":
            return write_commonmeta_list(self)
        elif to == "bibtex":
            return write_bibtex_list(self)
        elif to == "csl":
            return write_csl_list(self)
        elif to == "citation":
            return write_citation_list(self, **kwargs)
        elif to == "ris":
            return write_ris_list(self)
        elif to == "schema_org":
            raise ValueError("Schema.org not supported for metadata lists")
        elif to == "datacite":
            raise ValueError("Datacite not supported for metadata lists")
        elif to == "crossref_xml":
            return write_crossref_xml_list(self)
        raise ValueError("No output format found")
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Readers for different metadata formats"""
|
@@ -0,0 +1,199 @@
|
|
1
|
+
"""cff reader for commonmeta-py"""
|
2
|
+
from typing import Optional
|
3
|
+
from urllib.parse import urlparse
|
4
|
+
import httpx
|
5
|
+
import yaml
|
6
|
+
|
7
|
+
from ..utils import (
|
8
|
+
normalize_id,
|
9
|
+
name_to_fos,
|
10
|
+
dict_to_spdx,
|
11
|
+
normalize_orcid,
|
12
|
+
github_as_cff_url,
|
13
|
+
github_as_repo_url,
|
14
|
+
)
|
15
|
+
from ..base_utils import compact, wrap, presence, sanitize, parse_attributes
|
16
|
+
from ..date_utils import get_iso8601_date
|
17
|
+
from ..constants import Commonmeta
|
18
|
+
|
19
|
+
|
20
|
+
def get_cff(pid: str, **kwargs) -> dict:
    """Fetch and parse the CFF file that belongs to ``pid``.

    The download URL is derived from ``pid`` with ``github_as_cff_url``. Any
    keyword arguments are handed to ``httpx.get``.

    Returns:
        The parsed YAML document, with ``repository-code`` filled in from the
        URL when the file does not provide it, or ``{"state": "not_found"}``
        for any response status other than 200.
    """
    cff_url = github_as_cff_url(pid)
    reply = httpx.get(cff_url, timeout=10, **kwargs)
    if reply.status_code != 200:
        return {"state": "not_found"}

    body = reply.text
    repository = github_as_repo_url(cff_url)
    record = yaml.safe_load(body)

    # collect metadata not included in the CFF file
    if record.get("repository-code", None) is None:
        record["repository-code"] = repository
    return record
|
35
|
+
|
36
|
+
|
37
|
+
def read_cff(data: Optional[dict], **kwargs) -> Commonmeta:
    """Convert a parsed CFF document into a commonmeta dict.

    Args:
        data: Parsed CFF document, or None.
        **kwargs: Reader options. ``doi`` takes precedence over the ``doi``
            key of the document; all options are merged over the result, so
            an option replaces a computed key of the same name.

    Returns:
        ``{"state": "not_found"}`` when ``data`` is None, otherwise the
        commonmeta dict. The type is always "Software".
    """
    if data is None:
        return {"state": "not_found"}

    options = kwargs or {}

    _id = normalize_id(kwargs.get("doi", None) or data.get("doi", None))
    url = normalize_id(data.get("repository-code", None))
    contributors = cff_contributors(wrap(data.get("authors", None)))

    title = data.get("title", None)
    titles = [{"title": title}] if title else []

    released = data.get("date-released", None)
    date = {"published": get_iso8601_date(released) if released else None}

    # the publisher is only known for repositories hosted on github.com
    publisher = None
    if url and urlparse(url).hostname == "github.com":
        publisher = {"name": "GitHub"}

    abstract = data.get("abstract", None)
    descriptions = []
    if abstract:
        descriptions.append({"description": sanitize(abstract), "type": "Abstract"})

    subjects = list(map(name_to_fos, wrap(data.get("keywords", None))))

    license_ = data.get("license", None)
    if license_ is not None:
        license_ = dict_to_spdx({"id": license_})

    references = cff_references(wrap(data.get("references", None)))

    state = "findable" if (data or options) else "not_found"

    result = {
        "id": _id,
        "type": "Software",
        "url": url,
        "titles": titles,
        "contributors": presence(contributors),
        "publisher": publisher,
        "references": presence(references),
        "date": date,
        "descriptions": presence(descriptions),
        "license": license_,
        "version": data.get("version", None),
        "subjects": presence(subjects),
        "provider": "DataCite" if _id else "GitHub",
        "state": state,
    }
    return result | options
|
116
|
+
|
117
|
+
|
118
|
+
def cff_contributors(contributors):
    """Map CFF author entries to commonmeta contributors.

    An entry with given names, family names or an ORCID becomes a "Person";
    every other entry becomes an "Organization" named by its ``name`` or
    ``#text`` value. All contributors get the role "Author".
    """

    def as_affiliation(entry):
        """Return an affiliation dict for a str or dict entry, else None."""
        if isinstance(entry, dict):
            return compact(entry)
        if isinstance(entry, str):
            return {"name": entry}
        return None

    def as_contributor(author):
        """Build the contributor dict for one CFF author entry."""
        orcid = normalize_orcid(parse_attributes(author.get("orcid", None))) or None
        is_person = (
            author.get("given-names", None)
            or author.get("family-names", None)
            or orcid
        )
        if not is_person:
            return {
                "contributorRoles": ["Author"],
                "type": "Organization",
                "name": author.get("name", None) or author.get("#text", None),
            }

        given_name = parse_attributes(author.get("given-names", None))
        family_name = parse_attributes(author.get("family-names", None))
        affiliations = [
            as_affiliation(entry) for entry in wrap(author.get("affiliation", None))
        ]
        person = {
            "id": orcid,
            "contributorRoles": ["Author"],
            "type": "Person",
            "givenName": given_name,
            "familyName": family_name,
            "affiliation": compact(affiliations),
        }
        return compact(person)

    return list(map(as_contributor, contributors))
|
166
|
+
|
167
|
+
|
168
|
+
def cff_references(references):
    """Map CFF references to commonmeta references.

    Only references that carry an identifier of type "doi" are kept; each
    becomes a dict with the normalized DOI under the key "doi".

    Args:
        references: List of CFF reference dicts.

    Returns:
        List of reference dicts, empty when no reference has a DOI identifier.
    """

    def doi_identifier(reference):
        """Return the first identifier of type "doi" of a reference, or None."""
        # The key is "identifiers"; the previous spelling "identifers" never
        # matched, so every reference was dropped.
        return next(
            (
                item
                for item in wrap(reference.get("identifiers", None))
                if item.get("type", None) == "doi"
            ),
            None,
        )

    mapped = []
    for reference in references:
        identifier = doi_identifier(reference)
        if identifier is None:
            continue
        mapped.append(
            compact(
                {"doi": normalize_id(parse_attributes(identifier.get("value", None)))}
            )
        )
    return mapped
|
@@ -0,0 +1,112 @@
|
|
1
|
+
"""codemeta reader for commonmeta-py"""
|
2
|
+
from typing import Optional
|
3
|
+
from collections import defaultdict
|
4
|
+
import httpx
|
5
|
+
|
6
|
+
from ..utils import (
|
7
|
+
normalize_id,
|
8
|
+
from_schema_org_creators,
|
9
|
+
name_to_fos,
|
10
|
+
dict_to_spdx,
|
11
|
+
github_as_codemeta_url,
|
12
|
+
github_as_repo_url,
|
13
|
+
doi_from_url,
|
14
|
+
)
|
15
|
+
from ..base_utils import wrap, presence, compact, sanitize
|
16
|
+
from ..author_utils import get_authors
|
17
|
+
from ..constants import (
|
18
|
+
Commonmeta,
|
19
|
+
SO_TO_CM_TRANSLATIONS,
|
20
|
+
)
|
21
|
+
|
22
|
+
|
23
|
+
def get_codemeta(pid: str, **kwargs) -> dict:
    """Fetch and parse the codemeta file that belongs to ``pid``.

    The download URL is derived from ``pid`` with ``github_as_codemeta_url``.
    Any keyword arguments are handed to ``httpx.get``.

    Returns:
        The decoded JSON document, with ``codeRepository`` filled in from the
        URL when the file does not provide it, or ``{"state": "not_found"}``
        for any response status other than 200.
    """
    codemeta_url = str(github_as_codemeta_url(pid))
    reply = httpx.get(codemeta_url, timeout=10, **kwargs)
    if reply.status_code != 200:
        return {"state": "not_found"}

    record = reply.json()
    repository_missing = record.get("codeRepository", None) is None
    if repository_missing:
        record["codeRepository"] = github_as_repo_url(codemeta_url)
    return record
|
34
|
+
|
35
|
+
|
36
|
+
def read_codemeta(data: Optional[dict], **kwargs) -> Commonmeta:
    """Convert a codemeta document into a commonmeta dict.

    Args:
        data: Decoded codemeta document, or None.
        **kwargs: Reader options; they are merged over the result, so an
            option replaces a computed key of the same name.

    Returns:
        ``{"state": "not_found"}`` when ``data`` is None, otherwise the
        commonmeta dict.
    """
    if data is None:
        return {"state": "not_found"}

    options = kwargs or {}

    _id = normalize_id(data.get("id", None) or data.get("identifier", None))
    _type = SO_TO_CM_TRANSLATIONS.get(data.get("@type", "Software"))

    # "agents" takes precedence over "authors" whenever it is present
    agents = data.get("agents", None)
    creators = agents if agents is not None else data.get("authors", None)
    contributors = get_authors(from_schema_org_creators(wrap(creators)))
    editors = get_authors(from_schema_org_creators(wrap(data.get("editor", None))))
    if editors:
        contributors += editors

    date: dict = defaultdict(
        list,
        created=data.get("dateCreated", None),
        published=data.get("datePublished", None),
        updated=data.get("dateModified", None),
    )

    publisher = {"name": data.get("publisher", None)}

    descriptions = None
    if data.get("description", None):
        # NOTE(review): the CFF reader uses the key "type" for the same
        # information - confirm which key the commonmeta schema expects
        descriptions = [
            {
                "description": sanitize(str(data.get("description"))),
                "descriptionType": "Abstract",
            }
        ]

    subjects = list(map(name_to_fos, wrap(data.get("keywords", None))))

    # fall back to "name" only when "title" is missing entirely
    title = data.get("title", None)
    if title is None:
        title = data.get("name", None)
    titles = [{"title": title}]

    license_ = data.get("licenseId", None)
    if license_:
        license_ = dict_to_spdx({"id": data.get("licenseId")})

    provider = "DataCite" if doi_from_url(_id) else "GitHub"
    state = "findable" if (data or options) else "not_found"

    result = {
        "id": _id,
        "type": _type,
        "url": normalize_id(data.get("codeRepository", None)),
        "identifiers": None,
        "titles": titles,
        "contributors": presence(contributors),
        "publisher": publisher,
        "date": compact(date),
        "descriptions": descriptions,
        "license": license_,
        "version": data.get("version", None),
        "subjects": presence(subjects),
        "provider": provider,
        "state": state,
    }
    return result | options
|
@@ -0,0 +1,13 @@
|
|
1
|
+
"""Commonmeta reader for commonmeta-py"""
|
2
|
+
from ..constants import Commonmeta
|
3
|
+
|
4
|
+
|
5
|
+
def read_commonmeta(data: dict, **kwargs) -> Commonmeta:
    """Return commonmeta input merged with the reader options.

    Args:
        data: Commonmeta record, or None.
        **kwargs: Reader options; an option replaces a record key of the
            same name.

    Returns:
        A new dict combining ``data`` and the options, or
        ``{"state": "not_found"}`` when ``data`` is None.
    """
    if data is not None:
        return data | kwargs
    return {"state": "not_found"}
|