commonmeta-py 0.107__py3-none-any.whl → 0.108__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- commonmeta/__init__.py +12 -15
- commonmeta/api_utils.py +3 -2
- commonmeta/base_utils.py +186 -3
- commonmeta/cli.py +114 -34
- commonmeta/constants.py +20 -0
- commonmeta/file_utils.py +112 -0
- commonmeta/metadata.py +102 -42
- commonmeta/readers/codemeta_reader.py +1 -1
- commonmeta/readers/crossref_reader.py +23 -10
- commonmeta/readers/crossref_xml_reader.py +1 -1
- commonmeta/readers/datacite_reader.py +6 -4
- commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
- commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
- commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
- commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
- commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
- commonmeta/resources/crossref/fundref.xsd +29 -19
- commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
- commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
- commonmeta/resources/crossref/module-ali.xsd +14 -6
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
- commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
- commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
- commonmeta/resources/crossref/xml.xsd +287 -0
- commonmeta/schema_utils.py +25 -0
- commonmeta/utils.py +25 -9
- commonmeta/writers/bibtex_writer.py +5 -5
- commonmeta/writers/commonmeta_writer.py +4 -17
- commonmeta/writers/crossref_xml_writer.py +1031 -4
- commonmeta/writers/csl_writer.py +1 -2
- commonmeta/writers/datacite_writer.py +8 -4
- commonmeta/writers/inveniordm_writer.py +277 -2
- commonmeta/writers/ris_writer.py +3 -3
- commonmeta/writers/schema_org_writer.py +10 -5
- {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/METADATA +4 -2
- {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/RECORD +45 -31
- commonmeta/crossref_utils.py +0 -662
- commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
- {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.107.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/metadata.py
CHANGED
@@ -8,8 +8,7 @@ import yaml
|
|
8
8
|
from pydash import py_
|
9
9
|
|
10
10
|
from .base_utils import parse_xml, wrap
|
11
|
-
from .
|
12
|
-
from .doi_utils import doi_from_url
|
11
|
+
from .file_utils import write_output
|
13
12
|
from .readers.cff_reader import get_cff, read_cff
|
14
13
|
from .readers.codemeta_reader import (
|
15
14
|
get_codemeta,
|
@@ -34,7 +33,7 @@ from .readers.inveniordm_reader import (
|
|
34
33
|
get_inveniordm,
|
35
34
|
read_inveniordm,
|
36
35
|
)
|
37
|
-
from .readers.
|
36
|
+
from .readers.jsonfeed_reader import get_jsonfeed, read_jsonfeed
|
38
37
|
from .readers.kbase_reader import read_kbase
|
39
38
|
from .readers.openalex_reader import (
|
40
39
|
get_openalex,
|
@@ -45,17 +44,25 @@ from .readers.schema_org_reader import (
|
|
45
44
|
get_schema_org,
|
46
45
|
read_schema_org,
|
47
46
|
)
|
48
|
-
from .schema_utils import json_schema_errors
|
47
|
+
from .schema_utils import json_schema_errors, xml_schema_errors
|
49
48
|
from .utils import find_from_format, normalize_id
|
50
49
|
from .writers.bibtex_writer import write_bibtex, write_bibtex_list
|
51
50
|
from .writers.citation_writer import write_citation, write_citation_list
|
52
51
|
from .writers.commonmeta_writer import write_commonmeta, write_commonmeta_list
|
53
|
-
from .writers.crossref_xml_writer import
|
52
|
+
from .writers.crossref_xml_writer import (
|
53
|
+
push_crossref_xml_list,
|
54
|
+
write_crossref_xml,
|
55
|
+
write_crossref_xml_list,
|
56
|
+
)
|
54
57
|
from .writers.csl_writer import write_csl, write_csl_list
|
55
|
-
from .writers.datacite_writer import write_datacite
|
56
|
-
from .writers.inveniordm_writer import
|
58
|
+
from .writers.datacite_writer import write_datacite, write_datacite_list
|
59
|
+
from .writers.inveniordm_writer import (
|
60
|
+
push_inveniordm_list,
|
61
|
+
write_inveniordm,
|
62
|
+
write_inveniordm_list,
|
63
|
+
)
|
57
64
|
from .writers.ris_writer import write_ris, write_ris_list
|
58
|
-
from .writers.schema_org_writer import write_schema_org
|
65
|
+
from .writers.schema_org_writer import write_schema_org, write_schema_org_list
|
59
66
|
|
60
67
|
|
61
68
|
# pylint: disable=R0902
|
@@ -154,8 +161,8 @@ class Metadata:
|
|
154
161
|
return get_codemeta(pid)
|
155
162
|
elif via == "cff":
|
156
163
|
return get_cff(pid)
|
157
|
-
elif via == "
|
158
|
-
return
|
164
|
+
elif via == "jsonfeed":
|
165
|
+
return get_jsonfeed(pid)
|
159
166
|
elif via == "inveniordm":
|
160
167
|
return get_inveniordm(pid)
|
161
168
|
elif via == "openalex":
|
@@ -195,7 +202,7 @@ class Metadata:
|
|
195
202
|
"datacite",
|
196
203
|
"schema_org",
|
197
204
|
"csl",
|
198
|
-
"
|
205
|
+
"jsonfeed",
|
199
206
|
"codemeta",
|
200
207
|
"kbase",
|
201
208
|
"inveniordm",
|
@@ -231,8 +238,8 @@ class Metadata:
|
|
231
238
|
return dict(read_codemeta(data))
|
232
239
|
elif via == "cff":
|
233
240
|
return dict(read_cff(data))
|
234
|
-
elif via == "
|
235
|
-
return dict(
|
241
|
+
elif via == "jsonfeed":
|
242
|
+
return dict(read_jsonfeed(data, **kwargs))
|
236
243
|
elif via == "inveniordm":
|
237
244
|
return dict(read_inveniordm(data))
|
238
245
|
elif via == "kbase":
|
@@ -270,13 +277,13 @@ class Metadata:
|
|
270
277
|
def _write_json_format(self, to: str) -> str:
|
271
278
|
"""Handle JSON-based output formats."""
|
272
279
|
if to == "commonmeta":
|
273
|
-
result = write_commonmeta(self)
|
280
|
+
result = json.dumps(write_commonmeta(self))
|
274
281
|
elif to == "datacite":
|
275
|
-
result = write_datacite(self)
|
282
|
+
result = json.dumps(write_datacite(self))
|
276
283
|
elif to == "inveniordm":
|
277
|
-
result = write_inveniordm(self)
|
284
|
+
result = json.dumps(write_inveniordm(self))
|
278
285
|
elif to == "schema_org":
|
279
|
-
result = write_schema_org(self)
|
286
|
+
result = json.dumps(write_schema_org(self))
|
280
287
|
else:
|
281
288
|
return "{}"
|
282
289
|
|
@@ -340,16 +347,19 @@ class Metadata:
|
|
340
347
|
|
341
348
|
def _write_crossref_xml(self, **kwargs) -> str:
|
342
349
|
"""Write in Crossref XML format with error checking."""
|
343
|
-
doi = doi_from_url(self.id)
|
344
|
-
_type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
|
345
|
-
url = self.url
|
346
|
-
instance = {"doi": doi, "type": _type, "url": url}
|
350
|
+
# doi = doi_from_url(self.id)
|
351
|
+
# _type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
|
352
|
+
# url = self.url
|
353
|
+
# instance = {"doi": doi, "type": _type, "url": url}
|
347
354
|
self.depositor = kwargs.get("depositor", None)
|
348
355
|
self.email = kwargs.get("email", None)
|
349
356
|
self.registrant = kwargs.get("registrant", None)
|
350
|
-
|
351
|
-
|
352
|
-
|
357
|
+
output = write_crossref_xml(self)
|
358
|
+
self.write_errors = xml_schema_errors(output, schema="crossref_xml")
|
359
|
+
if self.write_errors is not None:
|
360
|
+
self.is_valid = False
|
361
|
+
return ""
|
362
|
+
return output if output is not None else ""
|
353
363
|
|
354
364
|
|
355
365
|
class MetadataList:
|
@@ -378,6 +388,12 @@ class MetadataList:
|
|
378
388
|
self.depositor = kwargs.get("depositor", None)
|
379
389
|
self.email = kwargs.get("email", None)
|
380
390
|
self.registrant = kwargs.get("registrant", None)
|
391
|
+
self.login_id = kwargs.get("login_id", None)
|
392
|
+
self.login_passwd = kwargs.get("login_passwd", None)
|
393
|
+
|
394
|
+
# options needed for InvenioRDM registration
|
395
|
+
self.host = kwargs.get("host", None)
|
396
|
+
self.token = kwargs.get("token", None)
|
381
397
|
|
382
398
|
self.items = self.read_metadata_list(wrap(meta.get("items", None)), **kwargs)
|
383
399
|
self.errors = [i.errors for i in self.items if i.errors is not None]
|
@@ -387,8 +403,7 @@ class MetadataList:
|
|
387
403
|
self.is_valid = all([i.is_valid for i in self.items])
|
388
404
|
|
389
405
|
# other options
|
390
|
-
self.
|
391
|
-
self.filename = kwargs.get("filename", None)
|
406
|
+
self.file = kwargs.get("file", None)
|
392
407
|
|
393
408
|
def get_metadata_list(self, string) -> list:
|
394
409
|
if string is None or not isinstance(string, (str, bytes)):
|
@@ -396,11 +411,12 @@ class MetadataList:
|
|
396
411
|
if self.via in [
|
397
412
|
"commonmeta",
|
398
413
|
"crossref",
|
414
|
+
"csl",
|
399
415
|
"datacite",
|
400
|
-
"
|
416
|
+
"inveniordm",
|
417
|
+
"jsonfeed",
|
401
418
|
"openalex",
|
402
|
-
"
|
403
|
-
"json_feed_item",
|
419
|
+
"schema_org",
|
404
420
|
]:
|
405
421
|
return json.loads(string)
|
406
422
|
else:
|
@@ -413,23 +429,67 @@ class MetadataList:
|
|
413
429
|
|
414
430
|
def write(self, to: str = "commonmeta", **kwargs) -> str:
|
415
431
|
"""convert metadata list into different formats"""
|
416
|
-
if to == "
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
432
|
+
if to == "bibtex":
|
433
|
+
output = write_bibtex_list(self)
|
434
|
+
if self.file:
|
435
|
+
return write_output(self.file, output, [".bib"])
|
436
|
+
else:
|
437
|
+
return output
|
422
438
|
elif to == "citation":
|
423
439
|
return write_citation_list(self, **kwargs)
|
440
|
+
elif to == "commonmeta":
|
441
|
+
output = json.dumps(write_commonmeta_list(self))
|
442
|
+
if self.file:
|
443
|
+
return write_output(self.file, output, [".json", ".jsonl"])
|
444
|
+
else:
|
445
|
+
return output
|
446
|
+
elif to == "crossref_xml":
|
447
|
+
output = write_crossref_xml_list(self)
|
448
|
+
if self.file:
|
449
|
+
return write_output(self.file, output, [".xml"])
|
450
|
+
else:
|
451
|
+
return output
|
452
|
+
elif to == "csl":
|
453
|
+
output = json.dumps(write_csl_list(self))
|
454
|
+
if self.file:
|
455
|
+
return write_output(self.file, output, [".json"])
|
456
|
+
else:
|
457
|
+
return output
|
458
|
+
elif to == "datacite":
|
459
|
+
output = json.dumps(write_datacite_list(self))
|
460
|
+
if self.file:
|
461
|
+
return write_output(self.file, output, [".json"])
|
462
|
+
else:
|
463
|
+
return output
|
464
|
+
elif to == "inveniordm":
|
465
|
+
output = json.dumps(write_inveniordm_list(self))
|
466
|
+
if self.file:
|
467
|
+
return write_output(self.file, output, [".json"])
|
468
|
+
else:
|
469
|
+
return output
|
424
470
|
elif to == "ris":
|
425
471
|
return write_ris_list(self)
|
426
472
|
elif to == "schema_org":
|
427
|
-
|
473
|
+
output = json.dumps(write_schema_org_list(self))
|
474
|
+
if self.file:
|
475
|
+
return write_output(self.file, output, [".json"])
|
476
|
+
else:
|
477
|
+
return output
|
478
|
+
else:
|
479
|
+
raise ValueError("No valid output format found")
|
480
|
+
|
481
|
+
def push(self, to: str = "commonmeta", **kwargs) -> str:
|
482
|
+
"""push metadata list to external APIs"""
|
483
|
+
|
484
|
+
if to == "crossref_xml":
|
485
|
+
response = push_crossref_xml_list(
|
486
|
+
self, login_id=self.login_id, login_passwd=self.login_passwd
|
487
|
+
)
|
488
|
+
return response
|
428
489
|
elif to == "datacite":
|
429
|
-
raise ValueError("Datacite not supported for metadata lists")
|
430
|
-
elif to == "
|
431
|
-
|
432
|
-
|
433
|
-
return write_crossref_xml_list(self)
|
490
|
+
raise ValueError("Datacite not yet supported for metadata lists")
|
491
|
+
elif to == "inveniordm":
|
492
|
+
response = push_inveniordm_list(self, host=self.host, token=self.token)
|
493
|
+
return response
|
434
494
|
else:
|
435
|
-
raise ValueError("No output format found")
|
495
|
+
raise ValueError("No valid output format found")
|
@@ -1,13 +1,14 @@
|
|
1
1
|
"""crossref reader for commonmeta-py"""
|
2
2
|
|
3
3
|
from typing import Optional
|
4
|
+
from xml.parsers.expat import ExpatError
|
4
5
|
|
5
6
|
import requests
|
6
7
|
from pydash import py_
|
7
8
|
from requests.exceptions import ConnectionError, ReadTimeout
|
8
9
|
|
9
10
|
from ..author_utils import get_authors
|
10
|
-
from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
|
11
|
+
from ..base_utils import compact, parse_attributes, parse_xml, presence, sanitize, wrap
|
11
12
|
from ..constants import (
|
12
13
|
CR_TO_CM_CONTAINER_TRANSLATIONS,
|
13
14
|
CR_TO_CM_TRANSLATIONS,
|
@@ -66,6 +67,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
66
67
|
doi = meta.get("DOI", None)
|
67
68
|
_id = doi_as_url(doi)
|
68
69
|
_type = CR_TO_CM_TRANSLATIONS.get(meta.get("type", None)) or "Other"
|
70
|
+
additional_type = meta.get("subtype", None)
|
69
71
|
|
70
72
|
archive_locations = wrap(meta.get("archive", None))
|
71
73
|
|
@@ -120,13 +122,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
120
122
|
relations = py_.uniq(relations)
|
121
123
|
references = py_.uniq([get_reference(i) for i in wrap(meta.get("reference", None))])
|
122
124
|
funding_references = from_crossref_funding(wrap(meta.get("funder", None)))
|
123
|
-
|
124
|
-
description = meta.get("abstract", None)
|
125
|
-
if description is not None:
|
126
|
-
descriptions = [{"description": sanitize(description), "type": "Abstract"}]
|
127
|
-
else:
|
128
|
-
descriptions = None
|
129
|
-
|
125
|
+
descriptions = get_abstract(meta)
|
130
126
|
subjects = py_.uniq(
|
131
127
|
[
|
132
128
|
{"subject": i}
|
@@ -146,7 +142,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
146
142
|
"id": _id,
|
147
143
|
"type": _type,
|
148
144
|
# recommended and optional properties
|
149
|
-
"additionalType":
|
145
|
+
"additionalType": additional_type,
|
150
146
|
"archiveLocations": presence(archive_locations),
|
151
147
|
"container": presence(container),
|
152
148
|
"contributors": presence(contributors),
|
@@ -201,6 +197,23 @@ def get_titles(meta):
|
|
201
197
|
)
|
202
198
|
|
203
199
|
|
200
|
+
def get_abstract(meta: dict) -> Optional[str]:
|
201
|
+
"""Get abstract from Crossref metadata."""
|
202
|
+
abstract = meta.get("abstract", None)
|
203
|
+
if abstract is None:
|
204
|
+
return None
|
205
|
+
|
206
|
+
try:
|
207
|
+
# Parse the abstract XML if it is JATS formatted
|
208
|
+
description_dct = parse_xml(abstract, xml_attribs=True)
|
209
|
+
description = py_.get(description_dct, "jats:p")
|
210
|
+
if description is None:
|
211
|
+
description = abstract
|
212
|
+
return [{"description": sanitize(description), "type": "Abstract"}]
|
213
|
+
except (TypeError, ExpatError):
|
214
|
+
return [{"description": sanitize(abstract), "type": "Abstract"}]
|
215
|
+
|
216
|
+
|
204
217
|
def get_reference(reference: Optional[dict]) -> Optional[dict]:
|
205
218
|
"""Get reference from Crossref reference"""
|
206
219
|
if reference is None or not isinstance(reference, dict):
|
@@ -341,7 +354,7 @@ def get_container(meta: dict, issn: str) -> dict:
|
|
341
354
|
)
|
342
355
|
isbn = isbn["value"] if isbn else None
|
343
356
|
container_title = parse_attributes(meta.get("container-title", None), first=True)
|
344
|
-
if not container_title
|
357
|
+
if not container_title:
|
345
358
|
container_title = py_.get(meta, "institution.0.name")
|
346
359
|
volume = meta.get("volume", None)
|
347
360
|
issue = py_.get(meta, "journal-issue.issue")
|
@@ -267,13 +267,15 @@ def get_descriptions(descriptions: list) -> list:
|
|
267
267
|
|
268
268
|
def map_description(description):
|
269
269
|
"""map_description"""
|
270
|
+
type = description.get("descriptionType", None)
|
271
|
+
if type is None:
|
272
|
+
type = "Abstract"
|
273
|
+
elif type not in ["Abstract", "Methods", "TechnicalInfo", "Other"]:
|
274
|
+
type = "Other"
|
270
275
|
return compact(
|
271
276
|
{
|
272
277
|
"description": description.get("description", None),
|
273
|
-
"type":
|
274
|
-
if description.get("descriptionType", None)
|
275
|
-
in ["Abstract", "Methods", "TechnicalInfo", "Other"]
|
276
|
-
else "Other",
|
278
|
+
"type": type,
|
277
279
|
"language": description.get("lang", None),
|
278
280
|
}
|
279
281
|
)
|
@@ -21,7 +21,7 @@ from ..doi_utils import (
|
|
21
21
|
from ..utils import (
|
22
22
|
compact,
|
23
23
|
dict_to_spdx,
|
24
|
-
|
24
|
+
from_jsonfeed,
|
25
25
|
issn_as_url,
|
26
26
|
name_to_fos,
|
27
27
|
normalize_url,
|
@@ -31,19 +31,19 @@ from ..utils import (
|
|
31
31
|
)
|
32
32
|
|
33
33
|
|
34
|
-
def
|
35
|
-
"""
|
34
|
+
def get_jsonfeed(pid: str, **kwargs) -> dict:
|
35
|
+
"""get_jsonfeed"""
|
36
36
|
if pid is None:
|
37
37
|
return {"state": "not_found"}
|
38
38
|
url = normalize_url(pid)
|
39
39
|
response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
|
40
40
|
if response.status_code != 200:
|
41
41
|
return {"state": "not_found"}
|
42
|
-
return response.json() | {"via": "
|
42
|
+
return response.json() | {"via": "jsonfeed"}
|
43
43
|
|
44
44
|
|
45
|
-
def
|
46
|
-
"""
|
45
|
+
def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
|
46
|
+
"""read_jsonfeed"""
|
47
47
|
if data is None:
|
48
48
|
return {"state": "not_found"}
|
49
49
|
meta = data
|
@@ -64,7 +64,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
64
64
|
_id = encode_doi(prefix)
|
65
65
|
|
66
66
|
if meta.get("authors", None):
|
67
|
-
contributors = get_authors(
|
67
|
+
contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
|
68
68
|
else:
|
69
69
|
contributors = None
|
70
70
|
|
@@ -176,7 +176,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
|
|
176
176
|
|
177
177
|
|
178
178
|
def get_references(references: list) -> list:
|
179
|
-
"""get
|
179
|
+
"""get jsonfeed references."""
|
180
180
|
|
181
181
|
def get_reference(reference: dict) -> Optional[dict]:
|
182
182
|
if reference is None or not isinstance(reference, dict):
|
@@ -396,8 +396,8 @@ def get_files(pid: str) -> Optional[list]:
|
|
396
396
|
]
|
397
397
|
|
398
398
|
|
399
|
-
def
|
400
|
-
"""get
|
399
|
+
def get_jsonfeed_uuid(id: str):
|
400
|
+
"""get jsonfeed by uuid"""
|
401
401
|
if id is None:
|
402
402
|
return None
|
403
403
|
url = f"https://api.rogue-scholar.org/posts/{id}"
|
@@ -424,8 +424,8 @@ def get_json_feed_item_uuid(id: str):
|
|
424
424
|
)
|
425
425
|
|
426
426
|
|
427
|
-
def
|
428
|
-
"""get
|
427
|
+
def get_jsonfeed_blog_slug(id: str):
|
428
|
+
"""get jsonfeed by id and return blog slug"""
|
429
429
|
if id is None:
|
430
430
|
return None
|
431
431
|
url = f"https://api.rogue-scholar.org/posts/{id}"
|