commonmeta-py 0.106__py3-none-any.whl → 0.108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. commonmeta/__init__.py +12 -3
  2. commonmeta/api_utils.py +3 -2
  3. commonmeta/base_utils.py +186 -3
  4. commonmeta/cli.py +114 -34
  5. commonmeta/constants.py +20 -0
  6. commonmeta/file_utils.py +112 -0
  7. commonmeta/metadata.py +102 -42
  8. commonmeta/readers/codemeta_reader.py +1 -1
  9. commonmeta/readers/crossref_reader.py +23 -10
  10. commonmeta/readers/crossref_xml_reader.py +1 -1
  11. commonmeta/readers/datacite_reader.py +6 -4
  12. commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
  13. commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
  14. commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
  15. commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
  16. commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
  17. commonmeta/resources/crossref/fundref.xsd +29 -19
  18. commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
  19. commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
  20. commonmeta/resources/crossref/module-ali.xsd +14 -6
  21. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
  22. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
  23. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
  24. commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
  25. commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
  26. commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
  27. commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
  28. commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
  29. commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
  30. commonmeta/resources/crossref/xml.xsd +287 -0
  31. commonmeta/schema_utils.py +25 -0
  32. commonmeta/utils.py +90 -15
  33. commonmeta/writers/bibtex_writer.py +5 -5
  34. commonmeta/writers/citation_writer.py +10 -5
  35. commonmeta/writers/commonmeta_writer.py +5 -17
  36. commonmeta/writers/crossref_xml_writer.py +1032 -4
  37. commonmeta/writers/csl_writer.py +6 -6
  38. commonmeta/writers/datacite_writer.py +11 -6
  39. commonmeta/writers/inveniordm_writer.py +286 -10
  40. commonmeta/writers/ris_writer.py +3 -3
  41. commonmeta/writers/schema_org_writer.py +10 -5
  42. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/METADATA +5 -2
  43. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/RECORD +46 -32
  44. commonmeta/crossref_utils.py +0 -583
  45. commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
  46. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
  47. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
  48. {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/metadata.py CHANGED
@@ -8,8 +8,7 @@ import yaml
8
8
  from pydash import py_
9
9
 
10
10
  from .base_utils import parse_xml, wrap
11
- from .constants import CM_TO_CR_TRANSLATIONS
12
- from .doi_utils import doi_from_url
11
+ from .file_utils import write_output
13
12
  from .readers.cff_reader import get_cff, read_cff
14
13
  from .readers.codemeta_reader import (
15
14
  get_codemeta,
@@ -34,7 +33,7 @@ from .readers.inveniordm_reader import (
34
33
  get_inveniordm,
35
34
  read_inveniordm,
36
35
  )
37
- from .readers.json_feed_reader import get_json_feed_item, read_json_feed_item
36
+ from .readers.jsonfeed_reader import get_jsonfeed, read_jsonfeed
38
37
  from .readers.kbase_reader import read_kbase
39
38
  from .readers.openalex_reader import (
40
39
  get_openalex,
@@ -45,17 +44,25 @@ from .readers.schema_org_reader import (
45
44
  get_schema_org,
46
45
  read_schema_org,
47
46
  )
48
- from .schema_utils import json_schema_errors
47
+ from .schema_utils import json_schema_errors, xml_schema_errors
49
48
  from .utils import find_from_format, normalize_id
50
49
  from .writers.bibtex_writer import write_bibtex, write_bibtex_list
51
50
  from .writers.citation_writer import write_citation, write_citation_list
52
51
  from .writers.commonmeta_writer import write_commonmeta, write_commonmeta_list
53
- from .writers.crossref_xml_writer import write_crossref_xml, write_crossref_xml_list
52
+ from .writers.crossref_xml_writer import (
53
+ push_crossref_xml_list,
54
+ write_crossref_xml,
55
+ write_crossref_xml_list,
56
+ )
54
57
  from .writers.csl_writer import write_csl, write_csl_list
55
- from .writers.datacite_writer import write_datacite
56
- from .writers.inveniordm_writer import write_inveniordm
58
+ from .writers.datacite_writer import write_datacite, write_datacite_list
59
+ from .writers.inveniordm_writer import (
60
+ push_inveniordm_list,
61
+ write_inveniordm,
62
+ write_inveniordm_list,
63
+ )
57
64
  from .writers.ris_writer import write_ris, write_ris_list
58
- from .writers.schema_org_writer import write_schema_org
65
+ from .writers.schema_org_writer import write_schema_org, write_schema_org_list
59
66
 
60
67
 
61
68
  # pylint: disable=R0902
@@ -154,8 +161,8 @@ class Metadata:
154
161
  return get_codemeta(pid)
155
162
  elif via == "cff":
156
163
  return get_cff(pid)
157
- elif via == "json_feed_item":
158
- return get_json_feed_item(pid)
164
+ elif via == "jsonfeed":
165
+ return get_jsonfeed(pid)
159
166
  elif via == "inveniordm":
160
167
  return get_inveniordm(pid)
161
168
  elif via == "openalex":
@@ -195,7 +202,7 @@ class Metadata:
195
202
  "datacite",
196
203
  "schema_org",
197
204
  "csl",
198
- "json_feed_item",
205
+ "jsonfeed",
199
206
  "codemeta",
200
207
  "kbase",
201
208
  "inveniordm",
@@ -231,8 +238,8 @@ class Metadata:
231
238
  return dict(read_codemeta(data))
232
239
  elif via == "cff":
233
240
  return dict(read_cff(data))
234
- elif via == "json_feed_item":
235
- return dict(read_json_feed_item(data, **kwargs))
241
+ elif via == "jsonfeed":
242
+ return dict(read_jsonfeed(data, **kwargs))
236
243
  elif via == "inveniordm":
237
244
  return dict(read_inveniordm(data))
238
245
  elif via == "kbase":
@@ -270,13 +277,13 @@ class Metadata:
270
277
  def _write_json_format(self, to: str) -> str:
271
278
  """Handle JSON-based output formats."""
272
279
  if to == "commonmeta":
273
- result = write_commonmeta(self)
280
+ result = json.dumps(write_commonmeta(self))
274
281
  elif to == "datacite":
275
- result = write_datacite(self)
282
+ result = json.dumps(write_datacite(self))
276
283
  elif to == "inveniordm":
277
- result = write_inveniordm(self)
284
+ result = json.dumps(write_inveniordm(self))
278
285
  elif to == "schema_org":
279
- result = write_schema_org(self)
286
+ result = json.dumps(write_schema_org(self))
280
287
  else:
281
288
  return "{}"
282
289
 
@@ -340,16 +347,19 @@ class Metadata:
340
347
 
341
348
  def _write_crossref_xml(self, **kwargs) -> str:
342
349
  """Write in Crossref XML format with error checking."""
343
- doi = doi_from_url(self.id)
344
- _type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
345
- url = self.url
346
- instance = {"doi": doi, "type": _type, "url": url}
350
+ # doi = doi_from_url(self.id)
351
+ # _type = CM_TO_CR_TRANSLATIONS.get(str(self.type or ""), None)
352
+ # url = self.url
353
+ # instance = {"doi": doi, "type": _type, "url": url}
347
354
  self.depositor = kwargs.get("depositor", None)
348
355
  self.email = kwargs.get("email", None)
349
356
  self.registrant = kwargs.get("registrant", None)
350
- self.write_errors = json_schema_errors(instance, schema="crossref")
351
- result = write_crossref_xml(self)
352
- return result if result is not None else ""
357
+ output = write_crossref_xml(self)
358
+ self.write_errors = xml_schema_errors(output, schema="crossref_xml")
359
+ if self.write_errors is not None:
360
+ self.is_valid = False
361
+ return ""
362
+ return output if output is not None else ""
353
363
 
354
364
 
355
365
  class MetadataList:
@@ -378,6 +388,12 @@ class MetadataList:
378
388
  self.depositor = kwargs.get("depositor", None)
379
389
  self.email = kwargs.get("email", None)
380
390
  self.registrant = kwargs.get("registrant", None)
391
+ self.login_id = kwargs.get("login_id", None)
392
+ self.login_passwd = kwargs.get("login_passwd", None)
393
+
394
+ # options needed for InvenioRDM registration
395
+ self.host = kwargs.get("host", None)
396
+ self.token = kwargs.get("token", None)
381
397
 
382
398
  self.items = self.read_metadata_list(wrap(meta.get("items", None)), **kwargs)
383
399
  self.errors = [i.errors for i in self.items if i.errors is not None]
@@ -387,8 +403,7 @@ class MetadataList:
387
403
  self.is_valid = all([i.is_valid for i in self.items])
388
404
 
389
405
  # other options
390
- self.jsonlines = kwargs.get("jsonlines", False)
391
- self.filename = kwargs.get("filename", None)
406
+ self.file = kwargs.get("file", None)
392
407
 
393
408
  def get_metadata_list(self, string) -> list:
394
409
  if string is None or not isinstance(string, (str, bytes)):
@@ -396,11 +411,12 @@ class MetadataList:
396
411
  if self.via in [
397
412
  "commonmeta",
398
413
  "crossref",
414
+ "csl",
399
415
  "datacite",
400
- "schema_org",
416
+ "inveniordm",
417
+ "jsonfeed",
401
418
  "openalex",
402
- "csl",
403
- "json_feed_item",
419
+ "schema_org",
404
420
  ]:
405
421
  return json.loads(string)
406
422
  else:
@@ -413,23 +429,67 @@ class MetadataList:
413
429
 
414
430
  def write(self, to: str = "commonmeta", **kwargs) -> str:
415
431
  """convert metadata list into different formats"""
416
- if to == "commonmeta":
417
- return write_commonmeta_list(self)
418
- elif to == "bibtex":
419
- return write_bibtex_list(self)
420
- elif to == "csl":
421
- return write_csl_list(self)
432
+ if to == "bibtex":
433
+ output = write_bibtex_list(self)
434
+ if self.file:
435
+ return write_output(self.file, output, [".bib"])
436
+ else:
437
+ return output
422
438
  elif to == "citation":
423
439
  return write_citation_list(self, **kwargs)
440
+ elif to == "commonmeta":
441
+ output = json.dumps(write_commonmeta_list(self))
442
+ if self.file:
443
+ return write_output(self.file, output, [".json", ".jsonl"])
444
+ else:
445
+ return output
446
+ elif to == "crossref_xml":
447
+ output = write_crossref_xml_list(self)
448
+ if self.file:
449
+ return write_output(self.file, output, [".xml"])
450
+ else:
451
+ return output
452
+ elif to == "csl":
453
+ output = json.dumps(write_csl_list(self))
454
+ if self.file:
455
+ return write_output(self.file, output, [".json"])
456
+ else:
457
+ return output
458
+ elif to == "datacite":
459
+ output = json.dumps(write_datacite_list(self))
460
+ if self.file:
461
+ return write_output(self.file, output, [".json"])
462
+ else:
463
+ return output
464
+ elif to == "inveniordm":
465
+ output = json.dumps(write_inveniordm_list(self))
466
+ if self.file:
467
+ return write_output(self.file, output, [".json"])
468
+ else:
469
+ return output
424
470
  elif to == "ris":
425
471
  return write_ris_list(self)
426
472
  elif to == "schema_org":
427
- raise ValueError("Schema.org not supported for metadata lists")
473
+ output = json.dumps(write_schema_org_list(self))
474
+ if self.file:
475
+ return write_output(self.file, output, [".json"])
476
+ else:
477
+ return output
478
+ else:
479
+ raise ValueError("No valid output format found")
480
+
481
+ def push(self, to: str = "commonmeta", **kwargs) -> str:
482
+ """push metadata list to external APIs"""
483
+
484
+ if to == "crossref_xml":
485
+ response = push_crossref_xml_list(
486
+ self, login_id=self.login_id, login_passwd=self.login_passwd
487
+ )
488
+ return response
428
489
  elif to == "datacite":
429
- raise ValueError("Datacite not supported for metadata lists")
430
- elif to == "openalex":
431
- raise ValueError("OpenAlex not supported for metadata lists")
432
- elif to == "crossref_xml":
433
- return write_crossref_xml_list(self)
490
+ raise ValueError("Datacite not yet supported for metadata lists")
491
+ elif to == "inveniordm":
492
+ response = push_inveniordm_list(self, host=self.host, token=self.token)
493
+ return response
434
494
  else:
435
- raise ValueError("No output format found")
495
+ raise ValueError("No valid output format found")
@@ -75,7 +75,7 @@ def read_codemeta(data: Optional[dict], **kwargs) -> Commonmeta:
75
75
  descriptions = [
76
76
  {
77
77
  "description": sanitize(str(meta.get("description"))),
78
- "descriptionType": "Abstract",
78
+ "type": "Abstract",
79
79
  }
80
80
  ]
81
81
  else:
@@ -1,13 +1,14 @@
1
1
  """crossref reader for commonmeta-py"""
2
2
 
3
3
  from typing import Optional
4
+ from xml.parsers.expat import ExpatError
4
5
 
5
6
  import requests
6
7
  from pydash import py_
7
8
  from requests.exceptions import ConnectionError, ReadTimeout
8
9
 
9
10
  from ..author_utils import get_authors
10
- from ..base_utils import compact, parse_attributes, presence, sanitize, wrap
11
+ from ..base_utils import compact, parse_attributes, parse_xml, presence, sanitize, wrap
11
12
  from ..constants import (
12
13
  CR_TO_CM_CONTAINER_TRANSLATIONS,
13
14
  CR_TO_CM_TRANSLATIONS,
@@ -66,6 +67,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
66
67
  doi = meta.get("DOI", None)
67
68
  _id = doi_as_url(doi)
68
69
  _type = CR_TO_CM_TRANSLATIONS.get(meta.get("type", None)) or "Other"
70
+ additional_type = meta.get("subtype", None)
69
71
 
70
72
  archive_locations = wrap(meta.get("archive", None))
71
73
 
@@ -120,13 +122,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
120
122
  relations = py_.uniq(relations)
121
123
  references = py_.uniq([get_reference(i) for i in wrap(meta.get("reference", None))])
122
124
  funding_references = from_crossref_funding(wrap(meta.get("funder", None)))
123
-
124
- description = meta.get("abstract", None)
125
- if description is not None:
126
- descriptions = [{"description": sanitize(description), "type": "Abstract"}]
127
- else:
128
- descriptions = None
129
-
125
+ descriptions = get_abstract(meta)
130
126
  subjects = py_.uniq(
131
127
  [
132
128
  {"subject": i}
@@ -146,7 +142,7 @@ def read_crossref(data: Optional[dict], **kwargs) -> Commonmeta:
146
142
  "id": _id,
147
143
  "type": _type,
148
144
  # recommended and optional properties
149
- "additionalType": None,
145
+ "additionalType": additional_type,
150
146
  "archiveLocations": presence(archive_locations),
151
147
  "container": presence(container),
152
148
  "contributors": presence(contributors),
@@ -201,6 +197,23 @@ def get_titles(meta):
201
197
  )
202
198
 
203
199
 
200
+ def get_abstract(meta: dict) -> Optional[str]:
201
+ """Get abstract from Crossref metadata."""
202
+ abstract = meta.get("abstract", None)
203
+ if abstract is None:
204
+ return None
205
+
206
+ try:
207
+ # Parse the abstract XML if it is JATS formatted
208
+ description_dct = parse_xml(abstract, xml_attribs=True)
209
+ description = py_.get(description_dct, "jats:p")
210
+ if description is None:
211
+ description = abstract
212
+ return [{"description": sanitize(description), "type": "Abstract"}]
213
+ except (TypeError, ExpatError):
214
+ return [{"description": sanitize(abstract), "type": "Abstract"}]
215
+
216
+
204
217
  def get_reference(reference: Optional[dict]) -> Optional[dict]:
205
218
  """Get reference from Crossref reference"""
206
219
  if reference is None or not isinstance(reference, dict):
@@ -341,7 +354,7 @@ def get_container(meta: dict, issn: str) -> dict:
341
354
  )
342
355
  isbn = isbn["value"] if isbn else None
343
356
  container_title = parse_attributes(meta.get("container-title", None), first=True)
344
- if not container_title and container_type in ["Periodical"]:
357
+ if not container_title:
345
358
  container_title = py_.get(meta, "institution.0.name")
346
359
  volume = meta.get("volume", None)
347
360
  issue = py_.get(meta, "journal-issue.issue")
@@ -332,7 +332,7 @@ def crossref_description(bibmeta):
332
332
  )
333
333
  return compact(
334
334
  {
335
- "descriptionType": description_type,
335
+ "type": description_type,
336
336
  "description": sanitize(
337
337
  parse_attributes(element, content="p", first=True)
338
338
  ),
@@ -267,13 +267,15 @@ def get_descriptions(descriptions: list) -> list:
267
267
 
268
268
  def map_description(description):
269
269
  """map_description"""
270
+ type = description.get("descriptionType", None)
271
+ if type is None:
272
+ type = "Abstract"
273
+ elif type not in ["Abstract", "Methods", "TechnicalInfo", "Other"]:
274
+ type = "Other"
270
275
  return compact(
271
276
  {
272
277
  "description": description.get("description", None),
273
- "type": description.get("descriptionType")
274
- if description.get("descriptionType", None)
275
- in ["Abstract", "Methods", "TechnicalInfo", "Other"]
276
- else "Other",
278
+ "type": type,
277
279
  "language": description.get("lang", None),
278
280
  }
279
281
  )
@@ -21,7 +21,7 @@ from ..doi_utils import (
21
21
  from ..utils import (
22
22
  compact,
23
23
  dict_to_spdx,
24
- from_json_feed,
24
+ from_jsonfeed,
25
25
  issn_as_url,
26
26
  name_to_fos,
27
27
  normalize_url,
@@ -31,19 +31,19 @@ from ..utils import (
31
31
  )
32
32
 
33
33
 
34
- def get_json_feed_item(pid: str, **kwargs) -> dict:
35
- """get_json_feed_item"""
34
+ def get_jsonfeed(pid: str, **kwargs) -> dict:
35
+ """get_jsonfeed"""
36
36
  if pid is None:
37
37
  return {"state": "not_found"}
38
38
  url = normalize_url(pid)
39
39
  response = requests.get(url, timeout=10, allow_redirects=True, **kwargs)
40
40
  if response.status_code != 200:
41
41
  return {"state": "not_found"}
42
- return response.json() | {"via": "json_feed_item"}
42
+ return response.json() | {"via": "jsonfeed"}
43
43
 
44
44
 
45
- def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
46
- """read_json_feed_item"""
45
+ def read_jsonfeed(data: Optional[dict], **kwargs) -> Commonmeta:
46
+ """read_jsonfeed"""
47
47
  if data is None:
48
48
  return {"state": "not_found"}
49
49
  meta = data
@@ -64,7 +64,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
64
64
  _id = encode_doi(prefix)
65
65
 
66
66
  if meta.get("authors", None):
67
- contributors = get_authors(from_json_feed(wrap(meta.get("authors"))))
67
+ contributors = get_authors(from_jsonfeed(wrap(meta.get("authors"))))
68
68
  else:
69
69
  contributors = None
70
70
 
@@ -176,7 +176,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
176
176
 
177
177
 
178
178
  def get_references(references: list) -> list:
179
- """get json feed references."""
179
+ """get jsonfeed references."""
180
180
 
181
181
  def get_reference(reference: dict) -> Optional[dict]:
182
182
  if reference is None or not isinstance(reference, dict):
@@ -396,8 +396,8 @@ def get_files(pid: str) -> Optional[list]:
396
396
  ]
397
397
 
398
398
 
399
- def get_json_feed_item_uuid(id: str):
400
- """get JSON Feed item by uuid"""
399
+ def get_jsonfeed_uuid(id: str):
400
+ """get jsonfeed by uuid"""
401
401
  if id is None:
402
402
  return None
403
403
  url = f"https://api.rogue-scholar.org/posts/{id}"
@@ -424,8 +424,8 @@ def get_json_feed_item_uuid(id: str):
424
424
  )
425
425
 
426
426
 
427
- def get_json_feed_blog_slug(id: str):
428
- """get JSON Feed item by id and return blog slug"""
427
+ def get_jsonfeed_blog_slug(id: str):
428
+ """get jsonfeed by id and return blog slug"""
429
429
  if id is None:
430
430
  return None
431
431
  url = f"https://api.rogue-scholar.org/posts/{id}"