commonmeta-py 0.107__tar.gz → 0.108__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (95)
  1. {commonmeta_py-0.107 → commonmeta_py-0.108}/PKG-INFO +4 -2
  2. {commonmeta_py-0.107 → commonmeta_py-0.108}/README.md +1 -1
  3. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/__init__.py +12 -15
  4. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/api_utils.py +3 -2
  5. commonmeta_py-0.108/commonmeta/base_utils.py +307 -0
  6. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/cli.py +114 -34
  7. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/constants.py +20 -0
  8. commonmeta_py-0.108/commonmeta/file_utils.py +112 -0
  9. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/metadata.py +102 -42
  10. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/codemeta_reader.py +1 -1
  11. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/crossref_reader.py +23 -10
  12. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/crossref_xml_reader.py +1 -1
  13. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/datacite_reader.py +6 -4
  14. commonmeta_py-0.107/commonmeta/readers/json_feed_reader.py → commonmeta_py-0.108/commonmeta/readers/jsonfeed_reader.py +12 -12
  15. commonmeta_py-0.108/commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
  16. commonmeta_py-0.107/commonmeta/resources/crossref/crossref5.3.1.xsd → commonmeta_py-0.108/commonmeta/resources/crossref/crossref5.4.0.xsd +286 -88
  17. commonmeta_py-0.108/commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
  18. commonmeta_py-0.108/commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
  19. commonmeta_py-0.108/commonmeta/resources/crossref/fundref.xsd +59 -0
  20. commonmeta_py-0.108/commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
  21. commonmeta_py-0.108/commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
  22. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/module-ali.xsd +14 -6
  23. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
  24. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
  25. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
  26. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
  27. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
  28. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
  29. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
  30. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
  31. commonmeta_py-0.108/commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
  32. commonmeta_py-0.108/commonmeta/resources/crossref/xml.xsd +287 -0
  33. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/schema_utils.py +25 -0
  34. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/utils.py +25 -9
  35. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/bibtex_writer.py +5 -5
  36. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/commonmeta_writer.py +4 -17
  37. commonmeta_py-0.108/commonmeta/writers/crossref_xml_writer.py +1046 -0
  38. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/csl_writer.py +1 -2
  39. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/datacite_writer.py +8 -4
  40. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/inveniordm_writer.py +277 -2
  41. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/ris_writer.py +3 -3
  42. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/schema_org_writer.py +10 -5
  43. {commonmeta_py-0.107 → commonmeta_py-0.108}/pyproject.toml +3 -1
  44. commonmeta_py-0.107/commonmeta/base_utils.py +0 -124
  45. commonmeta_py-0.107/commonmeta/crossref_utils.py +0 -662
  46. commonmeta_py-0.107/commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
  47. commonmeta_py-0.107/commonmeta/resources/crossref/fundref.xsd +0 -49
  48. commonmeta_py-0.107/commonmeta/writers/crossref_xml_writer.py +0 -19
  49. {commonmeta_py-0.107 → commonmeta_py-0.108}/.gitignore +0 -0
  50. {commonmeta_py-0.107 → commonmeta_py-0.108}/LICENSE +0 -0
  51. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/author_utils.py +0 -0
  52. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/date_utils.py +0 -0
  53. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/doi_utils.py +0 -0
  54. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/__init__.py +0 -0
  55. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/bibtex_reader.py +0 -0
  56. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/cff_reader.py +0 -0
  57. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/commonmeta_reader.py +0 -0
  58. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/csl_reader.py +0 -0
  59. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/datacite_xml_reader.py +0 -0
  60. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/inveniordm_reader.py +0 -0
  61. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/kbase_reader.py +0 -0
  62. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/openalex_reader.py +0 -0
  63. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/ris_reader.py +0 -0
  64. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/readers/schema_org_reader.py +0 -0
  65. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/cff_v1.2.0.json +0 -0
  66. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.12.json +0 -0
  67. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.13.json +0 -0
  68. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.14.json +0 -0
  69. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.15.json +0 -0
  70. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/commonmeta_v0.16.json +0 -0
  71. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/AccessIndicators.xsd +0 -0
  72. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3-elements.xsd +0 -0
  73. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-3d2-mathml3.xsd +0 -0
  74. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-elements.xsd +0 -0
  75. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3-elements.xsd +0 -0
  76. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1-mathml3.xsd +0 -0
  77. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/JATS-journalpublishing1.xsd +0 -0
  78. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/clinicaltrials.xsd +0 -0
  79. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/crossref_query_output3.0.xsd +0 -0
  80. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref/relations.xsd +0 -0
  81. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/crossref-v0.2.json +0 -0
  82. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/csl-data.json +0 -0
  83. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/datacite-v4.5.json +0 -0
  84. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/datacite-v4.5pr.json +0 -0
  85. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/spdx/licenses.json +0 -0
  86. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/spdx-schema.json +0 -0
  87. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/apa.csl +0 -0
  88. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/chicago-author-date.csl +0 -0
  89. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/harvard-cite-them-right.csl +0 -0
  90. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/ieee.csl +0 -0
  91. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/modern-language-association.csl +0 -0
  92. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/resources/styles/vancouver.csl +0 -0
  93. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/translators.py +0 -0
  94. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/__init__.py +0 -0
  95. {commonmeta_py-0.107 → commonmeta_py-0.108}/commonmeta/writers/citation_writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commonmeta-py
3
- Version: 0.107
3
+ Version: 0.108
4
4
  Summary: Library for conversions to/from the Commonmeta scholarly metadata format
5
5
  Project-URL: Homepage, https://python.commonmeta.org
6
6
  Project-URL: Repository, https://github.com/front-matter/commonmeta-py
@@ -33,6 +33,7 @@ Requires-Dist: pydash<9,>=6
33
33
  Requires-Dist: pyjwt<3,>=2.8.0
34
34
  Requires-Dist: python-dateutil<3,>=2.8.2
35
35
  Requires-Dist: pyyaml>=5.4
36
+ Requires-Dist: requests-toolbelt>=1.0.0
36
37
  Requires-Dist: requests>=2.31.0
37
38
  Requires-Dist: requests>=2.32.3
38
39
  Requires-Dist: simplejson~=3.18
@@ -40,6 +41,7 @@ Requires-Dist: types-beautifulsoup4<5,>=4.11
40
41
  Requires-Dist: types-dateparser~=1.1
41
42
  Requires-Dist: types-pyyaml>=5.4
42
43
  Requires-Dist: types-xmltodict<0.20,>=0.13
44
+ Requires-Dist: xmlschema>=4.0.1
43
45
  Requires-Dist: xmltodict<0.20,>=0.12
44
46
  Description-Content-Type: text/markdown
45
47
 
@@ -85,7 +87,7 @@ Commometa-py reads and/or writes these metadata formats:
85
87
  | [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
86
88
  | [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
87
89
  | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
88
- | [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
90
+ | [JSON Feed](https://www.jsonfeed.org/) | jsonfeed | application/feed+json | yes | later |
89
91
  | [OpenAlex](https://www.openalex.org/) | openalex | | yes | no |
90
92
 
91
93
  _commonmeta_: the Commonmeta format is the native format for the library and used internally.
@@ -40,7 +40,7 @@ Commometa-py reads and/or writes these metadata formats:
40
40
  | [BibTex](http://en.wikipedia.org/wiki/BibTeX) | bibtex | application/x-bibtex | later | yes |
41
41
  | [RIS](http://en.wikipedia.org/wiki/RIS_(file_format)) | ris | application/x-research-info-systems | yes | yes |
42
42
  | [InvenioRDM](https://inveniordm.docs.cern.ch/reference/metadata/) | inveniordm | application/vnd.inveniordm.v1+json | yes | yes |
43
- | [JSON Feed](https://www.jsonfeed.org/) | json_feed_item | application/feed+json | yes | later |
43
+ | [JSON Feed](https://www.jsonfeed.org/) | jsonfeed | application/feed+json | yes | later |
44
44
  | [OpenAlex](https://www.openalex.org/) | openalex | | yes | no |
45
45
 
46
46
  _commonmeta_: the Commonmeta format is the native format for the library and used internally.
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
10
10
  """
11
11
 
12
12
  __title__ = "commonmeta-py"
13
- __version__ = "0.107"
13
+ __version__ = "0.108"
14
14
  __author__ = "Martin Fenner"
15
15
  __license__ = "MIT"
16
16
 
@@ -31,18 +31,6 @@ from .base_utils import (
31
31
  unwrap,
32
32
  wrap,
33
33
  )
34
- from .crossref_utils import (
35
- CrossrefBadRequestError,
36
- CrossrefError,
37
- CrossrefForbiddenError,
38
- CrossrefNoContentError,
39
- CrossrefNotFoundError,
40
- CrossrefRequestError,
41
- CrossrefServerError,
42
- CrossrefUnauthorizedError,
43
- HttpError,
44
- generate_crossref_xml,
45
- )
46
34
  from .date_utils import (
47
35
  get_date_from_crossref_parts,
48
36
  get_date_from_date_parts,
@@ -66,6 +54,14 @@ from .doi_utils import (
66
54
  validate_doi,
67
55
  validate_prefix,
68
56
  )
57
+ from .file_utils import (
58
+ download_file,
59
+ read_file,
60
+ read_gz_file,
61
+ read_zip_file,
62
+ uncompress_content,
63
+ unzip_content,
64
+ )
69
65
  from .metadata import Metadata, MetadataList
70
66
  from .readers import (
71
67
  cff_reader,
@@ -75,7 +71,7 @@ from .readers import (
75
71
  datacite_reader,
76
72
  datacite_xml_reader,
77
73
  inveniordm_reader,
78
- json_feed_reader,
74
+ jsonfeed_reader,
79
75
  kbase_reader,
80
76
  openalex_reader,
81
77
  ris_reader,
@@ -87,7 +83,7 @@ from .utils import (
87
83
  extract_url,
88
84
  extract_urls,
89
85
  from_csl,
90
- from_json_feed,
86
+ from_jsonfeed,
91
87
  from_schema_org,
92
88
  get_language,
93
89
  issn_as_url,
@@ -109,6 +105,7 @@ from .writers import (
109
105
  bibtex_writer,
110
106
  citation_writer,
111
107
  commonmeta_writer,
108
+ crossref_xml_writer,
112
109
  csl_writer,
113
110
  datacite_writer,
114
111
  ris_writer,
@@ -7,8 +7,9 @@ import jwt
7
7
  import requests
8
8
  from furl import furl
9
9
 
10
+ from commonmeta.readers.jsonfeed_reader import get_jsonfeed_uuid
11
+
10
12
  from .doi_utils import doi_as_url, validate_doi
11
- from .readers.json_feed_reader import get_json_feed_item_uuid
12
13
 
13
14
 
14
15
  def generate_ghost_token(key: str) -> str:
@@ -34,7 +35,7 @@ def update_ghost_post_via_api(
34
35
  """Update Ghost post via API"""
35
36
  # get post doi and url from Rogue Scholar API
36
37
  # post url is needed to find post via Ghost API
37
- post = get_json_feed_item_uuid(_id)
38
+ post = get_jsonfeed_uuid(_id)
38
39
  if post.get("error", None):
39
40
  return post
40
41
  doi = validate_doi(post.get("doi", None))
@@ -0,0 +1,307 @@
1
+ """Base utilities for commonmeta-py"""
2
+
3
+ import html
4
+ import re
5
+ import uuid
6
+ from datetime import datetime
7
+ from os import path
8
+ from typing import Optional, Union
9
+
10
+ import nh3
11
+ import pydash as py_
12
+ import xmltodict
13
+
14
+
15
+ def wrap(item) -> list:
16
+ """Turn None, dict, or list into list"""
17
+ if item is None:
18
+ return []
19
+ if isinstance(item, list):
20
+ return item
21
+ return [item]
22
+
23
+
24
+ def unwrap(lst: list) -> Optional[Union[dict, list]]:
25
+ """Turn list into dict or None, depending on list size"""
26
+ if len(lst) == 0:
27
+ return None
28
+ if len(lst) == 1:
29
+ return lst[0]
30
+ return lst
31
+
32
+
33
+ def presence(
34
+ item: Optional[Union[dict, list, str]],
35
+ ) -> Optional[Union[dict, list, str]]:
36
+ """Turn empty list, dict or str into None"""
37
+ return None if item is None or len(item) == 0 or item == [{}] else item
38
+
39
+
40
+ def compact(dict_or_list: Union[dict, list]) -> Optional[Union[dict, list]]:
41
+ """Remove None from dict or list"""
42
+ if isinstance(dict_or_list, dict):
43
+ return {k: v for k, v in dict_or_list.items() if v is not None}
44
+ if isinstance(dict_or_list, list):
45
+ lst = [compact(i) for i in dict_or_list]
46
+ return lst if len(lst) > 0 else None
47
+
48
+ return None
49
+
50
+
51
+ def parse_attributes(
52
+ element: Union[str, dict, list], **kwargs
53
+ ) -> Optional[Union[str, list]]:
54
+ """extract attributes from a string, dict or list"""
55
+
56
+ def parse_item(item):
57
+ if isinstance(item, dict):
58
+ return item.get(html.unescape(content), None)
59
+ return html.unescape(item)
60
+
61
+ content = kwargs.get("content", "#text")
62
+ if isinstance(element, str) and kwargs.get("content", None) is None:
63
+ return html.unescape(element)
64
+ if isinstance(element, dict):
65
+ return element.get(html.unescape(content), None)
66
+ if isinstance(element, list):
67
+ arr = [parse_item(i) for i in element if i]
68
+ arr = arr[0] if len(arr) > 0 and kwargs.get("first") else unwrap(arr)
69
+ return arr
70
+
71
+
72
+ def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
73
+ """Parse XML into dict using xmltodict. Set default options, and options for Crossref XML"""
74
+ if string is None or string == "{}":
75
+ return None
76
+ if path.exists(string):
77
+ with open(string, encoding="utf-8") as file:
78
+ string = file.read()
79
+
80
+ if kwargs.get("dialect", None) == "crossref":
81
+ # remove namespaces from xml
82
+ namespaces = {
83
+ "http://www.crossref.org/schema/5.4.0": None,
84
+ "http://www.crossref.org/qrschema/3.0": None,
85
+ "http://www.crossref.org/xschema/1.0": None,
86
+ "http://www.crossref.org/xschema/1.1": None,
87
+ "http://www.crossref.org/AccessIndicators.xsd": None,
88
+ "http://www.crossref.org/relations.xsd": None,
89
+ "http://www.crossref.org/fundref.xsd": None,
90
+ "http://www.ncbi.nlm.nih.gov/JATS1": None,
91
+ }
92
+
93
+ kwargs["process_namespaces"] = True
94
+ kwargs["namespaces"] = namespaces
95
+ kwargs["force_list"] = {
96
+ "person_name",
97
+ "organization",
98
+ "titles",
99
+ "abstract",
100
+ "item",
101
+ "citation",
102
+ "program",
103
+ "related_item",
104
+ }
105
+
106
+ kwargs["attr_prefix"] = ""
107
+ kwargs["dict_constructor"] = dict
108
+ kwargs.pop("dialect", None)
109
+ return xmltodict.parse(string, **kwargs)
110
+
111
+
112
+ def unparse_xml(input: Optional[dict], **kwargs) -> str:
113
+ """Unparse (dump) dict into XML using xmltodict. Set default options, and options for Crossref XML"""
114
+ if input is None:
115
+ return None
116
+ if kwargs.get("dialect", None) == "crossref":
117
+ # Add additional logic for crossref dialect
118
+ # add body and root element as wrapping elements
119
+ type = next(iter(input))
120
+ attributes = input.get(type)
121
+ input.pop(type)
122
+
123
+ if type == "book":
124
+ book_metadata = py_.get(input, "book_metadata") or {}
125
+ input.pop("book_metadata")
126
+ book_metadata = {**book_metadata, **input}
127
+ input = {"book": {**attributes, "book_metadata": book_metadata}}
128
+ elif type == "database":
129
+ database_metadata = py_.get(input, "database_metadata") or {}
130
+ input.pop("database_metadata")
131
+ val = input.pop("publisher_item")
132
+ institution = input.pop("institution", None)
133
+ database_metadata = {**{"titles": val}, **database_metadata}
134
+ database_metadata["institution"] = institution or {}
135
+ component = input.pop("component", None)
136
+ input = {
137
+ "database": {
138
+ **attributes,
139
+ "database_metadata": database_metadata,
140
+ "component_list": {"component": component | input},
141
+ }
142
+ }
143
+ elif type == "journal":
144
+ journal_metadata = py_.get(input, "journal_metadata") or {}
145
+ journal_issue = py_.get(input, "journal_issue") or {}
146
+ journal_article = py_.get(input, "journal_article") or {}
147
+ input.pop("journal_metadata")
148
+ input.pop("journal_issue")
149
+ input.pop("journal_article")
150
+ input = {
151
+ "journal": {
152
+ "journal_metadata": journal_metadata,
153
+ "journal_issue": journal_issue,
154
+ "journal_article": journal_article | input,
155
+ }
156
+ }
157
+ elif type == "proceedings_article":
158
+ proceedings_metadata = py_.get(input, "proceedings_metadata") or {}
159
+ input.pop("proceedings_metadata")
160
+ input = {
161
+ "proceedings": {
162
+ **attributes,
163
+ "proceedings_metadata": proceedings_metadata,
164
+ "conference_paper": input,
165
+ }
166
+ }
167
+ elif type == "sa_component":
168
+ component = py_.get(input, "component") or {}
169
+ input.pop("component")
170
+ input = {
171
+ "sa_component": {
172
+ **attributes,
173
+ "component_list": {"component": component | input},
174
+ }
175
+ }
176
+ else:
177
+ input = {type: attributes | input}
178
+
179
+ doi_batch = {
180
+ "@xmlns": "http://www.crossref.org/schema/5.4.0",
181
+ "@version": "5.4.0",
182
+ "head": get_crossref_xml_head(input),
183
+ "body": input,
184
+ }
185
+ input = {"doi_batch": doi_batch}
186
+ kwargs["pretty"] = True
187
+ kwargs["indent"] = " "
188
+ kwargs.pop("dialect", None)
189
+ return xmltodict.unparse(input, **kwargs)
190
+
191
+
192
+ def unparse_xml_list(input: Optional[list], **kwargs) -> str:
193
+ """Unparse (dump) list into XML using xmltodict. Set default options, and options for Crossref XML"""
194
+ if input is None:
195
+ return None
196
+ if kwargs.get("dialect", None) == "crossref":
197
+ # Add additional logic for crossref dialect
198
+ # add body and root element as wrapping elements
199
+
200
+ # Group items by type with minimal grouping
201
+ items_by_type = {}
202
+
203
+ for item in wrap(input):
204
+ type = next(iter(item))
205
+ attributes = item.get(type)
206
+ item.pop(type)
207
+
208
+ # handle nested book_metadata and journal structure as in unparse_xml
209
+ if type == "book":
210
+ book_metadata = py_.get(item, "book_metadata") or {}
211
+ item.pop("book_metadata")
212
+ book_metadata = {**book_metadata, **item}
213
+ item = {"book": {**attributes, "book_metadata": book_metadata}}
214
+ elif type == "database":
215
+ database_metadata = py_.get(item, "database_metadata") or {}
216
+ item.pop("database_metadata")
217
+ database_metadata = {**database_metadata, **item}
218
+ item = {
219
+ "database": {**attributes, "database_metadata": database_metadata}
220
+ }
221
+ elif type == "journal":
222
+ journal_metadata = py_.get(item, "journal_metadata") or {}
223
+ journal_issue = py_.get(item, "journal_issue") or {}
224
+ journal_article = py_.get(item, "journal_article") or {}
225
+ item.pop("journal_metadata")
226
+ item.pop("journal_issue")
227
+ item.pop("journal_article")
228
+ item = {
229
+ "journal": {
230
+ "journal_metadata": journal_metadata,
231
+ "journal_issue": journal_issue,
232
+ "journal_article": journal_article | item,
233
+ }
234
+ }
235
+ elif type == "sa_component":
236
+ component = py_.get(input, "component") or {}
237
+ item.pop("component")
238
+ item = {
239
+ "sa_component": {
240
+ **attributes,
241
+ "component_list": {"component": component | item},
242
+ }
243
+ }
244
+ else:
245
+ item = {type: attributes | item}
246
+
247
+ # Add item to appropriate type bucket
248
+ if type not in items_by_type:
249
+ items_by_type[type] = []
250
+ items_by_type[type].append(item[type])
251
+
252
+ # Create the final structure with body containing all grouped items
253
+ body_content = {}
254
+ for type_key, items in items_by_type.items():
255
+ if len(items) == 1:
256
+ body_content[type_key] = items[0] # Use single item without array
257
+ else:
258
+ body_content[type_key] = items # Use array when multiple items
259
+ head = kwargs["head"] or {}
260
+ doi_batch = {
261
+ "@xmlns": "http://www.crossref.org/schema/5.4.0",
262
+ "@xmlns:ai": "http://www.crossref.org/AccessIndicators.xsd",
263
+ "@xmlns:rel": "http://www.crossref.org/relations.xsd",
264
+ "@xmlns:fr": "http://www.crossref.org/fundref.xsd",
265
+ "@version": "5.4.0",
266
+ "head": get_crossref_xml_head(head),
267
+ "body": body_content,
268
+ }
269
+ output = {"doi_batch": doi_batch}
270
+
271
+ kwargs["pretty"] = True
272
+ kwargs["indent"] = " "
273
+ kwargs.pop("dialect", None)
274
+ kwargs.pop("head", None)
275
+ return xmltodict.unparse(output, **kwargs)
276
+
277
+
278
+ def sanitize(text: str, **kwargs) -> str:
279
+ """Sanitize text"""
280
+ # default whitelisted HTML tags
281
+ tags = kwargs.get("tags", None) or {
282
+ "b",
283
+ "br",
284
+ "code",
285
+ "em",
286
+ "i",
287
+ "sub",
288
+ "sup",
289
+ "strong",
290
+ }
291
+ attributes = kwargs.get("attributes", None)
292
+ string = nh3.clean(text, tags=tags, attributes=attributes, link_rel=None)
293
+ # remove excessive internal whitespace
294
+ return " ".join(re.split(r"\s+", string, flags=re.UNICODE))
295
+
296
+
297
+ def get_crossref_xml_head(metadata: dict) -> dict:
298
+ """Get head element for Crossref XML"""
299
+ return {
300
+ "doi_batch_id": str(uuid.uuid4()),
301
+ "timestamp": datetime.now().strftime("%Y%m%d%H%M%S"),
302
+ "depositor": {
303
+ "depositor_name": metadata.get("depositor", None) or "test",
304
+ "email_address": metadata.get("email", None) or "info@example.org",
305
+ },
306
+ "registrant": metadata.get("registrant", None) or "test",
307
+ }
@@ -2,16 +2,12 @@ import time
2
2
 
3
3
  import click
4
4
  import orjson as json
5
- import pydash as py_
6
5
 
7
6
  from commonmeta import Metadata, MetadataList # __version__
8
7
  from commonmeta.api_utils import update_ghost_post_via_api
9
8
  from commonmeta.doi_utils import decode_doi, encode_doi, validate_prefix
10
9
  from commonmeta.readers.crossref_reader import get_random_crossref_id
11
10
  from commonmeta.readers.datacite_reader import get_random_datacite_id
12
- from commonmeta.readers.json_feed_reader import (
13
- get_json_feed_item_uuid,
14
- )
15
11
  from commonmeta.readers.openalex_reader import get_random_openalex_id
16
12
 
17
13
 
@@ -46,6 +42,49 @@ def convert(
46
42
  email,
47
43
  registrant,
48
44
  show_errors,
45
+ ):
46
+ metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
47
+ if show_errors and not metadata.is_valid:
48
+ raise click.ClickException(str(metadata.errors))
49
+
50
+ click.echo(
51
+ metadata.write(
52
+ to=to,
53
+ style=style,
54
+ locale=locale,
55
+ depositor=depositor,
56
+ email=email,
57
+ registrant=registrant,
58
+ )
59
+ )
60
+ if show_errors and metadata.write_errors:
61
+ raise click.ClickException(str(metadata.write_errors))
62
+
63
+
64
+ @cli.command()
65
+ @click.argument("input", type=str, required=True)
66
+ @click.option("--via", "-f", type=str, default=None)
67
+ @click.option("--to", "-t", type=str, default="commonmeta")
68
+ @click.option("--style", "-s", type=str, default="apa")
69
+ @click.option("--locale", "-l", type=str, default="en-US")
70
+ @click.option("--doi", type=str)
71
+ @click.option("--prefix", type=str)
72
+ @click.option("--depositor", type=str)
73
+ @click.option("--email", type=str)
74
+ @click.option("--registrant", type=str)
75
+ @click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
76
+ def put(
77
+ input,
78
+ via,
79
+ to,
80
+ style,
81
+ locale,
82
+ doi,
83
+ prefix,
84
+ depositor,
85
+ email,
86
+ registrant,
87
+ show_errors,
49
88
  ):
50
89
  metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
51
90
  if show_errors and not metadata.is_valid:
@@ -75,8 +114,7 @@ def convert(
75
114
  @click.option("--depositor", type=str)
76
115
  @click.option("--email", type=str)
77
116
  @click.option("--registrant", type=str)
78
- @click.option("--filename", type=str)
79
- @click.option("--jsonlines/--no-jsonlines", type=bool, show_default=True, default=False)
117
+ @click.option("--file", type=str)
80
118
  @click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
81
119
  @click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
82
120
  def list(
@@ -89,8 +127,69 @@ def list(
89
127
  depositor,
90
128
  email,
91
129
  registrant,
92
- filename,
93
- jsonlines,
130
+ file,
131
+ show_errors,
132
+ show_timer,
133
+ ):
134
+ start = time.time()
135
+ metadata_list = MetadataList(
136
+ string,
137
+ via=via,
138
+ file=file,
139
+ depositor=depositor,
140
+ email=email,
141
+ registrant=registrant,
142
+ prefix=prefix,
143
+ )
144
+ end = time.time()
145
+ runtime = end - start
146
+ if show_errors and not metadata_list.is_valid:
147
+ raise click.ClickException(str(metadata_list.errors))
148
+ if file:
149
+ metadata_list.write(to=to, style=style, locale=locale)
150
+ else:
151
+ click.echo(metadata_list.write(to=to, style=style, locale=locale))
152
+
153
+ if show_errors and len(metadata_list.write_errors) > 0:
154
+ raise click.ClickException(str(metadata_list.write_errors))
155
+ if show_timer:
156
+ click.echo(f"Runtime: {runtime:.2f} seconds")
157
+
158
+
159
+ @cli.command()
160
+ @click.argument("string", type=str, required=True)
161
+ @click.option("--via", "-f", type=str)
162
+ @click.option("--to", "-t", type=str, default="commonmeta")
163
+ @click.option("--style", "-s", type=str, default="apa")
164
+ @click.option("--locale", "-l", type=str, default="en-US")
165
+ @click.option("--prefix", type=str)
166
+ @click.option("--depositor", type=str)
167
+ @click.option("--email", type=str)
168
+ @click.option("--registrant", type=str)
169
+ @click.option("--login_id", type=str)
170
+ @click.option("--login_passwd", type=str)
171
+ @click.option("--host", type=str)
172
+ @click.option("--token", type=str)
173
+ @click.option("--legacy-key", type=str)
174
+ @click.option("--file", type=str)
175
+ @click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
176
+ @click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
177
+ def push(
178
+ string,
179
+ via,
180
+ to,
181
+ style,
182
+ locale,
183
+ prefix,
184
+ depositor,
185
+ email,
186
+ registrant,
187
+ login_id,
188
+ login_passwd,
189
+ host,
190
+ token,
191
+ legacy_key,
192
+ file,
94
193
  show_errors,
95
194
  show_timer,
96
195
  ):
@@ -98,18 +197,22 @@ def list(
98
197
  metadata_list = MetadataList(
99
198
  string,
100
199
  via=via,
200
+ file=file,
101
201
  depositor=depositor,
102
202
  email=email,
103
203
  registrant=registrant,
204
+ login_id=login_id,
205
+ login_passwd=login_passwd,
206
+ host=host,
207
+ token=token,
104
208
  prefix=prefix,
105
- filename=filename,
106
- jsonlines=jsonlines,
107
209
  )
108
210
  end = time.time()
109
211
  runtime = end - start
110
212
  if show_errors and not metadata_list.is_valid:
111
213
  raise click.ClickException(str(metadata_list.errors))
112
- click.echo(metadata_list.write(to=to, style=style, locale=locale))
214
+
215
+ click.echo(metadata_list.push(to=to, style=style, locale=locale))
113
216
  if show_errors and len(metadata_list.write_errors) > 0:
114
217
  raise click.ClickException(str(metadata_list.write_errors))
115
218
  if show_timer:
@@ -167,29 +270,6 @@ def decode(doi):
167
270
  click.echo(output)
168
271
 
169
272
 
170
- @cli.command()
171
- @click.argument("id", type=str, required=True)
172
- def encode_by_id(id):
173
- post = get_json_feed_item_uuid(id)
174
- prefix = py_.get(post, "blog.prefix")
175
- if validate_prefix(prefix) is None:
176
- return None
177
- output = encode_doi(prefix)
178
- click.echo(output)
179
-
180
-
181
- @cli.command()
182
- @click.argument("filter", type=str, required=True, default="unregistered")
183
- @click.option("--id", type=str)
184
- def json_feed(filter, id=None):
185
- if filter == "blog_slug" and id is not None:
186
- post = get_json_feed_item_uuid(id)
187
- output = py_.get(post, "blog.slug", "no slug found")
188
- else:
189
- output = "no filter specified"
190
- click.echo(output)
191
-
192
-
193
273
  @cli.command()
194
274
  @click.argument("id", type=str, required=True)
195
275
  @click.option("--api-key", "-k", type=str, required=True)
@@ -190,6 +190,7 @@ CM_TO_CR_TRANSLATIONS = {
190
190
  "JournalIssue": "JournalIssue",
191
191
  "JournalVolume": "JournalVolume",
192
192
  "Journal": "Journal",
193
+ "PeerReview": "PeerReview",
193
194
  "ProceedingsArticle": "ProceedingsArticle",
194
195
  "ProceedingsSeries": "ProceedingsSeries",
195
196
  "Proceedings": "Proceedings",
@@ -698,3 +699,22 @@ ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS = {
698
699
  "https://ror.org/00yjd3n13": "https://doi.org/10.13039/501100001711",
699
700
  "https://ror.org/04wfr2810": "https://doi.org/10.13039/501100003043",
700
701
  }
702
+
703
+ COMMUNITY_TRANSLATIONS = {
704
+ "ai": "artificialintelligence",
705
+ "llms": "artificialintelligence",
706
+ "book%20review": "bookreview",
707
+ "bjps%20review%20of%20books": "bookreview",
708
+ "books": "bookreview",
709
+ "nachrichten": "news",
710
+ "opencitations": "researchassessment",
711
+ "papers": "researchblogging",
712
+ "urheberrecht": "copyright",
713
+ "workshop": "events",
714
+ "veranstaltungen": "events",
715
+ "veranstaltungshinweise": "events",
716
+ "asapbio": "preprints",
717
+ "biorxiv": "preprints",
718
+ "runiverse": "r",
719
+ "bericht": "report",
720
+ }