commonmeta-py 0.106__py3-none-any.whl → 0.108__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commonmeta/__init__.py +12 -3
- commonmeta/api_utils.py +3 -2
- commonmeta/base_utils.py +186 -3
- commonmeta/cli.py +114 -34
- commonmeta/constants.py +20 -0
- commonmeta/file_utils.py +112 -0
- commonmeta/metadata.py +102 -42
- commonmeta/readers/codemeta_reader.py +1 -1
- commonmeta/readers/crossref_reader.py +23 -10
- commonmeta/readers/crossref_xml_reader.py +1 -1
- commonmeta/readers/datacite_reader.py +6 -4
- commonmeta/readers/{json_feed_reader.py → jsonfeed_reader.py} +12 -12
- commonmeta/resources/crossref/common5.4.0.xsd +1264 -0
- commonmeta/resources/crossref/{crossref5.3.1.xsd → crossref5.4.0.xsd} +286 -88
- commonmeta/resources/crossref/doi_resources5.4.0.xsd +117 -0
- commonmeta/resources/crossref/fundingdata5.4.0.xsd +59 -0
- commonmeta/resources/crossref/fundref.xsd +29 -19
- commonmeta/resources/crossref/languages5.4.0.xsd +8119 -0
- commonmeta/resources/crossref/mediatypes5.4.0.xsd +2207 -0
- commonmeta/resources/crossref/module-ali.xsd +14 -6
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-common.xsd +101 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-content.xsd +683 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-presentation.xsd +2092 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3-strict-content.xsd +186 -0
- commonmeta/resources/crossref/standard-modules/mathml3/mathml3.xsd +9 -0
- commonmeta/resources/crossref/standard-modules/mathml3/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/module-ali.xsd +47 -0
- commonmeta/resources/crossref/standard-modules/xlink.xsd +100 -0
- commonmeta/resources/crossref/standard-modules/xml.xsd +287 -0
- commonmeta/resources/crossref/xml.xsd +287 -0
- commonmeta/schema_utils.py +25 -0
- commonmeta/utils.py +90 -15
- commonmeta/writers/bibtex_writer.py +5 -5
- commonmeta/writers/citation_writer.py +10 -5
- commonmeta/writers/commonmeta_writer.py +5 -17
- commonmeta/writers/crossref_xml_writer.py +1032 -4
- commonmeta/writers/csl_writer.py +6 -6
- commonmeta/writers/datacite_writer.py +11 -6
- commonmeta/writers/inveniordm_writer.py +286 -10
- commonmeta/writers/ris_writer.py +3 -3
- commonmeta/writers/schema_org_writer.py +10 -5
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/METADATA +5 -2
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/RECORD +46 -32
- commonmeta/crossref_utils.py +0 -583
- commonmeta/resources/crossref/common5.3.1.xsd +0 -1538
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/WHEEL +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/entry_points.txt +0 -0
- {commonmeta_py-0.106.dist-info → commonmeta_py-0.108.dist-info}/licenses/LICENSE +0 -0
commonmeta/__init__.py
CHANGED
@@ -10,7 +10,7 @@ commonmeta-py is a Python library to convert scholarly metadata
|
|
10
10
|
"""
|
11
11
|
|
12
12
|
__title__ = "commonmeta-py"
|
13
|
-
__version__ = "0.106"
|
13
|
+
__version__ = "0.108"
|
14
14
|
__author__ = "Martin Fenner"
|
15
15
|
__license__ = "MIT"
|
16
16
|
|
@@ -54,6 +54,14 @@ from .doi_utils import (
|
|
54
54
|
validate_doi,
|
55
55
|
validate_prefix,
|
56
56
|
)
|
57
|
+
from .file_utils import (
|
58
|
+
download_file,
|
59
|
+
read_file,
|
60
|
+
read_gz_file,
|
61
|
+
read_zip_file,
|
62
|
+
uncompress_content,
|
63
|
+
unzip_content,
|
64
|
+
)
|
57
65
|
from .metadata import Metadata, MetadataList
|
58
66
|
from .readers import (
|
59
67
|
cff_reader,
|
@@ -63,7 +71,7 @@ from .readers import (
|
|
63
71
|
datacite_reader,
|
64
72
|
datacite_xml_reader,
|
65
73
|
inveniordm_reader,
|
66
|
-
|
74
|
+
jsonfeed_reader,
|
67
75
|
kbase_reader,
|
68
76
|
openalex_reader,
|
69
77
|
ris_reader,
|
@@ -75,7 +83,7 @@ from .utils import (
|
|
75
83
|
extract_url,
|
76
84
|
extract_urls,
|
77
85
|
from_csl,
|
78
|
-
|
86
|
+
from_jsonfeed,
|
79
87
|
from_schema_org,
|
80
88
|
get_language,
|
81
89
|
issn_as_url,
|
@@ -97,6 +105,7 @@ from .writers import (
|
|
97
105
|
bibtex_writer,
|
98
106
|
citation_writer,
|
99
107
|
commonmeta_writer,
|
108
|
+
crossref_xml_writer,
|
100
109
|
csl_writer,
|
101
110
|
datacite_writer,
|
102
111
|
ris_writer,
|
commonmeta/api_utils.py
CHANGED
@@ -7,8 +7,9 @@ import jwt
|
|
7
7
|
import requests
|
8
8
|
from furl import furl
|
9
9
|
|
10
|
+
from commonmeta.readers.jsonfeed_reader import get_jsonfeed_uuid
|
11
|
+
|
10
12
|
from .doi_utils import doi_as_url, validate_doi
|
11
|
-
from .readers.json_feed_reader import get_json_feed_item_uuid
|
12
13
|
|
13
14
|
|
14
15
|
def generate_ghost_token(key: str) -> str:
|
@@ -34,7 +35,7 @@ def update_ghost_post_via_api(
|
|
34
35
|
"""Update Ghost post via API"""
|
35
36
|
# get post doi and url from Rogue Scholar API
|
36
37
|
# post url is needed to find post via Ghost API
|
37
|
-
post = get_json_feed_item_uuid(_id)
|
38
|
+
post = get_jsonfeed_uuid(_id)
|
38
39
|
if post.get("error", None):
|
39
40
|
return post
|
40
41
|
doi = validate_doi(post.get("doi", None))
|
commonmeta/base_utils.py
CHANGED
@@ -2,10 +2,13 @@
|
|
2
2
|
|
3
3
|
import html
|
4
4
|
import re
|
5
|
+
import uuid
|
6
|
+
from datetime import datetime
|
5
7
|
from os import path
|
6
8
|
from typing import Optional, Union
|
7
9
|
|
8
10
|
import nh3
|
11
|
+
import pydash as py_
|
9
12
|
import xmltodict
|
10
13
|
|
11
14
|
|
@@ -67,8 +70,8 @@ def parse_attributes(
|
|
67
70
|
|
68
71
|
|
69
72
|
def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
|
70
|
-
"""Parse XML into dict. Set default options, and options for Crossref XML"""
|
71
|
-
if string is None:
|
73
|
+
"""Parse XML into dict using xmltodict. Set default options, and options for Crossref XML"""
|
74
|
+
if string is None or string == "{}":
|
72
75
|
return None
|
73
76
|
if path.exists(string):
|
74
77
|
with open(string, encoding="utf-8") as file:
|
@@ -77,7 +80,7 @@ def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
|
|
77
80
|
if kwargs.get("dialect", None) == "crossref":
|
78
81
|
# remove namespaces from xml
|
79
82
|
namespaces = {
|
80
|
-
"http://www.crossref.org/schema/5.3.1": None,
|
83
|
+
"http://www.crossref.org/schema/5.4.0": None,
|
81
84
|
"http://www.crossref.org/qrschema/3.0": None,
|
82
85
|
"http://www.crossref.org/xschema/1.0": None,
|
83
86
|
"http://www.crossref.org/xschema/1.1": None,
|
@@ -93,6 +96,7 @@ def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
|
|
93
96
|
"person_name",
|
94
97
|
"organization",
|
95
98
|
"titles",
|
99
|
+
"abstract",
|
96
100
|
"item",
|
97
101
|
"citation",
|
98
102
|
"program",
|
@@ -105,6 +109,172 @@ def parse_xml(string: Optional[str], **kwargs) -> Optional[Union[dict, list]]:
|
|
105
109
|
return xmltodict.parse(string, **kwargs)
|
106
110
|
|
107
111
|
|
112
|
+
def unparse_xml(input: Optional[dict], **kwargs) -> str:
|
113
|
+
"""Unparse (dump) dict into XML using xmltodict. Set default options, and options for Crossref XML"""
|
114
|
+
if input is None:
|
115
|
+
return None
|
116
|
+
if kwargs.get("dialect", None) == "crossref":
|
117
|
+
# Add additional logic for crossref dialect
|
118
|
+
# add body and root element as wrapping elements
|
119
|
+
type = next(iter(input))
|
120
|
+
attributes = input.get(type)
|
121
|
+
input.pop(type)
|
122
|
+
|
123
|
+
if type == "book":
|
124
|
+
book_metadata = py_.get(input, "book_metadata") or {}
|
125
|
+
input.pop("book_metadata")
|
126
|
+
book_metadata = {**book_metadata, **input}
|
127
|
+
input = {"book": {**attributes, "book_metadata": book_metadata}}
|
128
|
+
elif type == "database":
|
129
|
+
database_metadata = py_.get(input, "database_metadata") or {}
|
130
|
+
input.pop("database_metadata")
|
131
|
+
val = input.pop("publisher_item")
|
132
|
+
institution = input.pop("institution", None)
|
133
|
+
database_metadata = {**{"titles": val}, **database_metadata}
|
134
|
+
database_metadata["institution"] = institution or {}
|
135
|
+
component = input.pop("component", None)
|
136
|
+
input = {
|
137
|
+
"database": {
|
138
|
+
**attributes,
|
139
|
+
"database_metadata": database_metadata,
|
140
|
+
"component_list": {"component": component | input},
|
141
|
+
}
|
142
|
+
}
|
143
|
+
elif type == "journal":
|
144
|
+
journal_metadata = py_.get(input, "journal_metadata") or {}
|
145
|
+
journal_issue = py_.get(input, "journal_issue") or {}
|
146
|
+
journal_article = py_.get(input, "journal_article") or {}
|
147
|
+
input.pop("journal_metadata")
|
148
|
+
input.pop("journal_issue")
|
149
|
+
input.pop("journal_article")
|
150
|
+
input = {
|
151
|
+
"journal": {
|
152
|
+
"journal_metadata": journal_metadata,
|
153
|
+
"journal_issue": journal_issue,
|
154
|
+
"journal_article": journal_article | input,
|
155
|
+
}
|
156
|
+
}
|
157
|
+
elif type == "proceedings_article":
|
158
|
+
proceedings_metadata = py_.get(input, "proceedings_metadata") or {}
|
159
|
+
input.pop("proceedings_metadata")
|
160
|
+
input = {
|
161
|
+
"proceedings": {
|
162
|
+
**attributes,
|
163
|
+
"proceedings_metadata": proceedings_metadata,
|
164
|
+
"conference_paper": input,
|
165
|
+
}
|
166
|
+
}
|
167
|
+
elif type == "sa_component":
|
168
|
+
component = py_.get(input, "component") or {}
|
169
|
+
input.pop("component")
|
170
|
+
input = {
|
171
|
+
"sa_component": {
|
172
|
+
**attributes,
|
173
|
+
"component_list": {"component": component | input},
|
174
|
+
}
|
175
|
+
}
|
176
|
+
else:
|
177
|
+
input = {type: attributes | input}
|
178
|
+
|
179
|
+
doi_batch = {
|
180
|
+
"@xmlns": "http://www.crossref.org/schema/5.4.0",
|
181
|
+
"@version": "5.4.0",
|
182
|
+
"head": get_crossref_xml_head(input),
|
183
|
+
"body": input,
|
184
|
+
}
|
185
|
+
input = {"doi_batch": doi_batch}
|
186
|
+
kwargs["pretty"] = True
|
187
|
+
kwargs["indent"] = " "
|
188
|
+
kwargs.pop("dialect", None)
|
189
|
+
return xmltodict.unparse(input, **kwargs)
|
190
|
+
|
191
|
+
|
192
|
+
def unparse_xml_list(input: Optional[list], **kwargs) -> str:
|
193
|
+
"""Unparse (dump) list into XML using xmltodict. Set default options, and options for Crossref XML"""
|
194
|
+
if input is None:
|
195
|
+
return None
|
196
|
+
if kwargs.get("dialect", None) == "crossref":
|
197
|
+
# Add additional logic for crossref dialect
|
198
|
+
# add body and root element as wrapping elements
|
199
|
+
|
200
|
+
# Group items by type with minimal grouping
|
201
|
+
items_by_type = {}
|
202
|
+
|
203
|
+
for item in wrap(input):
|
204
|
+
type = next(iter(item))
|
205
|
+
attributes = item.get(type)
|
206
|
+
item.pop(type)
|
207
|
+
|
208
|
+
# handle nested book_metadata and journal structure as in unparse_xml
|
209
|
+
if type == "book":
|
210
|
+
book_metadata = py_.get(item, "book_metadata") or {}
|
211
|
+
item.pop("book_metadata")
|
212
|
+
book_metadata = {**book_metadata, **item}
|
213
|
+
item = {"book": {**attributes, "book_metadata": book_metadata}}
|
214
|
+
elif type == "database":
|
215
|
+
database_metadata = py_.get(item, "database_metadata") or {}
|
216
|
+
item.pop("database_metadata")
|
217
|
+
database_metadata = {**database_metadata, **item}
|
218
|
+
item = {
|
219
|
+
"database": {**attributes, "database_metadata": database_metadata}
|
220
|
+
}
|
221
|
+
elif type == "journal":
|
222
|
+
journal_metadata = py_.get(item, "journal_metadata") or {}
|
223
|
+
journal_issue = py_.get(item, "journal_issue") or {}
|
224
|
+
journal_article = py_.get(item, "journal_article") or {}
|
225
|
+
item.pop("journal_metadata")
|
226
|
+
item.pop("journal_issue")
|
227
|
+
item.pop("journal_article")
|
228
|
+
item = {
|
229
|
+
"journal": {
|
230
|
+
"journal_metadata": journal_metadata,
|
231
|
+
"journal_issue": journal_issue,
|
232
|
+
"journal_article": journal_article | item,
|
233
|
+
}
|
234
|
+
}
|
235
|
+
elif type == "sa_component":
|
236
|
+
component = py_.get(input, "component") or {}
|
237
|
+
item.pop("component")
|
238
|
+
item = {
|
239
|
+
"sa_component": {
|
240
|
+
**attributes,
|
241
|
+
"component_list": {"component": component | item},
|
242
|
+
}
|
243
|
+
}
|
244
|
+
else:
|
245
|
+
item = {type: attributes | item}
|
246
|
+
|
247
|
+
# Add item to appropriate type bucket
|
248
|
+
if type not in items_by_type:
|
249
|
+
items_by_type[type] = []
|
250
|
+
items_by_type[type].append(item[type])
|
251
|
+
|
252
|
+
# Create the final structure with body containing all grouped items
|
253
|
+
body_content = {}
|
254
|
+
for type_key, items in items_by_type.items():
|
255
|
+
if len(items) == 1:
|
256
|
+
body_content[type_key] = items[0] # Use single item without array
|
257
|
+
else:
|
258
|
+
body_content[type_key] = items # Use array when multiple items
|
259
|
+
head = kwargs["head"] or {}
|
260
|
+
doi_batch = {
|
261
|
+
"@xmlns": "http://www.crossref.org/schema/5.4.0",
|
262
|
+
"@xmlns:ai": "http://www.crossref.org/AccessIndicators.xsd",
|
263
|
+
"@xmlns:rel": "http://www.crossref.org/relations.xsd",
|
264
|
+
"@xmlns:fr": "http://www.crossref.org/fundref.xsd",
|
265
|
+
"@version": "5.4.0",
|
266
|
+
"head": get_crossref_xml_head(head),
|
267
|
+
"body": body_content,
|
268
|
+
}
|
269
|
+
output = {"doi_batch": doi_batch}
|
270
|
+
|
271
|
+
kwargs["pretty"] = True
|
272
|
+
kwargs["indent"] = " "
|
273
|
+
kwargs.pop("dialect", None)
|
274
|
+
kwargs.pop("head", None)
|
275
|
+
return xmltodict.unparse(output, **kwargs)
|
276
|
+
|
277
|
+
|
108
278
|
def sanitize(text: str, **kwargs) -> str:
|
109
279
|
"""Sanitize text"""
|
110
280
|
# default whitelisted HTML tags
|
@@ -122,3 +292,16 @@ def sanitize(text: str, **kwargs) -> str:
|
|
122
292
|
string = nh3.clean(text, tags=tags, attributes=attributes, link_rel=None)
|
123
293
|
# remove excessive internal whitespace
|
124
294
|
return " ".join(re.split(r"\s+", string, flags=re.UNICODE))
|
295
|
+
|
296
|
+
|
297
|
+
def get_crossref_xml_head(metadata: dict) -> dict:
|
298
|
+
"""Get head element for Crossref XML"""
|
299
|
+
return {
|
300
|
+
"doi_batch_id": str(uuid.uuid4()),
|
301
|
+
"timestamp": datetime.now().strftime("%Y%m%d%H%M%S"),
|
302
|
+
"depositor": {
|
303
|
+
"depositor_name": metadata.get("depositor", None) or "test",
|
304
|
+
"email_address": metadata.get("email", None) or "info@example.org",
|
305
|
+
},
|
306
|
+
"registrant": metadata.get("registrant", None) or "test",
|
307
|
+
}
|
commonmeta/cli.py
CHANGED
@@ -2,16 +2,12 @@ import time
|
|
2
2
|
|
3
3
|
import click
|
4
4
|
import orjson as json
|
5
|
-
import pydash as py_
|
6
5
|
|
7
6
|
from commonmeta import Metadata, MetadataList # __version__
|
8
7
|
from commonmeta.api_utils import update_ghost_post_via_api
|
9
8
|
from commonmeta.doi_utils import decode_doi, encode_doi, validate_prefix
|
10
9
|
from commonmeta.readers.crossref_reader import get_random_crossref_id
|
11
10
|
from commonmeta.readers.datacite_reader import get_random_datacite_id
|
12
|
-
from commonmeta.readers.json_feed_reader import (
|
13
|
-
get_json_feed_item_uuid,
|
14
|
-
)
|
15
11
|
from commonmeta.readers.openalex_reader import get_random_openalex_id
|
16
12
|
|
17
13
|
|
@@ -46,6 +42,49 @@ def convert(
|
|
46
42
|
email,
|
47
43
|
registrant,
|
48
44
|
show_errors,
|
45
|
+
):
|
46
|
+
metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
|
47
|
+
if show_errors and not metadata.is_valid:
|
48
|
+
raise click.ClickException(str(metadata.errors))
|
49
|
+
|
50
|
+
click.echo(
|
51
|
+
metadata.write(
|
52
|
+
to=to,
|
53
|
+
style=style,
|
54
|
+
locale=locale,
|
55
|
+
depositor=depositor,
|
56
|
+
email=email,
|
57
|
+
registrant=registrant,
|
58
|
+
)
|
59
|
+
)
|
60
|
+
if show_errors and metadata.write_errors:
|
61
|
+
raise click.ClickException(str(metadata.write_errors))
|
62
|
+
|
63
|
+
|
64
|
+
@cli.command()
|
65
|
+
@click.argument("input", type=str, required=True)
|
66
|
+
@click.option("--via", "-f", type=str, default=None)
|
67
|
+
@click.option("--to", "-t", type=str, default="commonmeta")
|
68
|
+
@click.option("--style", "-s", type=str, default="apa")
|
69
|
+
@click.option("--locale", "-l", type=str, default="en-US")
|
70
|
+
@click.option("--doi", type=str)
|
71
|
+
@click.option("--prefix", type=str)
|
72
|
+
@click.option("--depositor", type=str)
|
73
|
+
@click.option("--email", type=str)
|
74
|
+
@click.option("--registrant", type=str)
|
75
|
+
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
76
|
+
def put(
|
77
|
+
input,
|
78
|
+
via,
|
79
|
+
to,
|
80
|
+
style,
|
81
|
+
locale,
|
82
|
+
doi,
|
83
|
+
prefix,
|
84
|
+
depositor,
|
85
|
+
email,
|
86
|
+
registrant,
|
87
|
+
show_errors,
|
49
88
|
):
|
50
89
|
metadata = Metadata(input, via=via, doi=doi, prefix=prefix)
|
51
90
|
if show_errors and not metadata.is_valid:
|
@@ -75,8 +114,7 @@ def convert(
|
|
75
114
|
@click.option("--depositor", type=str)
|
76
115
|
@click.option("--email", type=str)
|
77
116
|
@click.option("--registrant", type=str)
|
78
|
-
@click.option("--filename", type=str)
|
79
|
-
@click.option("--jsonlines/--no-jsonlines", type=bool, show_default=True, default=False)
|
117
|
+
@click.option("--file", type=str)
|
80
118
|
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
81
119
|
@click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
|
82
120
|
def list(
|
@@ -89,8 +127,69 @@ def list(
|
|
89
127
|
depositor,
|
90
128
|
email,
|
91
129
|
registrant,
|
92
|
-
|
93
|
-
|
130
|
+
file,
|
131
|
+
show_errors,
|
132
|
+
show_timer,
|
133
|
+
):
|
134
|
+
start = time.time()
|
135
|
+
metadata_list = MetadataList(
|
136
|
+
string,
|
137
|
+
via=via,
|
138
|
+
file=file,
|
139
|
+
depositor=depositor,
|
140
|
+
email=email,
|
141
|
+
registrant=registrant,
|
142
|
+
prefix=prefix,
|
143
|
+
)
|
144
|
+
end = time.time()
|
145
|
+
runtime = end - start
|
146
|
+
if show_errors and not metadata_list.is_valid:
|
147
|
+
raise click.ClickException(str(metadata_list.errors))
|
148
|
+
if file:
|
149
|
+
metadata_list.write(to=to, style=style, locale=locale)
|
150
|
+
else:
|
151
|
+
click.echo(metadata_list.write(to=to, style=style, locale=locale))
|
152
|
+
|
153
|
+
if show_errors and len(metadata_list.write_errors) > 0:
|
154
|
+
raise click.ClickException(str(metadata_list.write_errors))
|
155
|
+
if show_timer:
|
156
|
+
click.echo(f"Runtime: {runtime:.2f} seconds")
|
157
|
+
|
158
|
+
|
159
|
+
@cli.command()
|
160
|
+
@click.argument("string", type=str, required=True)
|
161
|
+
@click.option("--via", "-f", type=str)
|
162
|
+
@click.option("--to", "-t", type=str, default="commonmeta")
|
163
|
+
@click.option("--style", "-s", type=str, default="apa")
|
164
|
+
@click.option("--locale", "-l", type=str, default="en-US")
|
165
|
+
@click.option("--prefix", type=str)
|
166
|
+
@click.option("--depositor", type=str)
|
167
|
+
@click.option("--email", type=str)
|
168
|
+
@click.option("--registrant", type=str)
|
169
|
+
@click.option("--login_id", type=str)
|
170
|
+
@click.option("--login_passwd", type=str)
|
171
|
+
@click.option("--host", type=str)
|
172
|
+
@click.option("--token", type=str)
|
173
|
+
@click.option("--legacy-key", type=str)
|
174
|
+
@click.option("--file", type=str)
|
175
|
+
@click.option("--show-errors/--no-errors", type=bool, show_default=True, default=False)
|
176
|
+
@click.option("--show-timer/--no-timer", type=bool, show_default=True, default=False)
|
177
|
+
def push(
|
178
|
+
string,
|
179
|
+
via,
|
180
|
+
to,
|
181
|
+
style,
|
182
|
+
locale,
|
183
|
+
prefix,
|
184
|
+
depositor,
|
185
|
+
email,
|
186
|
+
registrant,
|
187
|
+
login_id,
|
188
|
+
login_passwd,
|
189
|
+
host,
|
190
|
+
token,
|
191
|
+
legacy_key,
|
192
|
+
file,
|
94
193
|
show_errors,
|
95
194
|
show_timer,
|
96
195
|
):
|
@@ -98,18 +197,22 @@ def list(
|
|
98
197
|
metadata_list = MetadataList(
|
99
198
|
string,
|
100
199
|
via=via,
|
200
|
+
file=file,
|
101
201
|
depositor=depositor,
|
102
202
|
email=email,
|
103
203
|
registrant=registrant,
|
204
|
+
login_id=login_id,
|
205
|
+
login_passwd=login_passwd,
|
206
|
+
host=host,
|
207
|
+
token=token,
|
104
208
|
prefix=prefix,
|
105
|
-
filename=filename,
|
106
|
-
jsonlines=jsonlines,
|
107
209
|
)
|
108
210
|
end = time.time()
|
109
211
|
runtime = end - start
|
110
212
|
if show_errors and not metadata_list.is_valid:
|
111
213
|
raise click.ClickException(str(metadata_list.errors))
|
112
|
-
|
214
|
+
|
215
|
+
click.echo(metadata_list.push(to=to, style=style, locale=locale))
|
113
216
|
if show_errors and len(metadata_list.write_errors) > 0:
|
114
217
|
raise click.ClickException(str(metadata_list.write_errors))
|
115
218
|
if show_timer:
|
@@ -167,29 +270,6 @@ def decode(doi):
|
|
167
270
|
click.echo(output)
|
168
271
|
|
169
272
|
|
170
|
-
@cli.command()
|
171
|
-
@click.argument("id", type=str, required=True)
|
172
|
-
def encode_by_id(id):
|
173
|
-
post = get_json_feed_item_uuid(id)
|
174
|
-
prefix = py_.get(post, "blog.prefix")
|
175
|
-
if validate_prefix(prefix) is None:
|
176
|
-
return None
|
177
|
-
output = encode_doi(prefix)
|
178
|
-
click.echo(output)
|
179
|
-
|
180
|
-
|
181
|
-
@cli.command()
|
182
|
-
@click.argument("filter", type=str, required=True, default="unregistered")
|
183
|
-
@click.option("--id", type=str)
|
184
|
-
def json_feed(filter, id=None):
|
185
|
-
if filter == "blog_slug" and id is not None:
|
186
|
-
post = get_json_feed_item_uuid(id)
|
187
|
-
output = py_.get(post, "blog.slug", "no slug found")
|
188
|
-
else:
|
189
|
-
output = "no filter specified"
|
190
|
-
click.echo(output)
|
191
|
-
|
192
|
-
|
193
273
|
@cli.command()
|
194
274
|
@click.argument("id", type=str, required=True)
|
195
275
|
@click.option("--api-key", "-k", type=str, required=True)
|
commonmeta/constants.py
CHANGED
@@ -190,6 +190,7 @@ CM_TO_CR_TRANSLATIONS = {
|
|
190
190
|
"JournalIssue": "JournalIssue",
|
191
191
|
"JournalVolume": "JournalVolume",
|
192
192
|
"Journal": "Journal",
|
193
|
+
"PeerReview": "PeerReview",
|
193
194
|
"ProceedingsArticle": "ProceedingsArticle",
|
194
195
|
"ProceedingsSeries": "ProceedingsSeries",
|
195
196
|
"Proceedings": "Proceedings",
|
@@ -698,3 +699,22 @@ ROR_TO_CROSSREF_FUNDER_ID_TRANSLATIONS = {
|
|
698
699
|
"https://ror.org/00yjd3n13": "https://doi.org/10.13039/501100001711",
|
699
700
|
"https://ror.org/04wfr2810": "https://doi.org/10.13039/501100003043",
|
700
701
|
}
|
702
|
+
|
703
|
+
COMMUNITY_TRANSLATIONS = {
|
704
|
+
"ai": "artificialintelligence",
|
705
|
+
"llms": "artificialintelligence",
|
706
|
+
"book%20review": "bookreview",
|
707
|
+
"bjps%20review%20of%20books": "bookreview",
|
708
|
+
"books": "bookreview",
|
709
|
+
"nachrichten": "news",
|
710
|
+
"opencitations": "researchassessment",
|
711
|
+
"papers": "researchblogging",
|
712
|
+
"urheberrecht": "copyright",
|
713
|
+
"workshop": "events",
|
714
|
+
"veranstaltungen": "events",
|
715
|
+
"veranstaltungshinweise": "events",
|
716
|
+
"asapbio": "preprints",
|
717
|
+
"biorxiv": "preprints",
|
718
|
+
"runiverse": "r",
|
719
|
+
"bericht": "report",
|
720
|
+
}
|
commonmeta/file_utils.py
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
"""File utils module for commonmeta-py"""
|
2
|
+
|
3
|
+
import gzip
|
4
|
+
import io
|
5
|
+
import zipfile
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Optional, Union
|
8
|
+
|
9
|
+
import requests
|
10
|
+
|
11
|
+
|
12
|
+
def read_file(filename: str) -> bytes:
|
13
|
+
with open(filename, "rb") as f:
|
14
|
+
return f.read()
|
15
|
+
|
16
|
+
|
17
|
+
def uncompress_content(input: bytes) -> bytes:
|
18
|
+
with gzip.GzipFile(fileobj=io.BytesIO(input)) as gz:
|
19
|
+
return gz.read()
|
20
|
+
|
21
|
+
|
22
|
+
def unzip_content(input: bytes, filename: Optional[str] = None) -> bytes:
|
23
|
+
output = b""
|
24
|
+
with zipfile.ZipFile(io.BytesIO(input)) as zf:
|
25
|
+
for info in zf.infolist():
|
26
|
+
if filename and info.filename != filename:
|
27
|
+
continue
|
28
|
+
with zf.open(info) as file:
|
29
|
+
output += file.read()
|
30
|
+
return output
|
31
|
+
|
32
|
+
|
33
|
+
def read_gz_file(filename: str) -> bytes:
|
34
|
+
input_bytes = read_file(filename)
|
35
|
+
return uncompress_content(input_bytes)
|
36
|
+
|
37
|
+
|
38
|
+
def read_zip_file(filename: str, name: Optional[str] = None) -> bytes:
|
39
|
+
input_bytes = read_file(filename)
|
40
|
+
return unzip_content(input_bytes, name)
|
41
|
+
|
42
|
+
|
43
|
+
def download_file(url: str) -> bytes:
|
44
|
+
resp = requests.get(url, stream=True)
|
45
|
+
resp.raise_for_status()
|
46
|
+
return resp.content
|
47
|
+
# # Progress bar
|
48
|
+
# total = int(resp.headers.get("content-length", 0))
|
49
|
+
|
50
|
+
# buf = io.BytesIO()
|
51
|
+
# with tqdm(total=total, unit="B", unit_scale=True, desc="downloading") as bar:
|
52
|
+
# for chunk in resp.iter_content(chunk_size=8192):
|
53
|
+
# if chunk:
|
54
|
+
# buf.write(chunk)
|
55
|
+
# bar.update(len(chunk))
|
56
|
+
# return buf.getvalue()
|
57
|
+
|
58
|
+
|
59
|
+
def write_file(filename: str, output: bytes) -> None:
|
60
|
+
with open(filename, "xb") as f:
|
61
|
+
f.write(output)
|
62
|
+
|
63
|
+
|
64
|
+
def write_gz_file(filename: str, output: bytes) -> None:
|
65
|
+
with gzip.open(filename, "xb") as gzfile:
|
66
|
+
gzfile.write(output)
|
67
|
+
|
68
|
+
|
69
|
+
def write_zip_file(filename: str, output: bytes) -> None:
|
70
|
+
path = Path(filename)
|
71
|
+
with zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED) as zipf:
|
72
|
+
zipf.writestr(path.name, output)
|
73
|
+
|
74
|
+
|
75
|
+
def get_extension(filename: str) -> tuple[str, str, Optional[str]]:
|
76
|
+
"""Extract extension and compression from filename"""
|
77
|
+
extension = Path(filename).suffix
|
78
|
+
if extension == ".gz":
|
79
|
+
compress = ".gz"
|
80
|
+
filename = filename[:-3]
|
81
|
+
extension = Path(filename).suffix
|
82
|
+
elif extension == ".zip":
|
83
|
+
compress = ".zip"
|
84
|
+
filename = filename[:-4]
|
85
|
+
extension = Path(filename).suffix
|
86
|
+
elif extension == "":
|
87
|
+
compress = None
|
88
|
+
filename = filename + ".json"
|
89
|
+
extension = ".json"
|
90
|
+
else:
|
91
|
+
compress = None
|
92
|
+
return filename, extension, compress
|
93
|
+
|
94
|
+
|
95
|
+
def write_output(filename: str, input: Union[bytes, str], ext: list[str]) -> None:
|
96
|
+
"""Write output to file with supported extension"""
|
97
|
+
|
98
|
+
# Convert string to bytes if necessary
|
99
|
+
if isinstance(input, str):
|
100
|
+
input = input.encode("utf-8")
|
101
|
+
|
102
|
+
filename, extension, compress = get_extension(filename)
|
103
|
+
if extension not in ext:
|
104
|
+
raise ValueError(
|
105
|
+
f"File format not supported. Please provide a filename with {ext} extension."
|
106
|
+
)
|
107
|
+
if compress == ".gz":
|
108
|
+
write_gz_file(filename + compress, input)
|
109
|
+
elif compress == ".zip":
|
110
|
+
write_zip_file(filename + compress, input)
|
111
|
+
else:
|
112
|
+
write_file(filename, input)
|