dump-things-pyclient 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dump_things_pyclient/commands/dtc_plugins/auto_curate.py +1 -2
- dump_things_pyclient/commands/dtc_plugins/export.py +124 -32
- dump_things_pyclient/commands/dtc_plugins/get_records.py +5 -6
- {dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/METADATA +2 -1
- {dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/RECORD +8 -8
- {dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/WHEEL +0 -0
- {dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/entry_points.txt +0 -0
- {dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,6 @@ from typing import (
|
|
|
12
12
|
)
|
|
13
13
|
|
|
14
14
|
import rich_click as click
|
|
15
|
-
from rich import print as rprint
|
|
16
15
|
from rich.console import Console
|
|
17
16
|
from rich.progress import track
|
|
18
17
|
|
|
@@ -300,7 +299,7 @@ def auto_curate(
|
|
|
300
299
|
return 1
|
|
301
300
|
|
|
302
301
|
if output is not None:
|
|
303
|
-
|
|
302
|
+
click.echo(json.dumps(output, ensure_ascii=False))
|
|
304
303
|
|
|
305
304
|
return 0
|
|
306
305
|
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
import json
|
|
2
3
|
import sys
|
|
3
4
|
from collections import defaultdict
|
|
4
|
-
from itertools import
|
|
5
|
+
from itertools import (
|
|
6
|
+
chain,
|
|
7
|
+
count,
|
|
8
|
+
)
|
|
5
9
|
from pathlib import Path
|
|
6
10
|
from typing import (
|
|
7
11
|
Any,
|
|
8
|
-
|
|
12
|
+
Generator,
|
|
9
13
|
)
|
|
10
14
|
|
|
11
15
|
import rich_click as click
|
|
16
|
+
import yaml
|
|
12
17
|
from rich.console import Console
|
|
13
18
|
from rich.progress import track
|
|
14
19
|
|
|
@@ -49,18 +54,34 @@ console = Console(file=sys.stderr)
|
|
|
49
54
|
),
|
|
50
55
|
metavar='DESTINATION_DIR',
|
|
51
56
|
)
|
|
57
|
+
@click.option(
|
|
58
|
+
'--format', '-f', 'output_format',
|
|
59
|
+
type=click.Choice(('json', 'yaml'), case_sensitive=True),
|
|
60
|
+
default='json',
|
|
61
|
+
help='select output format for the exported records (default: json)',
|
|
62
|
+
)
|
|
52
63
|
@click.option(
|
|
53
64
|
'--ignore-errors',
|
|
54
65
|
default=False,
|
|
55
66
|
is_flag=True,
|
|
56
67
|
help='ignore records with missing `schema_type` instead of raising an error',
|
|
57
68
|
)
|
|
69
|
+
@click.option(
|
|
70
|
+
'--keep-schema-type', '-k',
|
|
71
|
+
default=False,
|
|
72
|
+
is_flag=True,
|
|
73
|
+
help='keep `schema_type`-attribute in records on file-system. By default the '
|
|
74
|
+
'schema_type-attribute is removed because the class is encoded in the '
|
|
75
|
+
'storage path of the records.'
|
|
76
|
+
)
|
|
58
77
|
def cli(
|
|
59
78
|
obj: Any,
|
|
60
79
|
service_url: str,
|
|
61
80
|
collection: str,
|
|
62
81
|
destination: Path,
|
|
63
|
-
|
|
82
|
+
output_format: str,
|
|
83
|
+
ignore_errors: bool,
|
|
84
|
+
keep_schema_type: bool,
|
|
64
85
|
):
|
|
65
86
|
"""Export a collection to disk
|
|
66
87
|
|
|
@@ -79,7 +100,9 @@ def cli(
|
|
|
79
100
|
service_url,
|
|
80
101
|
collection,
|
|
81
102
|
destination,
|
|
103
|
+
output_format,
|
|
82
104
|
ignore_errors,
|
|
105
|
+
keep_schema_type,
|
|
83
106
|
)
|
|
84
107
|
except HTTPError as e:
|
|
85
108
|
console.print(f'[red]Error[/red]: {e}: {e.response.text}')
|
|
@@ -93,7 +116,9 @@ def export(
|
|
|
93
116
|
service_url: str,
|
|
94
117
|
collection: str,
|
|
95
118
|
destination: Path,
|
|
119
|
+
output_format: str,
|
|
96
120
|
ignore_errors: bool,
|
|
121
|
+
keep_schema_type: bool,
|
|
97
122
|
):
|
|
98
123
|
token = obj
|
|
99
124
|
|
|
@@ -125,17 +150,17 @@ def export(
|
|
|
125
150
|
|
|
126
151
|
console.print('Exporting records from curated area')
|
|
127
152
|
_store_records(
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
token=token,
|
|
134
|
-
session=session,
|
|
135
|
-
)
|
|
153
|
+
curated_read_records(
|
|
154
|
+
service_url=service_url,
|
|
155
|
+
collection=collection,
|
|
156
|
+
token=token,
|
|
157
|
+
session=session,
|
|
136
158
|
),
|
|
137
159
|
curated_destination,
|
|
160
|
+
output_format,
|
|
138
161
|
ignore_errors,
|
|
162
|
+
keep_schema_type,
|
|
163
|
+
source_name='curated area',
|
|
139
164
|
)
|
|
140
165
|
|
|
141
166
|
# Store the incoming records
|
|
@@ -149,55 +174,122 @@ def export(
|
|
|
149
174
|
incoming_destination = destination / 'incoming' / label
|
|
150
175
|
incoming_destination.mkdir(parents=True, exist_ok=False)
|
|
151
176
|
_store_records(
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
token=token,
|
|
159
|
-
session=session,
|
|
160
|
-
)
|
|
177
|
+
incoming_read_records(
|
|
178
|
+
service_url=service_url,
|
|
179
|
+
collection=collection,
|
|
180
|
+
label=label,
|
|
181
|
+
token=token,
|
|
182
|
+
session=session,
|
|
161
183
|
),
|
|
162
184
|
incoming_destination,
|
|
185
|
+
output_format,
|
|
163
186
|
ignore_errors,
|
|
187
|
+
keep_schema_type,
|
|
188
|
+
source_name=f'incoming area: {label}'
|
|
164
189
|
)
|
|
165
190
|
|
|
166
191
|
return 0
|
|
167
192
|
|
|
168
193
|
|
|
169
194
|
def _store_records(
    source: Generator,
    destination: Path,
    output_format: str,
    ignore_errors: bool,
    keep_schema_type: bool,
    source_name: str,
):
    """Write every record yielded by ``source`` to its own file below ``destination``.

    Each record is written to
    ``destination / <class name> / <hash dir> / <hash name>.<extension>``,
    where the class name is the de-prefixed ``schema_type`` of the record and
    the hash components are derived from the record's ``pid``.

    :param source: iterator of 5-tuples; the first element is the record
        (a dict) and the fifth element of the first tuple is used as the
        total for the progress display
    :param destination: directory below which the record files are created
    :param output_format: key into the module-level ``writer`` table
    :param ignore_errors: if true, records without ``schema_type`` are
        reported and skipped instead of aborting the export
    :param keep_schema_type: if false, ``schema_type`` is deleted from each
        record (in place) before it is written
    :param source_name: human-readable name of the source, used in messages
    :raises ValueError: if a record has no ``schema_type`` and
        ``ignore_errors`` is false, or if ``output_format`` is not supported
    """
    # Fetch the first result up front: its fifth element sizes the progress
    # bar, and an exhausted source means there is nothing to export.
    try:
        first_tuple = next(source)
    except StopIteration:
        console.print(f'no records in [green]{source_name}[/green], skipping it')
        return

    # Resolve the writer once, outside the loop, so that a KeyError raised
    # while writing a record is not mistaken for an unsupported format.
    try:
        write_record = writer[output_format]
    except KeyError:
        msg = f'unsupported output format: {output_format}'
        raise ValueError(msg) from None

    created_dirs = set()
    total = first_tuple[4]
    for record, _, _, _, _ in track(chain([first_tuple], source), total=total, console=console):
        schema_type = record.get('schema_type', None)
        if schema_type is None:
            if ignore_errors:
                console.print(f'[red]Error[/red]: no `schema_type` in record [red]{record["pid"]}[/red] in {source_name}')
                continue
            msg = f'no `schema_type` in record {record["pid"]}'
            raise ValueError(msg)

        class_name = _de_prefix(schema_type)
        if not keep_schema_type:
            # The class is already encoded in the storage path.
            del record['schema_type']

        hash_dir, hash_name = _hash_p3(record['pid'])
        file_dir = destination / class_name / hash_dir
        # Remember the directories created in this run, so `mkdir` is only
        # called once per directory.
        if file_dir not in created_dirs:
            file_dir.mkdir(parents=True, exist_ok=False)
            created_dirs.add(file_dir)

        write_record(
            file_dir=file_dir,
            file_name=hash_name,
            record=record,
        )
|
|
198
245
|
|
|
199
246
|
|
|
200
247
|
def _de_prefix(
    name: str,
):
    """Return ``name`` without its prefix.

    Everything up to and including the first ``:`` is dropped. A name that
    contains no ``:`` is returned unchanged.
    """
    _, separator, remainder = name.partition(':')
    return remainder if separator else name
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _get_hex_digest(
    data: str,
) -> str:
    """Return the MD5 hex digest of the UTF-8 encoding of ``data``.

    :param data: the text to hash
    :return: the digest as a string of 32 lowercase hexadecimal digits
    """
    # `usedforsecurity=False` states that the digest is not used in a
    # security context, which keeps MD5 usable on restricted (e.g. FIPS)
    # Python builds.
    hash_context = hashlib.md5(data.encode('utf-8'), usedforsecurity=False)
    return hash_context.hexdigest()
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _hash_p3(
    pid: str,
) -> tuple[str, str]:
    """Split the hex digest of ``pid`` into a directory part and a name part.

    :param pid: the identifier to hash
    :return: a tuple of the first three characters of the digest and the
        remaining characters of the digest
    """
    digest = _get_hex_digest(pid)
    head, tail = digest[:3], digest[3:]
    return head, tail
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def write_json(
    file_dir: Path,
    file_name: str,
    record: dict,
):
    """Write ``record`` as indented JSON to ``file_dir / (file_name + '.json')``.

    :param file_dir: directory that receives the file
    :param file_name: file name without extension
    :param record: the record to serialize
    """
    target = file_dir / (file_name + '.json')
    # `ensure_ascii=False` emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of depending on the locale.
    target.write_text(
        json.dumps(record, indent=2, ensure_ascii=False) + '\n',
        encoding='utf-8',
    )
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def write_yaml(
    file_dir: Path,
    file_name: str,
    record: dict,
):
    """Write ``record`` as block-style YAML to ``file_dir / (file_name + '.yaml')``.

    Keys are written in the order in which they appear in ``record``.

    :param file_dir: directory that receives the file
    :param file_name: file name without extension
    :param record: the record to serialize
    """
    target = file_dir / (file_name + '.yaml')
    # `allow_unicode=True` emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 instead of depending on the locale.
    target.write_text(
        yaml.dump(
            data=record,
            sort_keys=False,
            allow_unicode=True,
            default_flow_style=False,
        ),
        encoding='utf-8',
    )
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Dispatch table: maps the name of each supported output format to the
# function that writes a single record in that format.
writer = dict(
    json=write_json,
    yaml=write_yaml,
)
|
|
@@ -68,13 +68,13 @@ console = Console(file=sys.stderr)
|
|
|
68
68
|
@click.option(
|
|
69
69
|
'--page-size', '-s',
|
|
70
70
|
type=click.IntRange(1, 100),
|
|
71
|
-
default=
|
|
71
|
+
default=None,
|
|
72
72
|
help='set the page size (default: 100). (ignored if "--pid" is provided)'
|
|
73
73
|
)
|
|
74
74
|
@click.option(
|
|
75
75
|
'--first-page', '-F',
|
|
76
76
|
type=click.INT,
|
|
77
|
-
default=
|
|
77
|
+
default=None,
|
|
78
78
|
help='the first page to return (default: 1). (ignored if "--pid" is provided)'
|
|
79
79
|
)
|
|
80
80
|
@click.option(
|
|
@@ -206,9 +206,8 @@ def get_records(
|
|
|
206
206
|
(cls, '-c/--class'),
|
|
207
207
|
):
|
|
208
208
|
if argument_value:
|
|
209
|
-
|
|
210
|
-
f'
|
|
211
|
-
err=True,
|
|
209
|
+
console.print(
|
|
210
|
+
f'[yellow]Warning[/yellow]: {argument_name} ignored because "-p/--pid" is provided',
|
|
212
211
|
)
|
|
213
212
|
|
|
214
213
|
kwargs['pid'] = pid
|
|
@@ -242,7 +241,7 @@ def get_records(
|
|
|
242
241
|
else:
|
|
243
242
|
kwargs.update(dict(
|
|
244
243
|
matching=matching,
|
|
245
|
-
page=first_page,
|
|
244
|
+
page=first_page or 1,
|
|
246
245
|
size=page_size or 100,
|
|
247
246
|
last_page=last_page,
|
|
248
247
|
))
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dump-things-pyclient
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: A client library and CLI commands for dump-things-services
|
|
5
5
|
Author-email: Christian Mönch <christian.moench@web.de>
|
|
6
6
|
Requires-Python: >=3.11
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
Requires-Dist: click>=8.3.1
|
|
9
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
9
10
|
Requires-Dist: requests>=2.32.5
|
|
10
11
|
Requires-Dist: rich-click>=1.9.6
|
|
11
12
|
Provides-Extra: ttl
|
|
@@ -5,17 +5,17 @@ dump_things_pyclient/commands/dtc.py,sha256=dxW5RuogqwhzfVujZ_EEsQMk8BcVMbZyMdg5
|
|
|
5
5
|
dump_things_pyclient/commands/json2ttl.py,sha256=8BkvdjLWZ_H0L6fTmuR2M2MglKiMUiuNUcuWr_w6_dQ,2133
|
|
6
6
|
dump_things_pyclient/commands/redirect.py,sha256=kl8pGj8khjxk4lhk8AJLfgtCIm5PtjeMAl0J6K5FB7M,264
|
|
7
7
|
dump_things_pyclient/commands/dtc_plugins/__init__.py,sha256=0YLByLiofhHkhJcDCkokldcCw3Jj0rsKJinRX4tt3Hc,514
|
|
8
|
-
dump_things_pyclient/commands/dtc_plugins/auto_curate.py,sha256=
|
|
8
|
+
dump_things_pyclient/commands/dtc_plugins/auto_curate.py,sha256=fWylrzsrBWzJthWJNd_NAy3KSfxKfdFQun6EACpymnw,9968
|
|
9
9
|
dump_things_pyclient/commands/dtc_plugins/clean_incoming.py,sha256=ikSPNTt254ax2tXhMK_gTgDCVkxMYJ_0NTAP8XsaRjk,2188
|
|
10
10
|
dump_things_pyclient/commands/dtc_plugins/delete_records.py,sha256=SRQTHz4cWofI-RVx_p_mUex3amTaGZ9xP_S4F12Pw64,3849
|
|
11
|
-
dump_things_pyclient/commands/dtc_plugins/export.py,sha256=
|
|
12
|
-
dump_things_pyclient/commands/dtc_plugins/get_records.py,sha256=
|
|
11
|
+
dump_things_pyclient/commands/dtc_plugins/export.py,sha256=Nbq-o1hq_6ZroBctKxzMyA1BSBnoqIkpSY8BO1aWuoA,7522
|
|
12
|
+
dump_things_pyclient/commands/dtc_plugins/get_records.py,sha256=5yzkZFrYIlN6O-QmAZjr6L5SFhpncC6lgQaqgZzg_TE,7614
|
|
13
13
|
dump_things_pyclient/commands/dtc_plugins/list_incoming.py,sha256=tmM0Qs4MVwMMLyERsWCxWGTM90rSNOShLpHH32wObd8,1959
|
|
14
14
|
dump_things_pyclient/commands/dtc_plugins/maintenance.py,sha256=yTw1T_cvVTmwuzrTPteu6O6qiNCMxL5ZQoVF8yb72-M,1707
|
|
15
15
|
dump_things_pyclient/commands/dtc_plugins/post_records.py,sha256=0676miD7VTMmokBAo7JdA9Dr9FZwhs0auNuRltulcBw,3469
|
|
16
16
|
dump_things_pyclient/commands/dtc_plugins/read_pages.py,sha256=Libxf36L-0wUqAqfavotZPRMy5LjWJ37n_zSae1TgTA,3546
|
|
17
|
-
dump_things_pyclient-0.2.
|
|
18
|
-
dump_things_pyclient-0.2.
|
|
19
|
-
dump_things_pyclient-0.2.
|
|
20
|
-
dump_things_pyclient-0.2.
|
|
21
|
-
dump_things_pyclient-0.2.
|
|
17
|
+
dump_things_pyclient-0.2.8.dist-info/METADATA,sha256=KAgBvgq-AlPkjmh-r5KwXkGNw9Wp0emCH3A8OCFNXJA,1028
|
|
18
|
+
dump_things_pyclient-0.2.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
19
|
+
dump_things_pyclient-0.2.8.dist-info/entry_points.txt,sha256=U1QhQtk767G_OXdZwPdTXYbIPfcDU13Z2u1d6exX8uE,470
|
|
20
|
+
dump_things_pyclient-0.2.8.dist-info/top_level.txt,sha256=Asvruw-SyLoYhWis1CFOx89RGxpjXoTZVGoq4JSGt88,21
|
|
21
|
+
dump_things_pyclient-0.2.8.dist-info/RECORD,,
|
|
File without changes
|
{dump_things_pyclient-0.2.6.dist-info → dump_things_pyclient-0.2.8.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|