dump-things-pyclient 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,6 @@ from typing import (
12
12
  )
13
13
 
14
14
  import rich_click as click
15
- from rich import print as rprint
16
15
  from rich.console import Console
17
16
  from rich.progress import track
18
17
 
@@ -300,7 +299,7 @@ def auto_curate(
300
299
  return 1
301
300
 
302
301
  if output is not None:
303
- rprint(json.dumps(output, ensure_ascii=False))
302
+ click.echo(json.dumps(output, ensure_ascii=False))
304
303
 
305
304
  return 0
306
305
 
@@ -1,14 +1,19 @@
1
+ import hashlib
1
2
  import json
2
3
  import sys
3
4
  from collections import defaultdict
4
- from itertools import count
5
+ from itertools import (
6
+ chain,
7
+ count,
8
+ )
5
9
  from pathlib import Path
6
10
  from typing import (
7
11
  Any,
8
- Iterable,
12
+ Generator,
9
13
  )
10
14
 
11
15
  import rich_click as click
16
+ import yaml
12
17
  from rich.console import Console
13
18
  from rich.progress import track
14
19
 
@@ -49,18 +54,34 @@ console = Console(file=sys.stderr)
49
54
  ),
50
55
  metavar='DESTINATION_DIR',
51
56
  )
57
+ @click.option(
58
+ '--format', '-f', 'output_format',
59
+ type=click.Choice(('json', 'yaml'), case_sensitive=True),
60
+ default='json',
61
+ help='select output format for the exported records (default: json)',
62
+ )
52
63
  @click.option(
53
64
  '--ignore-errors',
54
65
  default=False,
55
66
  is_flag=True,
56
67
  help='ignore records with missing `schema_type` instead of raising an error',
57
68
  )
69
+ @click.option(
70
+ '--keep-schema-type', '-k',
71
+ default=False,
72
+ is_flag=True,
73
+ help='keep `schema_type`-attribute in records on file-system. By default the '
74
+ 'schema_type-attribute is removed because the class is encoded in the '
75
+ 'storage path of the records.'
76
+ )
58
77
  def cli(
59
78
  obj: Any,
60
79
  service_url: str,
61
80
  collection: str,
62
81
  destination: Path,
63
- ignore_errors,
82
+ output_format: str,
83
+ ignore_errors: bool,
84
+ keep_schema_type: bool,
64
85
  ):
65
86
  """Export a collection to disk
66
87
 
@@ -79,7 +100,9 @@ def cli(
79
100
  service_url,
80
101
  collection,
81
102
  destination,
103
+ output_format,
82
104
  ignore_errors,
105
+ keep_schema_type,
83
106
  )
84
107
  except HTTPError as e:
85
108
  console.print(f'[red]Error[/red]: {e}: {e.response.text}')
@@ -93,7 +116,9 @@ def export(
93
116
  service_url: str,
94
117
  collection: str,
95
118
  destination: Path,
119
+ output_format: str,
96
120
  ignore_errors: bool,
121
+ keep_schema_type: bool,
97
122
  ):
98
123
  token = obj
99
124
 
@@ -125,17 +150,17 @@ def export(
125
150
 
126
151
  console.print('Exporting records from curated area')
127
152
  _store_records(
128
- map(
129
- lambda x: x[0],
130
- curated_read_records(
131
- service_url=service_url,
132
- collection=collection,
133
- token=token,
134
- session=session,
135
- )
153
+ curated_read_records(
154
+ service_url=service_url,
155
+ collection=collection,
156
+ token=token,
157
+ session=session,
136
158
  ),
137
159
  curated_destination,
160
+ output_format,
138
161
  ignore_errors,
162
+ keep_schema_type,
163
+ source_name='curated area',
139
164
  )
140
165
 
141
166
  # Store the incoming records
@@ -149,55 +174,122 @@ def export(
149
174
  incoming_destination = destination / 'incoming' / label
150
175
  incoming_destination.mkdir(parents=True, exist_ok=False)
151
176
  _store_records(
152
- map(
153
- lambda x: x[0],
154
- incoming_read_records(
155
- service_url=service_url,
156
- collection=collection,
157
- label=label,
158
- token=token,
159
- session=session,
160
- )
177
+ incoming_read_records(
178
+ service_url=service_url,
179
+ collection=collection,
180
+ label=label,
181
+ token=token,
182
+ session=session,
161
183
  ),
162
184
  incoming_destination,
185
+ output_format,
163
186
  ignore_errors,
187
+ keep_schema_type,
188
+ source_name=f'incoming area: {label}'
164
189
  )
165
190
 
166
191
  return 0
167
192
 
168
193
 
169
194
  def _store_records(
170
- source: Iterable,
195
+ source: Generator,
171
196
  destination: Path,
172
- ignore_errors: bool = False,
197
+ output_format: str,
198
+ ignore_errors: bool,
199
+ keep_schema_type: bool,
200
+ source_name: str,
173
201
  ):
174
202
  created_dirs = set()
175
203
  class_counters = defaultdict(count)
176
204
 
177
- for record in track(source, console=console):
178
- class_name = _de_prefix(record.get('schema_type', None))
179
- if class_name is None:
205
+ # Get the first result from the source to determine the total number
206
+ # of records.
207
+ try:
208
+ first_tuple = next(source)
209
+ except StopIteration:
210
+ console.print(f'no records in incoming [green]{source_name}[/green], skipping it')
211
+ return
212
+
213
+ total = first_tuple[4]
214
+ for record, _, _, _, _ in track(chain([first_tuple], source), total=total, console=console):
215
+ schema_type = record.get('schema_type', None)
216
+ if schema_type is None:
180
217
  if ignore_errors:
181
- console.print(f'[red]Error[/red]: no `schema type` in record {record["pid"]}')
218
+ console.print(f'[red]Error[/red]: no `schema type` in record [red]{record["pid"]}[/red] in {source_name}')
182
219
  continue
183
220
  msg = f'no `schema_type` in record {record["pid"]}'
184
221
  raise ValueError(msg)
185
222
 
186
- next_name_for_class = f'{next(class_counters[class_name]):09d}.json'
223
+ class_name = _de_prefix(schema_type)
224
+ if not keep_schema_type:
225
+ del record['schema_type']
226
+
227
+ hash_dir, hash_name = _hash_p3(record['pid'])
187
228
  file_dir, file_name = (
188
- destination / class_name / next_name_for_class[:3],
189
- next_name_for_class[3:]
229
+ destination / class_name / hash_dir,
230
+ hash_name,
190
231
  )
191
232
  if file_dir not in created_dirs:
192
233
  file_dir.mkdir(parents=True, exist_ok=False)
193
234
  created_dirs.add(file_dir)
194
235
 
195
- (file_dir / file_name).write_text(
196
- json.dumps(record, indent=2, ensure_ascii=False),
197
- )
236
+ try:
237
+ writer[output_format](
238
+ file_dir=file_dir,
239
+ file_name=file_name,
240
+ record=record,
241
+ )
242
+ except KeyError as e:
243
+ msg = f'unsupported output format: {output_format}'
244
+ raise ValueError(msg)
198
245
 
199
246
 
200
247
  def _de_prefix(
201
248
  name: str,
202
249
  ):
203
250
  return name.split(':', 1)[-1]
251
+
252
+
253
+ def _get_hex_digest(
254
+ data: str,
255
+ ) -> str:
256
+ hash_context = hashlib.md5(data.encode())
257
+ return hash_context.hexdigest()
258
+
259
+
260
+ def _hash_p3(
261
+ pid: str,
262
+ ) -> tuple[str, str]:
263
+ hex_digest = _get_hex_digest(pid)
264
+ return hex_digest[:3], hex_digest[3:]
265
+
266
+
267
+ def write_json(
268
+ file_dir: Path,
269
+ file_name: str,
270
+ record: dict,
271
+ ):
272
+ (file_dir / (file_name + '.json')).write_text(
273
+ json.dumps(record, indent=2, ensure_ascii=False) + '\n',
274
+ )
275
+
276
+
277
+ def write_yaml(
278
+ file_dir: Path,
279
+ file_name: str,
280
+ record: dict,
281
+ ):
282
+ (file_dir / (file_name + '.yaml')).write_text(
283
+ yaml.dump(
284
+ data=record,
285
+ sort_keys=False,
286
+ allow_unicode=True,
287
+ default_flow_style=False,
288
+ ),
289
+ )
290
+
291
+
292
+ writer = {
293
+ 'json': write_json,
294
+ 'yaml': write_yaml,
295
+ }
@@ -68,13 +68,13 @@ console = Console(file=sys.stderr)
68
68
  @click.option(
69
69
  '--page-size', '-s',
70
70
  type=click.IntRange(1, 100),
71
- default=100,
71
+ default=None,
72
72
  help='set the page size (default: 100). (ignored if "--pid" is provided)'
73
73
  )
74
74
  @click.option(
75
75
  '--first-page', '-F',
76
76
  type=click.INT,
77
- default=1,
77
+ default=None,
78
78
  help='the first page to return (default: 1). (ignored if "--pid" is provided)'
79
79
  )
80
80
  @click.option(
@@ -206,9 +206,8 @@ def get_records(
206
206
  (cls, '-c/--class'),
207
207
  ):
208
208
  if argument_value:
209
- click.echo(
210
- f'WARNING: {argument_name} ignored because "-p/--pid" is provided',
211
- err=True,
209
+ console.print(
210
+ f'[yellow]Warning[/yellow]: {argument_name} ignored because "-p/--pid" is provided',
212
211
  )
213
212
 
214
213
  kwargs['pid'] = pid
@@ -242,7 +241,7 @@ def get_records(
242
241
  else:
243
242
  kwargs.update(dict(
244
243
  matching=matching,
245
- page=first_page,
244
+ page=first_page or 1,
246
245
  size=page_size or 100,
247
246
  last_page=last_page,
248
247
  ))
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dump-things-pyclient
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: A client library and CLI commands for dump-things-services
5
5
  Author-email: Christian Mönch <christian.moench@web.de>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  Requires-Dist: click>=8.3.1
9
+ Requires-Dist: pyyaml>=6.0.3
9
10
  Requires-Dist: requests>=2.32.5
10
11
  Requires-Dist: rich-click>=1.9.6
11
12
  Provides-Extra: ttl
@@ -5,17 +5,17 @@ dump_things_pyclient/commands/dtc.py,sha256=dxW5RuogqwhzfVujZ_EEsQMk8BcVMbZyMdg5
5
5
  dump_things_pyclient/commands/json2ttl.py,sha256=8BkvdjLWZ_H0L6fTmuR2M2MglKiMUiuNUcuWr_w6_dQ,2133
6
6
  dump_things_pyclient/commands/redirect.py,sha256=kl8pGj8khjxk4lhk8AJLfgtCIm5PtjeMAl0J6K5FB7M,264
7
7
  dump_things_pyclient/commands/dtc_plugins/__init__.py,sha256=0YLByLiofhHkhJcDCkokldcCw3Jj0rsKJinRX4tt3Hc,514
8
- dump_things_pyclient/commands/dtc_plugins/auto_curate.py,sha256=dIpTJ4IqaORqMH0j-h7dr-7aoxjsJADNALpUKbVskcs,9997
8
+ dump_things_pyclient/commands/dtc_plugins/auto_curate.py,sha256=fWylrzsrBWzJthWJNd_NAy3KSfxKfdFQun6EACpymnw,9968
9
9
  dump_things_pyclient/commands/dtc_plugins/clean_incoming.py,sha256=ikSPNTt254ax2tXhMK_gTgDCVkxMYJ_0NTAP8XsaRjk,2188
10
10
  dump_things_pyclient/commands/dtc_plugins/delete_records.py,sha256=SRQTHz4cWofI-RVx_p_mUex3amTaGZ9xP_S4F12Pw64,3849
11
- dump_things_pyclient/commands/dtc_plugins/export.py,sha256=s-uPWGLoZbyd4oyicNlk1_2f9MJWYDtSmLq7nTiKvE0,5232
12
- dump_things_pyclient/commands/dtc_plugins/get_records.py,sha256=5v3RUrNjEO4T5Ku0ZwGBy39dNxG5K-UcOwUigL8N4Vo,7615
11
+ dump_things_pyclient/commands/dtc_plugins/export.py,sha256=Nbq-o1hq_6ZroBctKxzMyA1BSBnoqIkpSY8BO1aWuoA,7522
12
+ dump_things_pyclient/commands/dtc_plugins/get_records.py,sha256=5yzkZFrYIlN6O-QmAZjr6L5SFhpncC6lgQaqgZzg_TE,7614
13
13
  dump_things_pyclient/commands/dtc_plugins/list_incoming.py,sha256=tmM0Qs4MVwMMLyERsWCxWGTM90rSNOShLpHH32wObd8,1959
14
14
  dump_things_pyclient/commands/dtc_plugins/maintenance.py,sha256=yTw1T_cvVTmwuzrTPteu6O6qiNCMxL5ZQoVF8yb72-M,1707
15
15
  dump_things_pyclient/commands/dtc_plugins/post_records.py,sha256=0676miD7VTMmokBAo7JdA9Dr9FZwhs0auNuRltulcBw,3469
16
16
  dump_things_pyclient/commands/dtc_plugins/read_pages.py,sha256=Libxf36L-0wUqAqfavotZPRMy5LjWJ37n_zSae1TgTA,3546
17
- dump_things_pyclient-0.2.6.dist-info/METADATA,sha256=9U_JvtQrlduHw2UZboU4uWfcbbAryqwBvnxCe3hoCg4,999
18
- dump_things_pyclient-0.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
19
- dump_things_pyclient-0.2.6.dist-info/entry_points.txt,sha256=U1QhQtk767G_OXdZwPdTXYbIPfcDU13Z2u1d6exX8uE,470
20
- dump_things_pyclient-0.2.6.dist-info/top_level.txt,sha256=Asvruw-SyLoYhWis1CFOx89RGxpjXoTZVGoq4JSGt88,21
21
- dump_things_pyclient-0.2.6.dist-info/RECORD,,
17
+ dump_things_pyclient-0.2.8.dist-info/METADATA,sha256=KAgBvgq-AlPkjmh-r5KwXkGNw9Wp0emCH3A8OCFNXJA,1028
18
+ dump_things_pyclient-0.2.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
19
+ dump_things_pyclient-0.2.8.dist-info/entry_points.txt,sha256=U1QhQtk767G_OXdZwPdTXYbIPfcDU13Z2u1d6exX8uE,470
20
+ dump_things_pyclient-0.2.8.dist-info/top_level.txt,sha256=Asvruw-SyLoYhWis1CFOx89RGxpjXoTZVGoq4JSGt88,21
21
+ dump_things_pyclient-0.2.8.dist-info/RECORD,,