dump-things-pyclient 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ from rich.progress import track
19
19
  from ...communicate import (
20
20
  HTTPError,
21
21
  curated_write_record,
22
+ get_session,
22
23
  incoming_delete_record,
23
24
  incoming_read_labels,
24
25
  incoming_read_records,
@@ -126,10 +127,8 @@ def cli(
126
127
  dry_run,
127
128
  )
128
129
  except HTTPError as e:
129
- rprint(
130
+ console.print(
130
131
  f'[red]Error[/red]: {e}: {e.response.text}',
131
- file=sys.stderr,
132
- flush=True,
133
132
  )
134
133
  return 1
135
134
 
@@ -154,6 +153,9 @@ def auto_curate(
154
153
  console.print(f'[red]Error[/red]: no token was provided (use --token or DTC_TOKEN environment variable)')
155
154
  return 1
156
155
 
156
+ if destination_collection is None:
157
+ destination_collection = collection
158
+
157
159
  if destination_service_url is None:
158
160
  destination_service_url = service_url
159
161
 
@@ -173,10 +175,12 @@ def auto_curate(
173
175
  if list_labels:
174
176
  output = []
175
177
 
178
+ session = get_session()
176
179
  all_labels = incoming_read_labels(
177
180
  service_url=service_url,
178
181
  collection=collection,
179
182
  token=obj,
183
+ session=session,
180
184
  )
181
185
  for label in all_labels:
182
186
  if include and label not in include:
@@ -200,6 +204,7 @@ def auto_curate(
200
204
  collection=collection,
201
205
  label=label,
202
206
  token=obj,
207
+ session=session,
203
208
  )
204
209
 
205
210
  # Get the first entry to find the total number of records
@@ -241,19 +246,25 @@ def auto_curate(
241
246
  class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
242
247
  except (IndexError, KeyError):
243
248
  global stl_info
244
- console.print(f'[yellow]Warning[/yellow]: ignoring record with pid {record["pid"]} because `schema_type` attribute is missing.')
249
+ console.print(f'[yellow]Warning[/yellow]: ignoring record with pid [yellow]{record["pid"]}[/yellow] because `schema_type` attribute is missing.')
245
250
  if not stl_info:
246
251
  console.print(
247
- ' Please ensure that `schema_type` is stored in the records '
248
- 'or that the associated incoming area store has a backend with a '
249
- '"Schema Type Layer", i.e., "record_dir+stl" or "sqlite+stl"."',
252
+ ' [yellow]Please ensure that `schema_type` is stored in the records. Note: '
253
+ 'if the incoming area store has a backend with a "Schema Type Layer", i.e., '
254
+ '"record_dir+stl" or "sqlite+stl", `schema_type` will not be stored on persistent '
255
+ 'storage and will not be returned when retrieving records from the incoming area. '
256
+ 'dump-things-service <= 5.4.0 circumvented the "Schema Type Layer", therefore they '
257
+ 'will return records without `schema_type` attributes on curator access to '
258
+ 'incoming areas or curated areas. Therefore it might be a good idea to NOT use a '
259
+ '"Schema Type Layer" in collections that shall be auto-curated, when using '
260
+ 'dump-things-service <= 5.4.0.[/yellow]',
250
261
  )
251
262
  stl_info = True
252
263
  continue
253
264
 
254
265
  if dry_run:
255
- console.print(f'WRITE record [green]"{record["pid"]}"[/green] of class "{class_name}" to "{destination_collection}@{destination_service_url}"')
256
- console.print(f'DELETE record [green]"{record["pid"]}"[/green] from inbox "{label}" of "{collection}@{service_url}"')
266
+ console.print(f'WRITE record [green]"{record["pid"]}"[/green] of class "{class_name}" to collection "{destination_collection}" on "{destination_service_url}"')
267
+ console.print(f'DELETE record [green]"{record["pid"]}"[/green] from inbox "{label}" of collection "{collection}" on "{service_url}"')
257
268
  continue
258
269
 
259
270
  # Store record in destination collection
@@ -264,12 +275,13 @@ def auto_curate(
264
275
  class_name=class_name,
265
276
  record=record,
266
277
  token=destination_token,
278
+ session=session,
267
279
  )
268
280
  except HTTPError as e:
269
281
  console.print(
270
282
  f'[red]Error[/red]: writing record with pid {record["pid"]} failed: {e}: {e.response.text}',
271
283
  )
272
- raise
284
+ return 1
273
285
 
274
286
  # Delete record from incoming area
275
287
  try:
@@ -279,12 +291,13 @@ def auto_curate(
279
291
  label=label,
280
292
  pid=record['pid'],
281
293
  token=curator_token,
294
+ session=session,
282
295
  )
283
296
  except HTTPError as e:
284
297
  console.print(
285
298
  f'[red]ERROR[/red]: deleting record with pid {record["pid"]} failed: {e}: {e.response.text}',
286
299
  )
287
- raise
300
+ return 1
288
301
 
289
302
  if output is not None:
290
303
  rprint(json.dumps(output, ensure_ascii=False))
@@ -4,6 +4,7 @@ import rich_click as click
4
4
 
5
5
  from ...communicate import (
6
6
  HTTPError,
7
+ get_session,
7
8
  incoming_delete_record,
8
9
  incoming_read_records,
9
10
  )
@@ -71,11 +72,13 @@ def clean_incoming(
71
72
  click.echo('ERROR: token not provided', err=True)
72
73
  return 1
73
74
 
75
+ session = get_session()
74
76
  for record, _, _, _, _ in incoming_read_records(
75
77
  service_url=service_url,
76
78
  collection=collection,
77
79
  label=inbox_label,
78
80
  token=token,
81
+ session=session,
79
82
  ):
80
83
  if list_only:
81
84
  click.echo(json.dumps(record, ensure_ascii=False))
@@ -88,6 +91,6 @@ def clean_incoming(
88
91
  label=inbox_label,
89
92
  pid=record['pid'],
90
93
  token=token,
91
-
94
+ session=session,
92
95
  )
93
96
  return 0
@@ -4,9 +4,12 @@ import sys
4
4
  from functools import partial
5
5
 
6
6
  import rich_click as click
7
+ from rich.progress import track
8
+ from rich.console import Console
7
9
 
8
10
  from ...communicate import (
9
11
  HTTPError,
12
+ get_session,
10
13
  collection_delete_record,
11
14
  curated_delete_record,
12
15
  incoming_delete_record,
@@ -18,6 +21,8 @@ subcommand_name = 'delete-records'
18
21
 
19
22
  logger = logging.getLogger('delete-records')
20
23
 
24
+ console = Console(file=sys.stderr)
25
+
21
26
 
22
27
  @click.command(short_help='Delete records from a dump-things collection')
23
28
  @click.pass_obj
@@ -83,7 +88,7 @@ def cli(
83
88
  ignore_errors,
84
89
  )
85
90
  except HTTPError as e:
86
- click.echo(f'ERROR: {e}: {e.response.text}', err=True)
91
+ console.print(f'[red]Error[/red]: {e}: {e.response.text}')
87
92
  return 1
88
93
 
89
94
 
@@ -102,16 +107,15 @@ def delete_records(
102
107
  click.echo(f'WARNING: no token provided', err=True)
103
108
 
104
109
  if incoming and curated:
105
- click.echo(
106
- 'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
107
- err=True,
108
- )
110
+ console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
109
111
  return 1
110
112
 
113
+ session = get_session()
111
114
  kwargs = dict(
112
115
  service_url=service_url,
113
116
  collection=collection,
114
117
  token=token,
118
+ session=session,
115
119
  )
116
120
 
117
121
  if incoming == '-':
@@ -134,17 +138,18 @@ def delete_records(
134
138
  if not pids:
135
139
  pids = sys.stdin
136
140
 
137
- for pid in pids:
141
+ for pid in track(pids, console=console):
138
142
  try:
139
143
  operation(
140
144
  service_url=service_url,
141
145
  collection=collection,
142
146
  pid=pid.strip(),
143
147
  token=token,
148
+ session=session,
144
149
  )
145
150
  except HTTPError as e:
151
+ console.print(f'[red]Error[/red]: while deleting pid {pid}: {e}, {e.response.text}')
146
152
  if ignore_errors:
147
- click.echo(f'ERROR: while deleting pid {pid}: {e}', err=True)
148
153
  continue
149
- raise
154
+ return 1
150
155
  return 0
@@ -1,17 +1,26 @@
1
+ import hashlib
1
2
  import json
3
+ import sys
2
4
  from collections import defaultdict
3
- from itertools import count
5
+ from itertools import (
6
+ chain,
7
+ count,
8
+ )
4
9
  from pathlib import Path
5
10
  from typing import (
6
11
  Any,
7
- Iterable,
12
+ Generator,
8
13
  )
9
14
 
10
15
  import rich_click as click
16
+ import yaml
17
+ from rich.console import Console
18
+ from rich.progress import track
11
19
 
12
20
  from ...communicate import (
13
21
  HTTPError,
14
22
  curated_read_records,
23
+ get_session,
15
24
  incoming_read_labels,
16
25
  incoming_read_records,
17
26
  server,
@@ -20,6 +29,8 @@ from ...communicate import (
20
29
 
21
30
  subcommand_name = 'export'
22
31
 
32
+ console = Console(file=sys.stderr)
33
+
23
34
 
24
35
  @click.command(short_help='Export a collection to the file system')
25
36
  @click.pass_obj
@@ -43,18 +54,34 @@ subcommand_name = 'export'
43
54
  ),
44
55
  metavar='DESTINATION_DIR',
45
56
  )
57
+ @click.option(
58
+ '--format', '-f', 'output_format',
59
+ type=click.Choice(('json', 'yaml'), case_sensitive=True),
60
+ default='json',
61
+ help='select output format for the exported records (default: json)',
62
+ )
46
63
  @click.option(
47
64
  '--ignore-errors',
48
65
  default=False,
49
66
  is_flag=True,
50
67
  help='ignore records with missing `schema_type` instead of raising an error',
51
68
  )
69
+ @click.option(
70
+ '--keep-schema-type', '-k',
71
+ default=False,
72
+ is_flag=True,
73
+ help='keep `schema_type`-attribute in records on file-system. By default the '
74
+ 'schema_type-attribute is removed because the class is encoded in the '
75
+ 'storage path of the records.'
76
+ )
52
77
  def cli(
53
78
  obj: Any,
54
79
  service_url: str,
55
80
  collection: str,
56
81
  destination: Path,
57
- ignore_errors,
82
+ output_format: str,
83
+ ignore_errors: bool,
84
+ keep_schema_type: bool,
58
85
  ):
59
86
  """Export a collection to disk
60
87
 
@@ -73,12 +100,14 @@ def cli(
73
100
  service_url,
74
101
  collection,
75
102
  destination,
103
+ output_format,
76
104
  ignore_errors,
105
+ keep_schema_type,
77
106
  )
78
107
  except HTTPError as e:
79
- click.echo(f'ERROR: {e}: {e.response.text}', err=True)
108
+ console.print(f'[red]Error[/red]: {e}: {e.response.text}')
80
109
  except ValueError as e:
81
- click.echo(f'ERROR: {e}', err=True)
110
+ console.print(f'[red]Error[/red]: {e}')
82
111
  return 1
83
112
 
84
113
 
@@ -87,19 +116,22 @@ def export(
87
116
  service_url: str,
88
117
  collection: str,
89
118
  destination: Path,
119
+ output_format: str,
90
120
  ignore_errors: bool,
121
+ keep_schema_type: bool,
91
122
  ):
92
123
  token = obj
93
124
 
94
125
  if token is None:
95
- click.echo(f'ERROR: no token provided', err=True)
126
+ console.print(f'[red]Error[/red]: no token provided')
96
127
  return 1
97
128
 
98
- server_info = server(service_url)
129
+ session = get_session()
130
+ server_info = server(service_url, session=session)
99
131
  collection_info = ([c for c in server_info['collections'] if c['name'] == collection] or None)[0]
100
132
 
101
133
  if not collection_info:
102
- click.echo(f'ERROR: no collection {collection} on service', err=True)
134
+ console.print(f'[red]Error[/red]: no collection {collection} on service')
103
135
  return 1
104
136
 
105
137
  description = {
@@ -116,17 +148,19 @@ def export(
116
148
  curated_destination = destination / 'curated'
117
149
  curated_destination.mkdir()
118
150
 
151
+ console.print('Exporting records from curated area')
119
152
  _store_records(
120
- map(
121
- lambda x: x[0],
122
- curated_read_records(
123
- service_url=service_url,
124
- collection=collection,
125
- token=token,
126
- )
153
+ curated_read_records(
154
+ service_url=service_url,
155
+ collection=collection,
156
+ token=token,
157
+ session=session,
127
158
  ),
128
159
  curated_destination,
160
+ output_format,
129
161
  ignore_errors,
162
+ keep_schema_type,
163
+ source_name='curated area',
130
164
  )
131
165
 
132
166
  # Store the incoming records
@@ -134,61 +168,128 @@ def export(
134
168
  service_url=service_url,
135
169
  collection=collection,
136
170
  token=token,
171
+ session=session,
137
172
  ):
173
+ console.print(f'Exporting records from incoming area: {label}')
138
174
  incoming_destination = destination / 'incoming' / label
139
175
  incoming_destination.mkdir(parents=True, exist_ok=False)
140
176
  _store_records(
141
- map(
142
- lambda x: x[0],
143
- incoming_read_records(
144
- service_url=service_url,
145
- collection=collection,
146
- label=label,
147
- token=token,
148
- )
177
+ incoming_read_records(
178
+ service_url=service_url,
179
+ collection=collection,
180
+ label=label,
181
+ token=token,
182
+ session=session,
149
183
  ),
150
184
  incoming_destination,
185
+ output_format,
151
186
  ignore_errors,
187
+ keep_schema_type,
188
+ source_name=f'incoming area: {label}'
152
189
  )
153
190
 
154
191
  return 0
155
192
 
156
193
 
157
194
def _store_records(
    source: Generator,
    destination: Path,
    output_format: str,
    ignore_errors: bool,
    keep_schema_type: bool,
    source_name: str,
):
    """Write every record yielded by `source` to its own file below `destination`.

    Each item of `source` is unpacked as a 5-tuple: the first element is the
    record (a dict that is read for `pid` and `schema_type`), the fifth
    element of the first item is used as the total for the progress bar.

    Records are stored as
    `destination/<class name>/<first 3 hash chars>/<remaining hash chars>.<ext>`,
    where the hash is derived from the record's `pid` and the class name is
    the de-prefixed `schema_type`.

    :param source: iterator of 5-tuples as described above
    :param destination: root directory for the exported records
    :param output_format: key into the module-level `writer` table
    :param ignore_errors: if true, records without `schema_type` are reported
        and skipped instead of aborting the export
    :param keep_schema_type: if false, `schema_type` is removed from the record
        before it is written (the class is already encoded in the path)
    :param source_name: human-readable name of the source, used in messages
    :raises ValueError: if `output_format` is not supported, or if a record has
        no `schema_type` and `ignore_errors` is false
    """
    records = iter(source)

    # Fetch the first result up front: it carries the total number of
    # records, which the progress bar needs before iteration starts.
    try:
        first_tuple = next(records)
    except StopIteration:
        console.print(f'no records in [green]{source_name}[/green], skipping it')
        return

    # Resolve the writer once, outside the loop. Guarding only the lookup
    # ensures that a KeyError raised inside a writer is not misreported as
    # an unsupported output format.
    try:
        write_record = writer[output_format]
    except KeyError:
        msg = f'unsupported output format: {output_format}'
        raise ValueError(msg) from None

    created_dirs = set()
    total = first_tuple[4]
    for record, _, _, _, _ in track(chain([first_tuple], records), total=total, console=console):
        schema_type = record.get('schema_type', None)
        if schema_type is None:
            if ignore_errors:
                console.print(f'[red]Error[/red]: no `schema_type` in record [red]{record["pid"]}[/red] in {source_name}')
                continue
            msg = f'no `schema_type` in record {record["pid"]}'
            raise ValueError(msg)

        class_name = _de_prefix(schema_type)
        if not keep_schema_type:
            del record['schema_type']

        hash_dir, hash_name = _hash_p3(record['pid'])
        file_dir = destination / class_name / hash_dir
        if file_dir not in created_dirs:
            # `exist_ok=False` is kept on purpose: the export expects to
            # create every directory itself.
            file_dir.mkdir(parents=True, exist_ok=False)
            created_dirs.add(file_dir)

        write_record(
            file_dir=file_dir,
            file_name=hash_name,
            record=record,
        )
245
 
190
246
 
191
247
  def _de_prefix(
192
248
  name: str,
193
249
  ):
194
250
  return name.split(':', 1)[-1]
251
+
252
+
253
+ def _get_hex_digest(
254
+ data: str,
255
+ ) -> str:
256
+ hash_context = hashlib.md5(data.encode())
257
+ return hash_context.hexdigest()
258
+
259
+
260
def _hash_p3(
    pid: str,
) -> tuple[str, str]:
    """Split the hex digest of `pid` into a 3-character prefix and the rest.

    The prefix serves as a directory name and the remainder as a file name
    stem for the record identified by `pid`.
    """
    digest = _get_hex_digest(pid)
    prefix = digest[:3]
    remainder = digest[3:]
    return prefix, remainder
265
+
266
+
267
def write_json(
    file_dir: Path,
    file_name: str,
    record: dict,
):
    """Write `record` as indented JSON to `<file_dir>/<file_name>.json`.

    Non-ASCII characters are written verbatim (`ensure_ascii=False`), so the
    file is explicitly encoded as UTF-8 instead of relying on the locale's
    default encoding, which may be unable to represent them.

    :param file_dir: existing directory that receives the file
    :param file_name: file name without extension
    :param record: JSON-serializable record
    """
    (file_dir / (file_name + '.json')).write_text(
        json.dumps(record, indent=2, ensure_ascii=False) + '\n',
        encoding='utf-8',
    )
275
+
276
+
277
def write_yaml(
    file_dir: Path,
    file_name: str,
    record: dict,
):
    """Write `record` as block-style YAML to `<file_dir>/<file_name>.yaml`.

    Keys keep their original order (`sort_keys=False`). Non-ASCII characters
    are emitted verbatim (`allow_unicode=True`), so the file is explicitly
    encoded as UTF-8 instead of relying on the locale's default encoding.

    :param file_dir: existing directory that receives the file
    :param file_name: file name without extension
    :param record: record to serialize
    """
    (file_dir / (file_name + '.yaml')).write_text(
        yaml.dump(
            data=record,
            sort_keys=False,
            allow_unicode=True,
            default_flow_style=False,
        ),
        encoding='utf-8',
    )
290
+
291
+
292
+ writer = {
293
+ 'json': write_json,
294
+ 'yaml': write_yaml,
295
+ }
@@ -1,7 +1,9 @@
1
1
  import json
2
+ import sys
2
3
  from functools import partial
3
4
 
4
5
  import rich_click as click
6
+ from rich.console import Console
5
7
 
6
8
  from ...communicate import (
7
9
  HTTPError,
@@ -11,6 +13,7 @@ from ...communicate import (
11
13
  curated_read_records,
12
14
  curated_read_records_of_class,
13
15
  curated_read_record_with_pid,
16
+ get_session,
14
17
  incoming_read_labels,
15
18
  incoming_read_records,
16
19
  incoming_read_records_of_class,
@@ -20,6 +23,8 @@ from ...communicate import (
20
23
 
21
24
  subcommand_name = 'get-records'
22
25
 
26
+ console = Console(file=sys.stderr)
27
+
23
28
 
24
29
  @click.command(short_help='Get records from a dump-things collection')
25
30
  @click.pass_obj
@@ -48,7 +53,7 @@ subcommand_name = 'get-records'
48
53
  @click.option(
49
54
  '--incoming', '-i',
50
55
  metavar='LABEL',
51
- help='read from the collection\'s inbox with label LABEL, if LABEL is "-", return labels of all collection inboxes and exit',
56
+ help='read from the collection\'s inbox with label LABEL, if LABEL is "-", print labels of all collection inboxes and exit',
52
57
  )
53
58
  @click.option(
54
59
  '--curated', '-c',
@@ -144,7 +149,7 @@ def cli(
144
149
  pagination,
145
150
  )
146
151
  except HTTPError as e:
147
- click.echo(f'ERROR: {e}: {e.response.text}', err=True)
152
+ console.print(f'[red]Error[/red]: {e}: {e.response.text}')
148
153
  return 1
149
154
 
150
155
 
@@ -167,19 +172,18 @@ def get_records(
167
172
  token = obj
168
173
 
169
174
  if token is None:
170
- click.echo(f'WARNING: no token provided', err=True)
175
+ console.print(f'[yellow]Warning[/yellow]: no token provided')
171
176
 
172
177
  if incoming and curated:
173
- click.echo(
174
- 'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
175
- err=True,
176
- )
178
+ console.print('[red]Error[/red]: -i/--incoming and -c/--curated are mutually exclusive')
177
179
  return 1
178
180
 
181
+ session = get_session()
179
182
  kwargs = dict(
180
183
  service_url=service_url,
181
184
  collection=collection,
182
185
  token=token,
186
+ session=session,
183
187
  )
184
188
 
185
189
  if incoming == '-':
@@ -187,7 +191,9 @@ def get_records(
187
191
  click.echo('\n'.join(
188
192
  map(
189
193
  partial(json.dumps, ensure_ascii=False),
190
- result)))
194
+ result
195
+ )
196
+ ))
191
197
  return 0
192
198
 
193
199
  elif pid:
@@ -251,8 +257,8 @@ def get_records(
251
257
 
252
258
  if pagination:
253
259
  for record in result:
254
- print(json.dumps(record, ensure_ascii=False))
260
+ click.echo(json.dumps(record, ensure_ascii=False))
255
261
  else:
256
262
  for record in result:
257
- print(json.dumps(record[0], ensure_ascii=False))
263
+ click.echo(json.dumps(record[0], ensure_ascii=False))
258
264
  return 0
@@ -1,15 +1,20 @@
1
1
  import logging
2
+ import sys
2
3
 
3
4
  import rich_click as click
5
+ from rich.console import Console
4
6
 
5
7
  from ...communicate import (
6
8
  HTTPError,
9
+ get_session,
7
10
  maintenance as communicate_maintenance,
8
11
  )
9
12
 
10
13
 
11
14
  logger = logging.getLogger('maintenance')
12
15
 
16
+ console = Console(file=sys.stderr)
17
+
13
18
  subcommand_name = 'maintenance'
14
19
 
15
20
 
@@ -61,13 +66,15 @@ def maintenance(
61
66
  ):
62
67
  token = obj
63
68
  if token is None:
64
- click.echo('ERROR: no token provided', err=True)
69
+ console.print('[red]Error[/red]: no token provided')
65
70
  return 1
66
71
 
72
+ session = get_session()
67
73
  communicate_maintenance(
68
74
  service_url=service_url,
69
75
  collection=collection,
70
76
  active=active,
71
77
  token=token,
78
+ session=session,
72
79
  )
73
80
  return 0