dump-things-pyclient 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ import logging
2
+ import importlib
3
+ import pkgutil
4
+ from pathlib import Path
5
+
6
+ import rich_click as click
7
+
8
+
9
# Directory next to this file that holds the sub-command plugin modules.
dtc_plugins_dir = Path(__file__).with_name('dtc_plugins')

# Configure the root logger as early as possible (this adds a stream
# handler), so that problems during plugin loading are reported.
logging.basicConfig(level=logging.WARNING)
13
+
14
+
15
def load_subcommands(
    group,
    plugins_dir=None,
    package='dump_things_pyclient.commands.dtc_plugins',
):
    """Load all sub-command plugins and register them with the group.

    Every module found in `plugins_dir` is imported relative to `package`.
    A module is treated as a plugin only if it defines both a `cli`
    attribute (the command object) and a `subcommand_name` attribute (the
    name to register it under); all other modules are skipped silently.

    :param group: object providing `add_command(cmd=..., name=...)`
    :param plugins_dir: directory to scan for plugin modules, defaults to
        the module-level `dtc_plugins_dir`
    :param package: dotted name of the package the plugin modules belong to
    :raises SystemExit: with exit status 1 if a module cannot be imported
    """
    if plugins_dir is None:
        plugins_dir = dtc_plugins_dir

    # `pkgutil.iter_modules` is documented to take a list of path strings;
    # convert explicitly instead of relying on `Path` being accepted.
    for module_info in pkgutil.iter_modules([str(plugins_dir)]):
        try:
            module = importlib.import_module(
                '.' + module_info.name,
                package=package,
            )
        except Exception:
            # `Exception` rather than a bare `except`, so that Ctrl-C and
            # explicit interpreter exits are not reported as plugin errors.
            logging.exception('failed to load plugin module %s', module_info)
            # `exit()` is injected by the `site` module and may be missing;
            # raising `SystemExit` has the same effect and always works.
            raise SystemExit(1)

        # get the plugin attributes
        plugin_cli = getattr(module, 'cli', None)
        command_name = getattr(module, 'subcommand_name', None)

        # skip non-plugin files
        if plugin_cli is None or command_name is None:
            continue

        group.add_command(cmd=plugin_cli, name=command_name)
37
+
38
+
39
@click.group()
@click.option(
    '--token',
    envvar='DTC_TOKEN',
    default=None,
    help='provide a token on the command line, NOTE: on multiuser systems you should use the environment variable DTC_TOKEN instead',
)
@click.option(
    '--debug',
    envvar='DTC_DEBUG',
    default=False,
    is_flag=True,
    help='show debug output',
)
@click.pass_context
def cli(ctx, token: str, debug: bool):
    # Root command group. No docstring on purpose: click would display it
    # as the help text of the group.

    # Hand the token (or `None`) to the sub-commands, which receive it via
    # `click.pass_obj`.
    ctx.obj = token

    # Switch the root logger to DEBUG or INFO depending on `--debug`.
    initialize_logging(debug)
46
+
47
+
48
def initialize_logging(debug: bool):
    """Reconfigure the root logger: DEBUG level if `debug`, else INFO."""
    if debug:
        level = logging.DEBUG
    else:
        level = logging.INFO

    # `force=True` makes `basicConfig` take effect even though the root
    # logger was already configured before.
    logging.basicConfig(force=True, level=level)
53
+
54
+
55
# Load all command plugins from submodule `.dtc_plugins`.
load_subcommands(cli)


# Allow running this module directly as a script.
if __name__ == '__main__':
    cli()
@@ -0,0 +1,24 @@
1
+ """Subcommands for dtc
2
+
3
+ Each module implements a subcommand. To add a new subcommand, add a
4
+ module with the following attributes:
5
+
6
+ - `cli`: a `click.command`
7
+ - `subcommand_name`: the name of the subcommand
8
+
9
+ The following example shows the implementation of the subcommand `demo`:
10
+
11
+
12
+ ```python
13
+ import click
14
+
15
+ @click.command()
16
+ @click.pass_obj
17
+ def cli(obj):
18
+ click.echo(f'demo with custom object: {obj}')
19
+
20
+ subcommand_name = 'demo'
21
+ ```
22
+
23
+ The parameter `obj` will contain the token given by the user (via `--token` or the environment variable `DTC_TOKEN`), or `None` if no token was given.
24
+ """
@@ -0,0 +1,248 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ import sys
5
+
6
+ import rich_click as click
7
+
8
+ from ...communicate import (
9
+ HTTPError,
10
+ curated_write_record,
11
+ incoming_delete_record,
12
+ incoming_read_labels,
13
+ incoming_read_records,
14
+ )
15
+
16
+
17
# Logger used by the auto-curate sub-command.
logger = logging.getLogger('auto-curate')

# Set to `True` once the detailed "missing `schema_type`" warning has been
# logged, so that later records only get a short warning.
stl_info = False
20
+
21
+
22
@click.command(short_help='Move records from inbox to curate area of a collection')
@click.pass_obj
@click.argument(
    'service_url',
    metavar='SERVICE_URL',
)
@click.argument(
    'collection',
    metavar='COLLECTION',
)
@click.option(
    '--destination-service-url',
    metavar='DEST_SERVICE_URL',
    help='select a different dump-thing-service, i.e. not SERVICE_URL, as destination for auto-curated records',
)
@click.option(
    '--destination-collection',
    metavar='DEST_COLLECTION',
    help='select a different collection, i.e. not the COLLECTION of SERVICE_URL, as destination for auto-curated records',
)
@click.option(
    '--destination-token',
    metavar='DEST_TOKEN',
    help='if provided, this token will be used to authenticate against DEST_SERVICE_URL, otherwise the token for SERVICE_URL will be used',
)
@click.option(
    '--pid', '-p',
    metavar='PID',
    # Repeatable, so that "the given PIDs" is a collection of exact PIDs
    # and not a single string that is searched for substrings.
    multiple=True,
    help='if provided, process only records that match the given PIDs (repeatable). NOTE: matching does not involve CURIE-resolution',
)
@click.option(
    '--exclude', '-e',
    help='exclude an inbox on the source collection (repeatable)',
    multiple=True,
)
@click.option(
    '--include', '-i',
    help='process only the given inbox, all other inboxes are ignored (repeatable, -e/--exclude is applied after inclusion)',
    multiple=True,
)
@click.option(
    '--list-labels', '-l',
    help='list the inbox labels of the given source collection, do not perform any curation',
    default=False,
    is_flag=True,
)
@click.option(
    '--list-records', '-r',
    help='list records in the inboxes of the given source collection, do not perform any curation',
    default=False,
    is_flag=True,
)
@click.option(
    '--dry-run', '-d',
    help='if provided, do not alter any data, instead print what would be done',
    default=False,
    is_flag=True,
)
def cli(
    obj,
    service_url,
    collection,
    destination_service_url,
    destination_collection,
    destination_token,
    pid,
    exclude,
    include,
    list_labels,
    list_records,
    dry_run,
):
    """Automatically move records from the incoming areas of the collection
    COLLECTION in the service SERVICE_URL to the curated area of the same
    collection, or to the curated area of another collection, possibly on
    another service.

    A token is required and will be used to authenticate the requests.
    The token must have curator-rights."""
    # NOTE: the docstring above is the user-visible help text of the command.
    try:
        exit_code = auto_curate(
            obj,
            service_url,
            collection,
            destination_service_url,
            destination_collection,
            destination_token,
            pid,
            exclude,
            include,
            list_labels,
            list_records,
            dry_run,
        )
    except HTTPError as e:
        print(f'ERROR: {e}: {e.response.text}', file=sys.stderr, flush=True)
        exit_code = 1

    # click discards the return value of a command callback when it runs in
    # standalone mode, so a plain `return 1` would still end the process with
    # exit status 0. Exit explicitly to report failures to the shell.
    if exit_code:
        raise SystemExit(exit_code)
    return exit_code
119
+
120
+
121
# Trailing run of identifier characters, e.g. `Person` in `abc:Person`.
_CLASS_NAME_PATTERN = re.compile(r'[_A-Za-z0-9]+$')


def _class_name_from_record(record):
    """Return the class name at the end of `record['schema_type']`.

    Returns `None` if `schema_type` is missing, is not a string, or does not
    end in at least one identifier character.
    """
    schema_type = record.get('schema_type')
    if not isinstance(schema_type, str):
        return None
    match = _CLASS_NAME_PATTERN.search(schema_type)
    return match.group(0) if match else None


def auto_curate(
    obj,
    service_url,
    collection,
    destination_service_url,
    destination_collection,
    destination_token,
    pid,
    exclude,
    include,
    list_labels,
    list_records,
    dry_run,
):
    """Move records from the inboxes of a collection to a curated area.

    :param obj: curator token for `service_url` (`None` is an error)
    :param service_url: URL of the source service
    :param collection: name of the source collection
    :param destination_service_url: destination service, defaults to
        `service_url` if `None`
    :param destination_collection: destination collection, defaults to
        `collection` if `None`
    :param destination_token: token for the destination, defaults to `obj`
        if `None`
    :param pid: a single PID or an iterable of PIDs; if non-empty, only
        records with exactly one of these PIDs are curated
    :param exclude: inbox labels to skip
    :param include: if non-empty, only these inbox labels are processed
    :param list_labels: only print the inbox labels as JSON
    :param list_records: only print the inbox records as JSON (takes
        precedence over `list_labels`)
    :param dry_run: print what would be done instead of doing it
    :return: 0 on success, 1 if no token was provided
    """
    global stl_info

    curator_token = obj

    if curator_token is None:
        print(
            'ERROR: no token was provided (use --token or DTC_TOKEN environment variable)',
            file=sys.stderr,
            flush=True,
        )
        return 1

    # The token is a secret and stdout may carry JSON output, so the token
    # is deliberately not echoed here.
    logger.debug('auto curate: %s@%s', collection, service_url)

    # Destination settings fall back to the source settings, as described
    # in the help texts of the corresponding command line options.
    if destination_service_url is None:
        destination_service_url = service_url
    if destination_collection is None:
        destination_collection = collection
    if destination_token is None:
        destination_token = curator_token

    # Accept a single PID as well as a collection of PIDs. A plain string
    # must not be used with `in`, because that would match substrings.
    if isinstance(pid, str):
        wanted_pids = frozenset((pid,))
    else:
        wanted_pids = frozenset(pid or ())

    output = None

    # If --list-labels and --list-records are provided, keep only the latter,
    # because it includes listing of labels
    if list_records:
        if list_labels:
            logger.warning('`-l/--list-labels` and `-r/--list-records` defined, ignoring `-l/--list-labels`')
            list_labels = False
        output = {}
    elif list_labels:
        output = []

    for label in incoming_read_labels(
        service_url=service_url,
        collection=collection,
        token=curator_token,
    ):

        if include and label not in include:
            logger.debug('ignoring non-included incoming label: %s', label)
            continue

        if label in exclude:
            logger.debug('ignoring excluded incoming label: %s', label)
            continue

        if list_labels:
            output.append(label)
            continue

        if list_records:
            output[label] = []

        # NOTE(review): records are deleted from the inbox while it is still
        # being read. If `incoming_read_records` fetches pages lazily, this
        # might skip records -- confirm against its implementation.
        for record, _, _, _, _ in incoming_read_records(
            service_url=service_url,
            collection=collection,
            label=label,
            token=curator_token,
        ):

            if list_records:
                output[label].append(record)
                continue

            if wanted_pids and record['pid'] not in wanted_pids:
                logger.debug(
                    'ignoring record with non-matching pid: %s',
                    record['pid'])
                continue

            # Get the class name from the `schema_type` attribute. This requires
            # that the schema type is either stored in the record or that the
            # store has a "Schema Type Layer", i.e., the store type is
            # `record_dir+stl`, or `sqlite+stl`.
            class_name = _class_name_from_record(record)
            if class_name is None:
                if not stl_info:
                    # Detailed explanation, logged only once per process.
                    logger.warning(
                        f"""Could not find `schema_type` attribute in record with
pid {record['pid']}. Please ensure that `schema_type` is stored in
the records or that the associated incoming area store has a backend
with a "Schema Type Layer", i.e., "record_dir+stl" or
"sqlite+stl".""",
                    )
                    stl_info = True
                else:
                    logger.warning(f'ignoring record with pid {record["pid"]}, `schema_type` attribute is missing.')
                continue

            if dry_run:
                print(f'WRITE record "{record["pid"]}" of class "{class_name}" to "{destination_collection}@{destination_service_url}"')
                print(f'DELETE record "{record["pid"]}" from inbox "{label}" of "{collection}@{service_url}"')
                continue

            # Store record in destination collection
            curated_write_record(
                service_url=destination_service_url,
                collection=destination_collection,
                class_name=class_name,
                record=record,
                token=destination_token)

            # Delete record from incoming area, only after it was written
            incoming_delete_record(
                service_url=service_url,
                collection=collection,
                label=label,
                pid=record['pid'],
                token=curator_token,
            )

    if output is not None:
        print(json.dumps(output, ensure_ascii=False))

    return 0
246
+
247
+
248
# Name under which this module's `cli` command is registered as a subcommand.
subcommand_name = 'auto-curate'
@@ -0,0 +1,93 @@
1
+ import json
2
+
3
+ import rich_click as click
4
+
5
+ from ...communicate import (
6
+ HTTPError,
7
+ incoming_delete_record,
8
+ incoming_read_records,
9
+ )
10
+
11
+
12
# Name under which this module's `cli` command is registered as a subcommand.
subcommand_name = 'clean-incoming'
13
+
14
+
15
@click.command(short_help='Remove records from an inbox of a dump-things collection')
@click.pass_obj
@click.argument(
    'service_url',
    metavar='SERVICE_URL',
)
@click.argument(
    'collection',
    metavar='COLLECTION',
)
@click.argument(
    'inbox_label',
    metavar='INBOX_LABEL',
)
@click.option(
    '--list-only', '-l',
    default=False,
    is_flag=True,
    help='only list records in the inbox, do not remove them',
)
def cli(
    obj,
    service_url,
    collection,
    inbox_label,
    list_only,
):
    """Remove all records from an incoming areas of a collection on a dump-things-service

    This command removes all records from the inbox with label INBOX_LABEL in
    the collection COLLECTION on the dump-things service given by SERVICE_URL.

    A token with curator rights has to be provided.
    """
    # NOTE: the docstring above is the user-visible help text of the command.
    try:
        exit_code = clean_incoming(
            obj,
            service_url,
            collection,
            inbox_label,
            list_only,
        )
    except HTTPError as e:
        click.echo(f'ERROR: {e}: {e.response.text}', err=True)
        exit_code = 1

    # click discards the return value of a command callback when it runs in
    # standalone mode, so a plain `return 1` would still end the process with
    # exit status 0. Exit explicitly to report failures to the shell.
    if exit_code:
        raise SystemExit(exit_code)
    return exit_code
60
+
61
+
62
def clean_incoming(
    obj,
    service_url,
    collection,
    inbox_label,
    list_only,
):
    """List or delete all records of one inbox of a collection.

    `obj` is the token used for all requests. With `list_only` set, every
    record is printed as one JSON line and nothing is deleted; otherwise
    every record that is read is deleted from the inbox.

    Returns 1 if no token was provided, 0 otherwise.
    """
    if obj is None:
        click.echo('ERROR: token not provided', err=True)
        return 1

    inbox_records = incoming_read_records(
        service_url=service_url,
        collection=collection,
        label=inbox_label,
        token=obj,
    )

    # Each item is a 5-tuple; only its first element, the record, is used.
    # NOTE(review): records are deleted while the inbox is still being read;
    # if `incoming_read_records` pages lazily this might skip records --
    # confirm against its implementation.
    for record, _, _, _, _ in inbox_records:
        if list_only:
            click.echo(json.dumps(record, ensure_ascii=False))
        else:
            # Delete record from incoming area
            incoming_delete_record(
                service_url=service_url,
                collection=collection,
                label=inbox_label,
                pid=record['pid'],
                token=obj,
            )

    return 0
@@ -0,0 +1,262 @@
1
+ import json
2
+ from functools import partial
3
+
4
+ import rich_click as click
5
+
6
+ from ...communicate import (
7
+ HTTPError,
8
+ collection_read_records,
9
+ collection_read_record_with_pid,
10
+ collection_read_records_of_class,
11
+ curated_read_records,
12
+ curated_read_records_of_class,
13
+ curated_read_record_with_pid,
14
+ incoming_read_labels,
15
+ incoming_read_records,
16
+ incoming_read_records_of_class,
17
+ incoming_read_record_with_pid,
18
+ )
19
+
20
+
21
# Name under which this module's `cli` command is registered as a subcommand.
subcommand_name = 'get-records'
22
+
23
+
24
@click.command(short_help='Get records from a dump-things collection')
@click.pass_obj
@click.argument(
    'service_url',
    metavar='SERVICE_URL',
)
@click.argument(
    'collection',
    metavar='COLLECTION',
)
@click.option(
    '--class', '-C', 'cls',
    # Takes a value: it is forwarded as `class_name` to the read functions,
    # so it must not be a boolean flag.
    metavar='CLASS',
    default=None,
    help='only read records of this class, ignored if "--pid" is provided',
)
@click.option(
    '--format', '-f', 'format_',
    type=click.Choice(('json', 'ttl'), case_sensitive=False),
    default='json',
    help='request records in a specific format. (NOTE: not all endpoints support the "format"-parameter)',
)
@click.option(
    '--pid', '-p',
    help='the pid of the record that should be read',
)
@click.option(
    '--incoming', '-i',
    metavar='LABEL',
    help='read from the collection inbox with label LABEL, if LABEL is "-", return labels of all collection inboxes',
)
@click.option(
    '--curated', '-c',
    default=False,
    is_flag=True,
    help='read from the curated area of the collection. (Note: requires a token with curator rights)',
)
@click.option(
    '--matching', '-m',
    # Takes the value to match (with % as wildcard), so it must not be a
    # boolean flag.
    metavar='PATTERN',
    default=None,
    help='return only records that have a matching value (use % as wildcard). Ignored if "--pid" is provided. (Note: not all endpoints and backends support matching)',
)
@click.option(
    '--page-size', '-s',
    type=click.IntRange(1, 100),
    default=100,
    help='set the page size (default: 100). (ignored if "--pid" is provided)'
)
@click.option(
    '--first-page', '-F',
    type=click.INT,
    default=1,
    help='the first page to return (default: 1). (ignored if "--pid" is provided)'
)
@click.option(
    '--last-page', '-l',
    type=click.INT,
    help='the last page to return, if not given, all pages will be returned. (ignored if "--pid" is provided)',
    default=None,
)
@click.option(
    '--stats',
    default=False,
    is_flag=True,
    help='show the number of records and pages and exit. (ignored if "--pid" is provided)',
)
@click.option(
    '--pagination', '-P',
    default=False,
    is_flag=True,
    help='show pagination information (each record from an paginated endpoint is returned as [<record>, <current page number>, <total number of pages>, <page size>, <total number of items>]. (ignored if "--pid" is provided)',
)
def cli(
    obj,
    service_url,
    collection,
    cls,
    format_,
    pid,
    incoming,
    curated,
    matching,
    page_size,
    first_page,
    last_page,
    stats,
    pagination,
):
    """Get records from a collection on a dump-things-service

    This command lists records that are stored in collection COLLECTION of the
    dump-things service SERVICE_URL. By
    default, all records that are readable with the given token, or the default
    token, will be displayed. The output format is JSONL (JSON lines), where
    every line contains a record or a record with paging information. If `ttl`
    is chosen as format of the output records, the record content will be a string
    that contains a TTL-documents.

    The command supports reading from the curated area only, reading from incoming
    areas, or reading a record with a given PID.

    Pagination information is returned for paginated results, when requested with
    `-P/--pagination`. All results are paginated except "get a record with a given PID"
    and "get the list of incoming zone labels".

    For reading from curated or incoming areas, a token with curator rights has
    to be provided.
    """
    # NOTE: the docstring above is the user-visible help text of the command.
    try:
        exit_code = get_records(
            obj,
            service_url,
            collection,
            cls,
            format_,
            pid,
            incoming,
            curated,
            matching,
            page_size,
            first_page,
            last_page,
            stats,
            pagination,
        )
    except HTTPError as e:
        click.echo(f'ERROR: {e}: {e.response.text}', err=True)
        exit_code = 1

    # click discards the return value of a command callback when it runs in
    # standalone mode, so a plain `return 1` would still end the process with
    # exit status 0. Exit explicitly to report failures to the shell.
    if exit_code:
        raise SystemExit(exit_code)
    return exit_code
153
+
154
+
155
def get_records(
    obj,
    service_url,
    collection,
    cls,
    format_,
    pid,
    incoming,
    curated,
    matching,
    page_size,
    first_page,
    last_page,
    stats,
    pagination,
):
    """Read records or inbox labels from a collection and print them.

    Output is written to stdout as JSON lines. Depending on the arguments
    one of the following is printed:

    - `incoming == '-'`: the inbox labels of the collection,
    - `pid` given: the single record with that PID,
    - otherwise: all records (of class `cls`, if given), read from the
      curated area (`curated`), an inbox (`incoming`), or the collection.

    :param obj: token used for all requests, may be `None`
    :param format_: requested record format, only passed on when reading
        from the collection itself
    :param pagination: if true, print the complete result items instead of
        only their first element (the record)
    :return: 0 on success, 1 if `incoming` and `curated` are both given

    NOTE(review): `stats` is accepted but not acted upon in this function.
    """
    token = obj

    if token is None:
        click.echo('WARNING: no token provided', err=True)

    if incoming and curated:
        click.echo(
            'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
            err=True,
        )
        return 1

    kwargs = dict(
        service_url=service_url,
        collection=collection,
        token=token,
    )

    if incoming == '-':
        # One JSON line per label; nothing is printed if there are no labels.
        for label in incoming_read_labels(**kwargs):
            print(json.dumps(label, ensure_ascii=False))
        return 0

    if pid:
        # Warn only about options that differ from their defaults
        # (page size 100, first page 1); otherwise the warnings would be
        # emitted on every call.
        ignored_arguments = (
            (bool(matching), '-m/--matching'),
            (page_size not in (None, 100), '-s/--page-size'),
            (first_page not in (None, 1), '-F/--first-page'),
            (last_page is not None, '-l/--last-page'),
            (bool(stats), '--stats'),
            (bool(pagination), '-P/--pagination'),
            (bool(cls), '-C/--class'),
        )
        for is_set, argument_name in ignored_arguments:
            if is_set:
                click.echo(
                    f'WARNING: {argument_name} ignored because "-p/--pid" is provided',
                    err=True,
                )

        kwargs['pid'] = pid
        if curated:
            result = curated_read_record_with_pid(**kwargs)
        elif incoming:
            kwargs['label'] = incoming
            result = incoming_read_record_with_pid(**kwargs)
        else:
            kwargs['format'] = format_
            result = collection_read_record_with_pid(**kwargs)
        print(json.dumps(result, ensure_ascii=False))
        return 0

    # Paginated reads, optionally restricted to a single class.
    kwargs.update(dict(
        matching=matching,
        page=first_page,
        size=page_size or 100,
        last_page=last_page,
    ))
    if cls:
        kwargs['class_name'] = cls
        read_curated = curated_read_records_of_class
        read_incoming = incoming_read_records_of_class
        read_collection = collection_read_records_of_class
    else:
        read_curated = curated_read_records
        read_incoming = incoming_read_records
        read_collection = collection_read_records

    if curated:
        result = read_curated(**kwargs)
    elif incoming:
        kwargs['label'] = incoming
        result = read_incoming(**kwargs)
    else:
        kwargs['format'] = format_
        result = read_collection(**kwargs)

    for item in result:
        # Each item carries the record first, followed by paging details;
        # print the whole item only if pagination info was requested.
        print(json.dumps(item if pagination else item[0], ensure_ascii=False))
    return 0