dump-things-pyclient 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ import json
2
+
3
+ import rich_click as click
4
+
5
+ from ...communicate import (
6
+ HTTPError,
7
+ incoming_read_labels,
8
+ incoming_read_records,
9
+ )
10
+
11
+
12
subcommand_name = 'list-incoming'


@click.command(short_help='List inboxes of a dump-things collection')
@click.pass_obj
@click.argument('service_url', metavar='SERVICE_URL')
@click.argument('collection', metavar='COLLECTION')
@click.option(
    '--show-records', '-s',
    is_flag=True,
    default=False,
    help='list records in inboxes',
)
def cli(obj, service_url, collection, show_records):
    """List labels of incoming areas of a collection on a dump-things-service

    This command lists the labels of the incoming areas of the collection
    COLLECTION on the dump-things service given by SERVICE_URL.

    A token with curator rights has to be provided.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is user-facing output.
    try:
        exit_code = list_incoming(
            obj=obj,
            service_url=service_url,
            collection=collection,
            show_records=show_records,
        )
    except HTTPError as error:
        # Report the failure together with the response body on stderr.
        click.echo(f'ERROR: {error}: {error.response.text}', err=True)
        exit_code = 1
    return exit_code
54
+
55
+
56
def list_incoming(obj, service_url, collection, show_records):
    """Print the inbox labels of a collection, optionally with their records.

    ``obj`` is used as the authentication token. Without a token an error
    message is written to stderr and 1 is returned. Otherwise the result is
    written to stdout as indented JSON and 0 is returned: a list of labels,
    or, if ``show_records`` is set, a mapping from label to its records.
    """
    token = obj
    if token is None:
        click.echo('ERROR: token not provided', err=True)
        return 1

    records_by_label = {}
    labels = incoming_read_labels(
        service_url=service_url,
        collection=collection,
        token=token,
    )
    for label in labels:
        if show_records:
            # Only the first element of each 5-tuple (the record) is kept.
            records_by_label[label] = [
                record
                for record, _, _, _, _ in incoming_read_records(
                    service_url=service_url,
                    collection=collection,
                    label=label,
                    token=token,
                )
            ]
        else:
            records_by_label[label] = []

    # Without records, only the labels (the mapping's keys) are reported.
    output = list(records_by_label) if show_records is False else records_by_label
    click.echo(json.dumps(output, indent=2, ensure_ascii=False))
    return 0
@@ -0,0 +1,116 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+
5
+ import rich_click as click
6
+
7
+ from ...communicate import (
8
+ HTTPError,
9
+ curated_write_record,
10
+ collection_write_record,
11
+ )
12
+
13
+
14
logger = logging.getLogger('post-records')


@click.command(short_help='Post records to an inbox or the curated area of a dump-things collection')
@click.pass_obj
@click.argument('service_url', metavar='SERVICE_URL')
@click.argument('collection', metavar='COLLECTION')
@click.argument('cls', metavar='CLASS')
@click.option(
    '--curated',
    is_flag=True,
    default=False,
    help='store record directly in curated area instead of an inbox. (Note: requires a token with curator rights)',
)
def cli(obj, service_url, collection, cls, curated):
    """Read records of class CLASS from standard input and store them in
    the collection COLLECTION on the service SERVICE_URL. Records should be
    provided in JSON-lines format. Note: all records are assumed to be of class
    CLASS. To submit records of multiple classes, the subcommand has to be
    invoked multiple times, once for each class.

    If the `--curated`-option is provided, the records will be stored directly
    in the curated area of the collection without any alterations, i.e, no
    annotations will be added.

    If no `--curated`-option is provided, the record will be stored in the
    inbox of the user that is associated with the token, and the record will be
    annotated with the submission time and the user that performed
    the submission.

    A token is required and will be used to authenticate the requests.
    If the `--curated`-option is provided, the token must have
    curator-rights."""
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is user-facing output.
    try:
        exit_code = post_records(
            obj=obj,
            service_url=service_url,
            collection=collection,
            cls=cls,
            curated=curated,
        )
    except HTTPError as error:
        # Report the failure together with the response body on stderr.
        click.echo(f'ERROR: {error}: {error.response.text}', err=True)
        exit_code = 1
    return exit_code
73
+
74
+
75
def post_records(
    obj,
    service_url,
    collection,
    cls,
    curated,
):
    """Post JSON-lines records read from standard input to a collection.

    Each non-blank line of stdin is parsed as one JSON record and written
    with ``curated_write_record`` (if ``curated`` is set) or
    ``collection_write_record`` (otherwise), using ``cls`` as class name and
    ``obj`` as authentication token. A ``.`` is echoed for every record
    that was written successfully.

    Lines that are not valid JSON and records that could not be written are
    reported on stderr; processing continues with the next line.

    Returns 0 if every record was posted, and 1 if no token was provided or
    if at least one line could not be parsed or written.
    """
    token = obj
    if token is None:
        click.echo('ERROR: no token provided', err=True)
        return 1

    write_record = curated_write_record if curated else collection_write_record

    posted = False
    failed = False
    for line_number, line in enumerate(sys.stdin, start=1):
        # Blank lines (e.g. a trailing newline) carry no record; feeding
        # them to the JSON parser would raise and abort the whole run.
        if not line.strip():
            continue

        try:
            record = json.loads(line)
        except json.JSONDecodeError as e:
            click.echo(
                f'ERROR: invalid JSON on line {line_number}: {e}',
                err=True,
            )
            failed = True
            continue

        try:
            write_record(
                service_url=service_url,
                collection=collection,
                class_name=cls,
                record=record,
                token=token,
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the
            # remaining records, but remember it for the return value.
            click.echo(f'ERROR: {e}', err=True)
            failed = True
        else:
            posted = True
            click.echo('.', nl=False)

    if posted:
        # echo a final newline
        click.echo('')

    return 1 if failed else 0


subcommand_name = 'post-records'
@@ -0,0 +1,141 @@
1
+ import json
2
+ import logging
3
+
4
+ import rich_click as click
5
+
6
+ from ...communicate import (
7
+ HTTPError,
8
+ get_paginated,
9
+ )
10
+
11
+
12
logger = logging.getLogger('read-pages')


@click.command(short_help='Read records from paginated dump-things endpoints')
@click.pass_obj
@click.argument(
    'url',
    metavar='URL',
)
@click.option(
    '--page-size', '-s',
    type=click.INT,
    default=100,
    help='set the page size (1 - 100) (default: 100)'
)
@click.option(
    '--first-page', '-F',
    type=click.INT,
    default=1,
    help='the first page to return (default: 1)'
)
@click.option(
    '--last-page', '-l',
    type=click.INT,
    help='the last page to return (default: None (return all pages))',
)
@click.option(
    '--stats',
    is_flag=True,
    default=False,
    help='show information about the number of records and pages and exit, the information is returned as [<total number of pages>, <page size>, <total number of items>]',
)
@click.option(
    '--format', '-f', 'format_',
    type=click.Choice(('json', 'ttl'), case_sensitive=False),
    default='json',
    help='request output records in a specific format. (NOTE: not all endpoints support the "format"-parameter)',
)
@click.option(
    '--matching', '-m',
    help='return only records that have a matching value (use % as wildcard). (NOTE: not all endpoints and storage-backends support matching.)',
)
@click.option(
    '--pagination', '-P',
    is_flag=True,
    help='show pagination information (each record from a paginated endpoint is returned as [<record>, <current page number>, <total number of pages>, <page size>, <total number of items>])',
)
def cli(
    obj,
    url,
    page_size,
    first_page,
    last_page,
    stats,
    format_,
    matching,
    pagination,
):
    """Read paginated endpoint

    This command lists all records that are available via a paginated endpoint
    of a dump-things-service, e.g., given by URL

        https://<service-location>/<collection>/records/p/

    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is user-facing output.
    try:
        return read_pages(
            obj,
            url,
            page_size,
            first_page,
            last_page,
            stats,
            format_,
            matching,
            pagination,
        )
    except HTTPError as e:
        # Report the failure together with the response body on stderr.
        click.echo(f'ERROR: {e}: {e.response.text}', err=True)
        return 1
93
+
94
+
95
def read_pages(
    obj,
    url,
    page_size,
    first_page,
    last_page,
    stats,
    format_,
    matching,
    pagination,
):
    """Read records from a paginated endpoint and print them as JSON lines.

    ``obj`` is used as the authentication token; if it is ``None`` a warning
    is written to stderr and the request is attempted without a token.
    ``format_`` and, if given, ``matching`` are passed to ``get_paginated``
    as request parameters.

    Output modes:

    - ``stats``: print elements ``[2:]`` of the first yielded entry and stop.
    - ``pagination``: print every yielded entry in full.
    - otherwise: print only element ``0`` of every entry (the record).

    Returns 0 on success. Returns 1 if ``stats`` was requested but nothing
    was yielded, since there is then no entry to take the statistics from.
    """
    token = obj

    if token is None:
        click.echo('WARNING: no token provided', err=True)

    parameters = {'format': format_}
    if matching is not None:
        parameters['matching'] = matching

    # NOTE(review): entries are presumably
    # (record, page, total pages, page size, total items), as described in
    # the `--pagination` help text -- confirm against `get_paginated`.
    result = get_paginated(
        url=url,
        token=token,
        first_page=first_page,
        page_size=page_size,
        last_page=last_page,
        parameters=parameters,
    )

    if stats:
        # A plain `next(result)` would raise an uncaught `StopIteration`
        # when nothing is yielded.
        first_entry = next(result, None)
        if first_entry is None:
            click.echo(
                'ERROR: no records returned, statistics are not available',
                err=True,
            )
            return 1
        click.echo(json.dumps(first_entry[2:], ensure_ascii=False))
        return 0

    for entry in result:
        click.echo(
            json.dumps(entry if pagination else entry[0], ensure_ascii=False)
        )
    return 0


subcommand_name = 'read-pages'
@@ -1,11 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dump-things-pyclient
3
- Version: 0.1.4
4
- Summary: A client library and some CLI command for dump-things-services
3
+ Version: 0.2.1
4
+ Summary: A client library and CLI commands for dump-things-services
5
5
  Author-email: Christian Mönch <christian.moench@web.de>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
+ Requires-Dist: click>=8.3.1
8
9
  Requires-Dist: requests>=2.32.5
10
+ Requires-Dist: rich-click>=1.9.6
9
11
  Provides-Extra: ttl
10
12
  Requires-Dist: dump-things-service>=5.3.0; extra == "ttl"
11
13
  Provides-Extra: tests
@@ -0,0 +1,17 @@
1
+ dump_things_pyclient/__init__.py,sha256=cn-U3TRIalN6aYHp1cMBRkQm1x98XBwquLFbgFEIf_Q,113
2
+ dump_things_pyclient/communicate.py,sha256=CTkgEigu16BmLrlJScMQKhHLI_W4SywDb4HCwAICWzA,30523
3
+ dump_things_pyclient/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ dump_things_pyclient/commands/dtc.py,sha256=dxW5RuogqwhzfVujZ_EEsQMk8BcVMbZyMdg5c8EvYIA,1726
5
+ dump_things_pyclient/commands/json2ttl.py,sha256=8BkvdjLWZ_H0L6fTmuR2M2MglKiMUiuNUcuWr_w6_dQ,2133
6
+ dump_things_pyclient/commands/dtc_plugins/__init__.py,sha256=0YLByLiofhHkhJcDCkokldcCw3Jj0rsKJinRX4tt3Hc,514
7
+ dump_things_pyclient/commands/dtc_plugins/auto_curate.py,sha256=3_SHXPQCXmY6GqTMTNVkKh5vvshfiZpMGFY0gvJxRbo,7411
8
+ dump_things_pyclient/commands/dtc_plugins/clean_incoming.py,sha256=slk3xn1-DgMl88WZqgyemyscwof97TMXt3rley4mU1w,2086
9
+ dump_things_pyclient/commands/dtc_plugins/get_records.py,sha256=YBRNo7HUCWZ-EOv8EU_yGMRRoXqqKy1XTcsfQ64ymuk,7508
10
+ dump_things_pyclient/commands/dtc_plugins/list_incoming.py,sha256=tmM0Qs4MVwMMLyERsWCxWGTM90rSNOShLpHH32wObd8,1959
11
+ dump_things_pyclient/commands/dtc_plugins/post_records.py,sha256=s3j9THe-RszKxyIISkQZRCTKplWWLlomHbS5dyRlep0,2908
12
+ dump_things_pyclient/commands/dtc_plugins/read_pages.py,sha256=hpw7vtG7joIMrNqEqZFCwzbQFd3ATzv7iyySsX8nKWk,3385
13
+ dump_things_pyclient-0.2.1.dist-info/METADATA,sha256=7TPuI97WTVrgJrPBWOzfYOFAOo7_lNXzbzPD-OkskrY,999
14
+ dump_things_pyclient-0.2.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
15
+ dump_things_pyclient-0.2.1.dist-info/entry_points.txt,sha256=UciGaqSUivgH8oFLO8vNhHqKLmVFhjdM1tdfqCwgOok,117
16
+ dump_things_pyclient-0.2.1.dist-info/top_level.txt,sha256=Asvruw-SyLoYhWis1CFOx89RGxpjXoTZVGoq4JSGt88,21
17
+ dump_things_pyclient-0.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ dtc = dump_things_pyclient.commands.dtc:cli
3
+ json2ttl = dump_things_pyclient.commands.json2ttl:main
@@ -1,214 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import json
5
- import logging
6
- import os
7
- import re
8
- import sys
9
-
10
- from ..communicate import (
11
- HTTPError,
12
- curated_write_record,
13
- incoming_delete_record,
14
- incoming_read_labels,
15
- incoming_read_records,
16
- )
17
-
18
-
19
logger = logging.getLogger('auto-curate')

# Name of the environment variable that holds the curator token.
token_name = 'DUMPTHINGS_TOKEN'

# Set once the explanation about missing `schema_type` attributes has been
# printed, so that it is shown at most once per run.
stl_info = False

# Description shown in the command line help.
description = f"""
Automatically move records from the incoming areas of a
collection to the curated area of the same collection, or to
the curated area of another collection.

The environment variable "{token_name}" must contain a token
which is used to authenticate the requests. The token must have
curator-rights.
"""
34
-
35
-
36
def _main():
    """Move records from the inboxes of a collection to a curated area.

    Parses the command line, reads the curator token from the environment
    variable named by ``token_name``, and iterates over the inbox labels of
    the source collection (filtered by ``--include`` / ``--exclude``).

    Depending on the options, the labels are listed (``--list-labels``), the
    records are listed (``--list-records``), the planned actions are printed
    (``--dry-run``), or every record is written to the destination curated
    area and then deleted from its inbox. Records without a ``schema_type``
    attribute are skipped with a warning.

    Returns 0 on success and 1 if the token environment variable is not set.
    """
    # Declared up front; the flag is only modified when a record without
    # `schema_type` is encountered.
    global stl_info

    argument_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argument_parser.add_argument('service_url', metavar='SOURCE_SERVICE_URL')
    argument_parser.add_argument('collection', metavar='SOURCE_COLLECTION')
    argument_parser.add_argument(
        '--destination-service-url',
        default=None,
        metavar='DEST_SERVICE_URL',
        help='select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records',
    )
    argument_parser.add_argument(
        '--destination-collection',
        default=None,
        metavar='DEST_COLLECTION',
        help='select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records',
    )
    argument_parser.add_argument(
        '--destination-token',
        default=None,
        metavar='DEST_TOKEN',
        # The fallback token is read from the environment variable named by
        # `token_name` (see below), so that name is what the help reports.
        help=f'if provided, this token will be used for the destination service, otherwise the token from the environment variable "{token_name}" will be used',
    )
    argument_parser.add_argument(
        '-e', '--exclude',
        action='append',
        default=[],
        help='exclude an inbox on the source collection (repeatable)',
    )
    argument_parser.add_argument(
        '-i', '--include',
        action='append',
        default=[],
        help='process only the given inbox, all other inboxes are ignored (repeatable, -e/--exclude is applied after inclusion)',
    )
    argument_parser.add_argument(
        '-l', '--list-labels',
        action='store_true',
        help='list the inbox labels of the given source collection, do not perform any curation',
    )
    argument_parser.add_argument(
        '-r', '--list-records',
        action='store_true',
        help='list records in the inboxes of the given source collection, do not perform any curation',
    )
    argument_parser.add_argument(
        '-p', '--pid',
        action='append',
        help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
    )
    argument_parser.add_argument(
        '-d', '--dry-run',
        action='store_true',
        help='if provided, do not alter any data, instead print what would be done',
    )
    arguments = argument_parser.parse_args()

    curator_token = os.environ.get(token_name)
    if curator_token is None:
        print(f'ERROR: environment variable "{token_name}" not set', file=sys.stderr, flush=True)
        return 1

    # Destination settings default to the corresponding source settings.
    destination_url = arguments.destination_service_url or arguments.service_url
    destination_collection = arguments.destination_collection or arguments.collection
    destination_token = arguments.destination_token or curator_token

    output = None

    # If --list-labels and --list-records are provided, keep only the latter,
    # because it includes listing of labels
    if arguments.list_records:
        if arguments.list_labels:
            print('WARNING: `-l/--list-labels` and `-r/--list-records` defined, ignoring `-l/--list-labels`', file=sys.stderr, flush=True)
            arguments.list_labels = False
        output = {}
    if arguments.list_labels:
        output = []

    for label in incoming_read_labels(
            service_url=arguments.service_url,
            collection=arguments.collection,
            token=curator_token):

        if arguments.include and label not in arguments.include:
            logger.debug('ignoring non-included incoming label: %s', label)
            continue

        if label in arguments.exclude:
            logger.debug('ignoring excluded incoming label: %s', label)
            continue

        if arguments.list_labels:
            output.append(label)
            continue

        if arguments.list_records:
            output[label] = []

        for record, _, _, _, _ in incoming_read_records(
                service_url=arguments.service_url,
                collection=arguments.collection,
                label=label,
                token=curator_token):

            if arguments.pid:
                if record['pid'] not in arguments.pid:
                    logger.debug(
                        'ignoring record with non-matching pid: %s',
                        record['pid'])
                    continue

            if arguments.list_records:
                output[label].append(record)
                continue

            # Get the class name from the `schema_type` attribute. This requires
            # that the schema type is either stored in the record or that the
            # store has a "Schema Type Layer", i.e., the store type is
            # `record_dir+stl`, or `sqlite+stl`.
            try:
                class_name = re.search('([_A-Za-z0-9]*$)', record['schema_type']).group(0)
            except (IndexError, KeyError):
                if not stl_info:
                    # Print the detailed explanation only once per run.
                    print(
                        f"""Could not find `schema_type` attribute in record with
pid {record['pid']}. Please ensure that `schema_type` is stored in
the records or that the associated incoming area store has a backend
with a "Schema Type Layer", i.e., "record_dir+stl" or
"sqlite+stl".""",
                        file=sys.stderr,
                        flush=True)
                    stl_info = True
                print(
                    f'WARNING: ignoring record with pid {record["pid"]}, `schema_type` attribute is missing.',
                    file=sys.stderr,
                    flush=True)
                continue

            if arguments.dry_run:
                print(f'WRITE record "{record["pid"]}" of class "{class_name}" to "{destination_collection}@{destination_url}"')
                print(f'DELETE record "{record["pid"]}" from inbox "{label}" of "{arguments.collection}@{arguments.service_url}"')
                continue

            # Store record in destination collection
            curated_write_record(
                service_url=destination_url,
                collection=destination_collection,
                class_name=class_name,
                record=record,
                token=destination_token)

            # Delete record from incoming area
            incoming_delete_record(
                service_url=arguments.service_url,
                collection=arguments.collection,
                label=label,
                pid=record['pid'],
                token=curator_token,
            )

    if output is not None:
        print(json.dumps(output, ensure_ascii=False))

    return 0
203
-
204
-
205
def main():
    """Run ``_main`` and turn an ``HTTPError`` into exit status 1.

    The error and the text of its response are written to stderr.
    """
    try:
        exit_code = _main()
    except HTTPError as error:
        print(f'ERROR: {error}: {error.response.text}', file=sys.stderr, flush=True)
        exit_code = 1
    return exit_code


if __name__ == '__main__':
    sys.exit(main())