dump-things-pyclient 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dump_things_pyclient-0.1.0/PKG-INFO +27 -0
- dump_things_pyclient-0.1.0/README.md +19 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/__init__.py +6 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/commands/__init__.py +0 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/commands/auto_curate.py +194 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/commands/get_records.py +171 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/commands/read_pages.py +86 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient/communicate.py +719 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/PKG-INFO +27 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/SOURCES.txt +14 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/dependency_links.txt +1 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/entry_points.txt +4 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/requires.txt +1 -0
- dump_things_pyclient-0.1.0/dump_things_pyclient.egg-info/top_level.txt +1 -0
- dump_things_pyclient-0.1.0/pyproject.toml +19 -0
- dump_things_pyclient-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dump-things-pyclient
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A client library and some CLI commands for dump-things-services
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: requests>=2.32.5
|
|
8
|
+
|
|
9
|
+
# Dump Things Python Client
|
|
10
|
+
|
|
11
|
+
A simple client library for dump-things-service in Python
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## Tech Stack
|
|
15
|
+
|
|
16
|
+
- Python >= 3.11
|
|
17
|
+
|
|
18
|
+
- [uv](https://astral.sh/uv) for dependency management
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
## Acknowledgements
|
|
22
|
+
|
|
23
|
+
This work was funded, in part, by:
|
|
24
|
+
|
|
25
|
+
- Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant TRR 379 (546006540, Q02 project)
|
|
26
|
+
|
|
27
|
+
- MKW-NRW: Ministerium für Kultur und Wissenschaft des Landes Nordrhein-Westfalen under the Kooperationsplattformen 2022 program, grant number: KP22-106A
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Dump Things Python Client
|
|
2
|
+
|
|
3
|
+
A simple client library for dump-things-service in Python
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## Tech Stack
|
|
7
|
+
|
|
8
|
+
- Python >= 3.11
|
|
9
|
+
|
|
10
|
+
- [uv](https://astral.sh/uv) for dependency management
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Acknowledgements
|
|
14
|
+
|
|
15
|
+
This work was funded, in part, by:
|
|
16
|
+
|
|
17
|
+
- Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant TRR 379 (546006540, Q02 project)
|
|
18
|
+
|
|
19
|
+
- MKW-NRW: Ministerium für Kultur und Wissenschaft des Landes Nordrhein-Westfalen under the Kooperationsplattformen 2022 program, grant number: KP22-106A
|
|
File without changes
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
from ..communicate import (
|
|
11
|
+
HTTPError,
|
|
12
|
+
curated_write_record,
|
|
13
|
+
incoming_delete_record,
|
|
14
|
+
incoming_read_labels,
|
|
15
|
+
incoming_read_records,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Logger used for debug output of the auto-curate command.
logger = logging.getLogger('auto-curate')

# Name of the environment variable from which the curator token is read.
token_name = 'DUMPTHINGS_TOKEN'

# Becomes True after the detailed hint about missing `schema_type`
# attributes was printed, so that the hint is printed only once per run.
stl_info = False

# Help text shown by argparse (rendered verbatim because `_main` uses
# `RawDescriptionHelpFormatter`).
description = f"""
Automatically move records from the incoming areas of a
collection to the curated area of the same collection, or to
the curated area of another collection.

The environment variable "{token_name}" must contain a token
which is used to authenticate the requests. The token must have
curator-rights.
"""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _main():
    """Run the auto-curate command and return its exit code.

    The labels of all incoming areas of the source collection are read.
    Depending on the command line options, the labels or the records in
    the incoming areas are listed as JSON on stdout, or every record is
    written to the curated area of the destination collection and
    afterwards deleted from its incoming area.

    The curator token is read from the environment variable named by
    `token_name`.

    :return: 0 on success, 1 if the token environment variable is not set
    """
    # Module level flag: the detailed "Schema Type Layer" hint is printed
    # only once, no matter how many records lack a `schema_type`.
    global stl_info

    argument_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argument_parser.add_argument('service_url', metavar='SOURCE_SERVICE_URL')
    argument_parser.add_argument('collection', metavar='SOURCE_COLLECTION')
    argument_parser.add_argument(
        '--destination-service-url',
        default=None,
        metavar='DEST_SERVICE_URL',
        help='select a different dump-thing-service, i.e. not SOURCE_SERVICE_URL, as destination for auto-curated records',
    )
    argument_parser.add_argument(
        '--destination-collection',
        default=None,
        metavar='DEST_COLLECTION',
        help='select a different collection, i.e. not the SOURCE_COLLECTION of SOURCE_SERVICE_URL, as destination for auto-curated records',
    )
    argument_parser.add_argument(
        '--destination-token',
        default=None,
        metavar='DEST_TOKEN',
        # The fallback is the token read from `token_name` below.
        help=f'if provided, this token will be used for the destination service, otherwise the token from the environment variable "{token_name}" will be used',
    )
    argument_parser.add_argument(
        '-e', '--exclude',
        action='append',
        default=[],
        help='exclude an inbox on the source collection (repeatable)',
    )
    argument_parser.add_argument(
        '-l', '--list-labels',
        action='store_true',
        help='list the inbox labels of the given source collection, do not perform any curation',
    )
    argument_parser.add_argument(
        '-r', '--list-records',
        action='store_true',
        help='list records in the inboxes of the given source collection, do not perform any curation',
    )
    argument_parser.add_argument(
        '-p', '--pid',
        action='append',
        help='if provided, process only records that match the given PIDs. NOTE: matching does not involve CURIE-resolution!',
    )
    arguments = argument_parser.parse_args()

    curator_token = os.environ.get(token_name)
    if curator_token is None:
        print(f'ERROR: environment variable "{token_name}" not set', file=sys.stderr, flush=True)
        return 1

    # Destination settings default to the corresponding source settings.
    destination_url = arguments.destination_service_url or arguments.service_url
    destination_collection = arguments.destination_collection or arguments.collection
    destination_token = arguments.destination_token or curator_token

    # `output` stays None when records are curated; it is a list of labels
    # for `--list-labels` and a dict `label -> records` for `--list-records`.
    output = None

    # If --list-labels and --list-records are provided, keep only the latter,
    # because it includes listing of labels
    if arguments.list_records:
        if arguments.list_labels:
            print('WARNING: `-l/--list-labels` and `-r/--list-records` defined, ignoring `-l/--list-labels`', file=sys.stderr, flush=True)
            arguments.list_labels = False
        output = {}
    if arguments.list_labels:
        output = []

    for label in incoming_read_labels(
            service_url=arguments.service_url,
            collection=arguments.collection,
            token=curator_token):

        if label in arguments.exclude:
            logger.debug('ignoring excluded incoming label: %s', label)
            continue

        if arguments.list_labels:
            output.append(label)
            continue

        if arguments.list_records:
            output[label] = []

        # Read all records of the incoming area before any of them is
        # deleted. Deleting records while the paginated read is still in
        # progress presumably shifts the content of the following pages,
        # which would cause records to be skipped.
        records = [
            record
            for record, _, _, _, _ in incoming_read_records(
                service_url=arguments.service_url,
                collection=arguments.collection,
                label=label,
                token=curator_token)
        ]

        for record in records:

            if arguments.pid:
                if record['pid'] not in arguments.pid:
                    logger.debug(
                        'ignoring record with non-matching pid: %s',
                        record['pid'])
                    continue

            if arguments.list_records:
                output[label].append(record)
                continue

            # Get the class name from the `schema_type` attribute. This requires
            # that the schema type is either stored in the record or that the
            # store has a "Schema Type Layer", i.e., the store type is
            # `record_dir+stl`, or `sqlite+stl`.
            schema_type = record.get('schema_type')
            if not isinstance(schema_type, str):
                # A missing attribute must be detected explicitly: indexing
                # the record would raise `KeyError`, not `IndexError`.
                if not stl_info:
                    print(
                        f"""Could not find `schema_type` attribute in record with
pid {record['pid']}. Please ensure that `schema_type` is stored in
the records or that the associated incoming area store has a backend
with a "Schema Type Layer", i.e., "record_dir+stl" or
"sqlite+stl".""",
                        file=sys.stderr,
                        flush=True)
                    stl_info = True
                print(
                    f'WARNING: ignoring record with pid {record["pid"]}, `schema_type` attribute is missing.',
                    file=sys.stderr,
                    flush=True)
                continue

            # The class name is the trailing run of word characters, e.g.
            # the part after the prefix separator of a CURIE.
            class_name = re.search('([_A-Za-z0-9]*$)', schema_type).group(0)

            # Store record in destination collection
            curated_write_record(
                service_url=destination_url,
                collection=destination_collection,
                class_name=class_name,
                record=record,
                token=destination_token)

            # Delete record from incoming area
            incoming_delete_record(
                service_url=arguments.service_url,
                collection=arguments.collection,
                label=label,
                pid=record['pid'],
                token=curator_token,
            )

    if output is not None:
        print(json.dumps(output, ensure_ascii=False))

    return 0
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def main():
    """Entry point of the auto-curate command.

    Delegates to `_main` and reports an `HTTPError` raised by it on
    stderr.

    :return: the exit code of `_main`, or 1 if an `HTTPError` was raised
    """
    try:
        exit_code = _main()
    except HTTPError as e:
        print(f'ERROR: {e}: {e.response.text}', file=sys.stderr, flush=True)
        exit_code = 1
    return exit_code


if __name__ == '__main__':
    sys.exit(main())
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from functools import partial
|
|
8
|
+
|
|
9
|
+
from ..communicate import (
|
|
10
|
+
HTTPError,
|
|
11
|
+
collection_read_records,
|
|
12
|
+
collection_read_records_of_class,
|
|
13
|
+
collection_read_record_with_pid,
|
|
14
|
+
curated_read_records,
|
|
15
|
+
curated_read_records_of_class,
|
|
16
|
+
curated_read_record_with_pid,
|
|
17
|
+
incoming_read_labels,
|
|
18
|
+
incoming_read_records,
|
|
19
|
+
incoming_read_records_of_class,
|
|
20
|
+
incoming_read_record_with_pid,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Name of the environment variable from which the access token is read.
token_name = 'DUMPTHINGS_TOKEN'

# Help text shown by argparse (rendered verbatim because `_main` uses
# `RawDescriptionHelpFormatter`).
description = f"""Get records from a collection on a dump-things-service

This command lists records that are stored in a dump-things-service. By
default all records that are readable with the given token, or the default
token, will be displayed. The output format is JSONL (JSON lines), where
every line contains a record or a record with paging information. If `ttl`
is chosen as format of the output records, the record content will be a string
that contains a TTL document.

The command supports to read from the curated area only, to read from incoming
areas, or to read records with a given PID.

Pagination information is returned for paginated results, when requested with
`-P/--pagination`. All results are paginated except "get a record with a given PID"
and "get the list of incoming zone labels".

If the environment variable "{token_name}" is set, its content will be used
as token to authenticate against the dump-things-service.

"""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _main():
    """Run the get-records command and return its exit code.

    Depending on the command line options this prints the labels of the
    incoming areas, a single record identified by its PID, or all records
    (optionally of a single class) of a collection. Records are printed as
    JSON lines on stdout. With `--stats` only
    `[<total number of pages>, <page size>, <total number of items>]` is
    printed.

    The token is read from the environment variable named by `token_name`;
    a missing token only triggers a warning.

    :return: 0 on success, 1 if `-i/--incoming` and `-c/--curated` are
        combined
    """
    argument_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argument_parser.add_argument('service_url')
    argument_parser.add_argument('collection')
    argument_parser.add_argument('-C', '--class', dest='class_name', help='only read records of this class, ignored if "--pid" is provided')
    argument_parser.add_argument('-f', '--format', help='format of the output records ("json" or "ttl")')
    argument_parser.add_argument('-p', '--pid', help='the pid of the record that should be read')
    argument_parser.add_argument('-i', '--incoming', metavar='LABEL', help='read from incoming area with the given label in the collection, if LABEL is "-", return the labels')
    argument_parser.add_argument('-c', '--curated', action='store_true', help='read from the curated area of the collection')
    # argparse applies %-formatting to help strings, a literal percent sign
    # has to be written as `%%`.
    argument_parser.add_argument('-m', '--matching', help='return only records that have a matching value (use %% as wildcard). Ignored if "--pid" is provided. (NOTE: not all endpoints and backends support matching.)')
    argument_parser.add_argument('-s', '--page-size', type=int, help='set the page size (1 - 100) (default: 100), ignored if "--pid" is provided')
    argument_parser.add_argument('-F', '--first-page', type=int, help='the first page to return (default: 1), ignored if "--pid" is provided')
    argument_parser.add_argument('-l', '--last-page', type=int, default=None, help='the last page to return (default: None (return all pages), ignored if "--pid" is provided')
    argument_parser.add_argument('--stats', action='store_true', help='show the number of records and pages and exit, ignored if "--pid" is provided')
    argument_parser.add_argument('-P', '--pagination', action='store_true', help='show pagination information (each record from an paginated endpoint is returned as [<record>, <current page number>, <total number of pages>, <page size>, <total number of items>]')

    arguments = argument_parser.parse_args()

    token = os.environ.get(token_name)
    if token is None:
        print(f'WARNING: {token_name} not set', file=sys.stderr, flush=True)

    if arguments.incoming and arguments.curated:
        print(
            'ERROR: -i/--incoming and -c/--curated are mutually exclusive',
            file=sys.stderr,
            flush=True)
        return 1

    # Keyword arguments shared by all read functions.
    kwargs = dict(
        service_url=arguments.service_url,
        collection=arguments.collection,
        token=token,
    )

    if arguments.incoming == '-':
        # List the labels of the incoming areas, one JSON string per line.
        result = incoming_read_labels(**kwargs)
        print('\n'.join(
            map(
                partial(json.dumps, ensure_ascii=False),
                result)))
        return 0

    if arguments.pid:
        # Reading a single record is not paginated, warn about every
        # option that has no effect in this mode.
        for argument_value, argument_name in (
                (arguments.matching, '-m/--matching'),
                (arguments.page_size, '-s/--page-size'),
                (arguments.first_page, '-F/--first-page'),
                (arguments.last_page, '-l/--last-page'),
                (arguments.stats, '--stats'),
                (arguments.class_name, '-C/--class'),
        ):
            if argument_value:
                print(
                    f'WARNING: {argument_name} ignored because "-p/--pid" is provided',
                    file=sys.stderr,
                    flush=True)

        kwargs['pid'] = arguments.pid
        if arguments.curated:
            result = curated_read_record_with_pid(**kwargs)
        elif arguments.incoming:
            kwargs['label'] = arguments.incoming
            result = incoming_read_record_with_pid(**kwargs)
        else:
            kwargs['format'] = arguments.format
            result = collection_read_record_with_pid(**kwargs)
        print(json.dumps(result, ensure_ascii=False))
        return 0

    # All remaining modes read from paginated endpoints.
    kwargs.update(dict(
        matching=arguments.matching,
        page=arguments.first_page or 1,
        size=arguments.page_size or 100,
        last_page=arguments.last_page,
    ))

    if arguments.class_name:
        kwargs['class_name'] = arguments.class_name
        if arguments.curated:
            result = curated_read_records_of_class(**kwargs)
        elif arguments.incoming:
            kwargs['label'] = arguments.incoming
            result = incoming_read_records_of_class(**kwargs)
        else:
            kwargs['format'] = arguments.format
            result = collection_read_records_of_class(**kwargs)
    else:
        if arguments.curated:
            result = curated_read_records(**kwargs)
        elif arguments.incoming:
            kwargs['label'] = arguments.incoming
            result = incoming_read_records(**kwargs)
        else:
            kwargs['format'] = arguments.format
            result = collection_read_records(**kwargs)

    if arguments.stats:
        # Every entry carries the pagination information in its last three
        # elements, the first entry is therefore sufficient. If there are
        # no entries at all, report zero pages and zero items together
        # with the requested page size.
        first_entry = next(result, None)
        if first_entry is None:
            stats = [0, kwargs['size'], 0]
        else:
            stats = first_entry[2:]
        print(json.dumps(stats, ensure_ascii=False))
        return 0

    if arguments.pagination:
        for record in result:
            print(json.dumps(record, ensure_ascii=False))
    else:
        # Strip the pagination information, print only the record.
        for record in result:
            print(json.dumps(record[0], ensure_ascii=False))
    return 0
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def main():
    """Entry point of the get-records command.

    Delegates to `_main` and reports an `HTTPError` raised by it on
    stderr.

    :return: the exit code of `_main`, or 1 if an `HTTPError` was raised
    """
    try:
        exit_code = _main()
    except HTTPError as e:
        print(f'ERROR: {e}: {e.response.text}', file=sys.stderr, flush=True)
        exit_code = 1
    return exit_code


if __name__ == '__main__':
    sys.exit(main())
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from ..communicate import (
|
|
9
|
+
HTTPError,
|
|
10
|
+
get_paginated,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Name of the environment variable from which the access token is read.
token_name = 'DUMPTHINGS_TOKEN'

# Help text shown by argparse (rendered verbatim because `_main` uses
# `RawDescriptionHelpFormatter`).
description = f"""Read paginated endpoint

This command lists all records that are available via paginated endpoints from
a dump-things-service, e.g., from:

https://<service-location>/<collection>/records/p/

If the environment variable "{token_name}" is set, its content will be used
as token to authenticate against the dump-things-service.

"""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _main():
    """Run the read-pages command and return its exit code.

    All records of the paginated endpoint given on the command line are
    read via `get_paginated` and printed as JSON lines on stdout. With
    `--stats` only
    `[<total number of pages>, <page size>, <total number of items>]` is
    printed.

    The token is read from the environment variable named by `token_name`;
    a missing token only triggers a warning.

    :return: 0
    """
    argument_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    argument_parser.add_argument('url', help='url of the paginated endpoint of the dump-things-service')
    argument_parser.add_argument('-s', '--page-size', type=int, default=100, help='set the page size (1 - 100) (default: 100)')
    argument_parser.add_argument('-F', '--first-page', type=int, default=1, help='the first page to return (default: 1)')
    argument_parser.add_argument('-l', '--last-page', type=int, default=None, help='the last page to return (default: None (return all pages)')
    argument_parser.add_argument('--stats', action='store_true', help='show information about the number of records and pages and exit, the result is returned as [<total number of pages>, <page size>, <total number of items>]')
    argument_parser.add_argument('-f', '--format', help='format of the output records ("json" or "ttl"). (NOTE: not all endpoints support the format parameter.)')
    # argparse applies %-formatting to help strings, a literal percent sign
    # has to be written as `%%`.
    argument_parser.add_argument('-m', '--matching', help='return only records that have a matching value (use %% as wildcard). (NOTE: not all endpoints and backends support matching.)')
    argument_parser.add_argument('-P', '--pagination', action='store_true', help='show pagination information (each record from an paginated endpoint is returned as [<record>, <current page number>, <total number of pages>, <page size>, <total number of items>]')

    arguments = argument_parser.parse_args()

    token = os.environ.get(token_name)
    if token is None:
        print(f'WARNING: {token_name} not set', file=sys.stderr, flush=True)

    result = get_paginated(
        url=arguments.url,
        # Without the token the request would always be unauthenticated,
        # although the token was read from the environment.
        token=token,
        first_page=arguments.first_page,
        page_size=arguments.page_size,
        last_page=arguments.last_page,
        parameters={
            'format': arguments.format,
            **({'matching': arguments.matching}
               if arguments.matching is not None
               else {}
            ),
        }
    )

    if arguments.stats:
        # Every entry carries the pagination information in its last three
        # elements, the first entry is therefore sufficient. If there are
        # no entries at all, report zero pages and zero items together
        # with the requested page size.
        first_entry = next(result, None)
        if first_entry is None:
            stats = [0, arguments.page_size, 0]
        else:
            stats = first_entry[2:]
        print(json.dumps(stats, ensure_ascii=False))
        return 0

    if arguments.pagination:
        for record in result:
            print(json.dumps(record, ensure_ascii=False))
    else:
        # Strip the pagination information, print only the record.
        for record in result:
            print(json.dumps(record[0], ensure_ascii=False))
    return 0
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def main():
    """Entry point of the read-pages command.

    Delegates to `_main` and reports an `HTTPError` raised by it on
    stderr.

    :return: the exit code of `_main`, or 1 if an `HTTPError` was raised
    """
    try:
        exit_code = _main()
    except HTTPError as e:
        print(f'ERROR: {e}: {e.response.text}', file=sys.stderr, flush=True)
        exit_code = 1
    return exit_code


if __name__ == '__main__':
    sys.exit(main())
|
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from itertools import count
|
|
5
|
+
from typing import (
|
|
6
|
+
Callable,
|
|
7
|
+
Generator,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
from requests.exceptions import HTTPError
|
|
12
|
+
|
|
13
|
+
from . import JSON
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Public names of this module.
__all__ = [
    # re-exported names
    'HTTPError',
    'JSON',
    # generic access to endpoints
    'get_paginated',
    'get',
    # access to a collection
    'collection_delete_record',
    'collection_read_records',
    'collection_read_records_of_class',
    'collection_read_record_with_pid',
    'collection_validate_record',
    'collection_write_record',
    # access to the curated area of a collection
    'curated_delete_record',
    'curated_read_records',
    'curated_read_records_of_class',
    'curated_read_record_with_pid',
    'curated_write_record',
    # access to the incoming areas of a collection
    'incoming_delete_record',
    'incoming_read_labels',
    'incoming_read_records',
    'incoming_read_records_of_class',
    'incoming_read_record_with_pid',
    'incoming_write_record',
]


# Logger shared by the functions of this module.
logger = logging.getLogger('dump_things_pyclient')
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_paginated(url: str,
                  token: str | None = None,
                  first_page: int = 1,
                  page_size: int = 100,
                  last_page: int | None = None,
                  parameters: dict[str, str] | None = None,
                  ) -> Generator[tuple[JSON, int, int, int, int], None, None]:
    """Read all records from a paginated endpoint

    :param url: URL of the paginated endpoint, e.g., `https://.../records/p/`
    :param token: [optional] if str: token to authenticate against the endpoint,
        if None: no token will be sent to the endpoint
    :param first_page: [optional] first page to return (default: 1)
    :param page_size: [optional] size of pages (default: 100)
    :param last_page: [optional] last page to return (default: None (return all pages))
    :param parameters: [optional] parameters to pass to the endpoint, the
        parameter `page` is set automatically in this function

    :return: a Generator yielding tuples containing the current record, the
        current page number, the total number of pages, the size of the pages,
        and total number of records. Nothing is yielded if `last_page` is
        smaller than `first_page` or if the endpoint reports zero pages.
    """
    # Compare against None explicitly, otherwise `last_page=0` would be
    # treated like "no last page given".
    if last_page is not None and last_page < first_page:
        logger.warning('last_page (%d) < first_page (%d)', last_page, first_page)
        return

    for page in count(start=first_page):
        result = _get_page(url, token, first_page=page, page_size=page_size, parameters=parameters)
        # From here on the page size reported by the endpoint is used.
        total_pages, page_size, total_items = result['pages'], result['size'], result['total']
        if total_pages == 0:
            return
        if last_page is None:
            last_page = total_pages

        yield from (
            (record, page, total_pages, page_size, total_items)
            for record in result['items'])

        # `>=` instead of `==`: if `first_page` lies beyond the last
        # available page, an equality test would never succeed and the
        # loop would request pages forever.
        if page >= min(last_page, total_pages):
            return
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get(url: str,
        token: str | None = None,
        parameters: dict[str, str] | None = None,
        ) -> JSON:
    """Fetch a JSON object from an endpoint that is not paginated

    :param url: URL of the endpoint, e.g., `https://.../records/`.
    :param token: [optional] a token used to authenticate against the
        endpoint; if None, no token is sent
    :param parameters: [optional] parameters that are passed to the endpoint

    :return: JSON object
    """
    json_object = _get_from_url(url, token, parameters)
    return json_object
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def collection_read_record_with_pid(service_url: str,
                                    collection: str,
                                    pid: str,
                                    format: str = 'json',
                                    token: str | None = None,
                                    ) -> dict | None:
    """Fetch the record with the given pid from a collection on the service

    The record is looked up in the curated area of the collection and, if a
    token is given, in the incoming area of the user identified by the
    token. Records from incoming areas take preference.

    :param service_url: base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: name of the collection
    :param pid: PID of the record that should be retrieved
    :param format: format in which the record should be returned, either
        `json` or `ttl`
    :param token: [optional] a token used to authenticate against the
        endpoint; if None, no token is sent

    :return: The record, if it exists, None otherwise.
    """
    record_url = _build_url(service_url, collection, 'record')
    query = {'pid': pid, 'format': format}
    return get(url=record_url, token=token, parameters=query)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def collection_read_records(service_url: str,
                            collection: str,
                            matching: str | None = None,
                            format: str = 'json',
                            token: str | None = None,
                            page: int = 1,
                            size: int = 100,
                            last_page: int | None = None,
                            ) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Iterate over the records of a collection on the service

    :param service_url: base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: name of the collection
    :param matching: [optional] only records that have a matching value are
        returned (string comparison with `%` as wildcard)
    :param format: format in which the records should be returned, either
        `json` or `ttl` (default: `json`)
    :param token: [optional] a token used to authenticate against the
        endpoint; if None, no token is sent
    :param page: first page that should be returned (default: 1)
    :param size: number of records in an individual page (default: 100)
    :param last_page: if int, the last page that should be returned; if
        None, all pages following `page` are returned

    :return: A generator yielding tuples containing: the current record, the
        current page number, the total number of pages, the size of the
        pages, the total number of records
    """
    # `matching` is only sent to the endpoint if it has a non-empty value.
    query = {'format': format}
    if matching:
        query['matching'] = matching

    return get_paginated(
        url=_build_url(service_url, collection, 'records/p/'),
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def collection_read_records_of_class(
        service_url: str,
        collection: str,
        class_name: str,
        matching: str | None = None,
        format: str = 'json',
        token: str | None = None,
        page: int = 1,
        size: int = 100,
        last_page: int | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Iterate over the records of one class in a collection on the service

    :param service_url: base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: name of the collection
    :param class_name: name of the class whose instances should be returned
    :param matching: [optional] only records that have a matching value are
        returned (string comparison with `%` as wildcard)
    :param format: format in which the records should be returned, either
        `json` or `ttl` (default: `json`)
    :param token: [optional] a token used to authenticate against the
        endpoint; if None, no token is sent
    :param page: first page that should be returned (default: 1)
    :param size: number of records in an individual page (default: 100)
    :param last_page: if int, the last page that should be returned; if
        None, all pages following `page` are returned

    :return: A generator yielding tuples containing: the current record, the
        current page number, the total number of pages, the size of the
        pages, the total number of records
    """
    # `matching` is only sent to the endpoint if it has a non-empty value.
    query = {'format': format}
    if matching:
        query['matching'] = matching

    return get_paginated(
        url=_build_url(service_url, collection, f'records/p/{class_name}'),
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def collection_write_record(
    service_url: str,
    collection: str,
    class_name: str,
    record: dict | str,
    format: str = 'json',
    token: str | None = None,
) -> list[JSON]:
    """Submit a record of class `class_name` to an inbox of the collection

    The record is POSTed to `<service_url>/<collection>/record/<class_name>`
    with `format` passed as query parameter.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param class_name: the class of the record given in `record`
    :param record: dict | str: the record that should be written
    :param format: the format of `record`, either `json` or `ttl`
        (default: `json`)
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. The token must have write
        access to the incoming area in the collection

    :return list[JSON]: the records that were written. There might be more
        than one record due to inlined-relations extraction, and the
        individual records might have annotations added
    """
    endpoint = _build_url(service_url, collection, f'record/{class_name}')
    query = {'format': format}
    return _post_to_url(url=endpoint, token=token, json=record, params=query)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def collection_validate_record(
    service_url: str,
    collection: str,
    class_name: str,
    record: dict | str,
    format: str = 'json',
    token: str | None = None,
) -> list[JSON]:
    """Ask the service to validate a record of class `class_name`

    Validation involves conversion of the record from json to ttl, or from
    ttl to json. The record is POSTed to
    `<service_url>/<collection>/validate/<class_name>` with `format` passed
    as query parameter.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param class_name: the class of the record given in `record`
    :param record: dict | str: the record that should be validated
    :param format: the format of `record`, either `json` or `ttl`
        (default: `json`)
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. The token must have write
        access to the incoming area in the collection

    :return: the decoded response body of the validation endpoint, exactly
        as handed back by `_post_to_url`. An HTTP error status raises
        instead of returning.
    """
    # Drop a single trailing slash up front; `_build_url` strips one more.
    service_url = service_url.removesuffix('/')
    endpoint = _build_url(service_url, collection, f'validate/{class_name}')
    query = {'format': format}
    return _post_to_url(url=endpoint, token=token, json=record, params=query)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def collection_delete_record(
    service_url: str,
    collection: str,
    pid: str,
    token: str | None = None,
) -> bool:
    """Remove the record identified by `pid` from the collection

    A DELETE request is sent to `<service_url>/<collection>/record` with
    the PID passed as query parameter `pid`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param pid: the PID of the record that should be deleted
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent

    :return: True if the record was deleted, False otherwise
    """
    endpoint = _build_url(service_url, collection, 'record')
    query = {'pid': pid}
    return _delete_url(url=endpoint, token=token, params=query)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def curated_read_record_with_pid(
    service_url: str,
    collection: str,
    pid: str,
    token: str | None = None,
) -> dict | None:
    """Fetch the record identified by `pid` from the curated area

    The record is returned as it is stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. The request goes to
    `<service_url>/<collection>/curated/record` with the PID passed as
    parameter `pid`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param pid: the PID of the record that should be retrieved
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights

    :return: The record, if it exists, None otherwise
    """
    endpoint = _build_url(service_url, collection, 'curated/record')
    query = {'pid': pid}
    return get(url=endpoint, token=token, parameters=query)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def curated_read_records(
    service_url: str,
    collection: str,
    matching: str | None = None,
    token: str | None = None,
    page: int = 1,
    size: int = 100,
    last_page: int | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Page through the records in the curated area of the collection

    Records are returned as they are stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. Pagination is delegated to
    `get_paginated` on `<service_url>/<collection>/curated/records/p/`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param matching: [optional] return only records that have a matching
        value (string comparison with `%` as wildcard); an empty string is
        treated like None, i.e. no filter is sent
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights
    :param page: int: the first page that should be returned (default: 1)
    :param size: int: the number of records in an individual page
        (default: 100)
    :param last_page: int | None: if int, the last page that should be
        returned; if None, all pages following `page` will be returned

    :return: A generator yielding tuples containing: the current record,
        the current page number, the total number of pages, the size of
        the pages, the total number of records
    """
    # Only send the filter when one was actually requested.
    query = {}
    if matching:
        query['matching'] = matching
    endpoint = _build_url(service_url, collection, 'curated/records/p/')
    return get_paginated(
        url=endpoint,
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query,
    )
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def curated_read_records_of_class(
    service_url: str,
    collection: str,
    class_name: str,
    matching: str | None = None,
    token: str | None = None,
    page: int = 1,
    size: int = 100,
    last_page: int | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Page through curated-area records that are instances of `class_name`

    Records are returned as they are stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. Pagination is delegated to
    `get_paginated` on
    `<service_url>/<collection>/curated/records/p/<class_name>`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param class_name: the name of the class whose instances should be
        returned
    :param matching: [optional] return only records that have a matching
        value (string comparison with `%` as wildcard); an empty string is
        treated like None, i.e. no filter is sent
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights for the collection
    :param page: int: the first page that should be returned (default: 1)
    :param size: int: the number of records in an individual page
        (default: 100)
    :param last_page: int | None: if int, the last page that should be
        returned; if None, all pages following `page` will be returned

    :return: A generator yielding tuples containing: the current record,
        the current page number, the total number of pages, the size of
        the pages, the total number of records
    """
    # Only send the filter when one was actually requested.
    query = {}
    if matching:
        query['matching'] = matching
    endpoint = _build_url(
        service_url, collection, f'curated/records/p/{class_name}')
    return get_paginated(
        url=endpoint,
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query,
    )
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def curated_write_record(
    service_url: str,
    collection: str,
    class_name: str,
    record: dict,
    token: str | None = None,
) -> list[JSON]:
    """Store a record of class `class_name` directly in the curated area

    Records are written without modification, i.e. there is no
    "Schema-Type-Layer", there is no extraction of inlined records, and
    there is no annotation-adding. The record is POSTed to
    `<service_url>/<collection>/curated/record/<class_name>`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param class_name: the class of the record given in `record`
    :param record: dict: the record that should be written
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A given token must have
        curator-rights for the collection

    :return list[JSON]: a list containing the record that was written
    """
    endpoint = _build_url(
        service_url, collection, f'curated/record/{class_name}')
    return _post_to_url(url=endpoint, token=token, json=record)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def curated_delete_record(
    service_url: str,
    collection: str,
    pid: str,
    token: str | None = None,
) -> bool:
    """Remove the record identified by `pid` from the curated area

    A DELETE request is sent to `<service_url>/<collection>/curated/record`
    with the PID passed as query parameter `pid`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param pid: the PID of the record that should be deleted
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A given token must have
        curator-rights for the collection

    :return: True if the record was deleted, False otherwise
    """
    endpoint = _build_url(service_url, collection, 'curated/record')
    query = {'pid': pid}
    return _delete_url(url=endpoint, token=token, params=query)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def incoming_read_labels(
    service_url: str,
    collection: str,
    token: str | None = None,
) -> Generator[str, None, None]:
    """Iterate over the labels of all incoming areas of the collection

    This is a generator function: the request to
    `<service_url>/<collection>/incoming/` is only sent once iteration
    starts.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A given token must have
        curator-rights for the collection

    :return: a generator yielding the incoming area labels one by one
    """
    endpoint = _build_url(service_url, collection,'incoming/')
    labels = _get_from_url(url=endpoint, token=token)
    yield from labels
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def incoming_read_record_with_pid(
    service_url: str,
    collection: str,
    label: str,
    pid: str,
    token: str | None = None,
) -> dict | None:
    """Fetch the record identified by `pid` from the incoming area `label`

    The record is returned as it is stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. The request goes to
    `<service_url>/<collection>/incoming/<label>/record` with the PID
    passed as parameter `pid`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param pid: the PID of the record that should be retrieved
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights

    :return: The record, if it exists, None otherwise
    """
    endpoint = _build_incoming_url(service_url, collection, label, 'record')
    query = {'pid': pid}
    return get(url=endpoint, token=token, parameters=query)
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def incoming_read_records(
    service_url: str,
    collection: str,
    label: str,
    matching: str | None = None,
    token: str | None = None,
    page: int = 1,
    size: int = 100,
    last_page: int | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Page through the records in the incoming area `label` of the collection

    Records are returned as they are stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. Pagination is delegated to
    `get_paginated` on
    `<service_url>/<collection>/incoming/<label>/records/p/`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param matching: [optional] return only records that have a matching
        value (string comparison with `%` as wildcard); an empty string is
        treated like None, i.e. no filter is sent
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights for the collection
    :param page: int: the first page that should be returned (default: 1)
    :param size: int: the number of records in an individual page
        (default: 100)
    :param last_page: int | None: if int, the last page that should be
        returned; if None, all pages following `page` will be returned

    :return: A generator yielding tuples containing: the current record,
        the current page number, the total number of pages, the size of
        the pages, the total number of records
    """
    # Only send the filter when one was actually requested.
    query = {}
    if matching:
        query['matching'] = matching
    endpoint = _build_incoming_url(service_url, collection, label,'records/p/')
    return get_paginated(
        url=endpoint,
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query,
    )
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def incoming_read_records_of_class(
    service_url: str,
    collection: str,
    label: str,
    class_name: str,
    matching: str | None = None,
    token: str | None = None,
    page: int = 1,
    size: int = 100,
    last_page: int | None = None,
) -> Generator[tuple[dict, int, int, int, int], None, None]:
    """Page through records of class `class_name` in the incoming area `label`

    Records are returned as they are stored in the backend, i.e. no
    "Schema-Type-Layer" is involved. Pagination is delegated to
    `get_paginated` on
    `<service_url>/<collection>/incoming/<label>/records/p/<class_name>`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param class_name: the name of the class whose instances should be
        returned
    :param matching: [optional] return only records that have a matching
        value (string comparison with `%` as wildcard); an empty string is
        treated like None, i.e. no filter is sent
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A token must have
        curator-rights for the collection
    :param page: int: the first page that should be returned (default: 1)
    :param size: int: the number of records in an individual page
        (default: 100)
    :param last_page: int | None: if int, the last page that should be
        returned; if None, all pages following `page` will be returned

    :return: A generator yielding tuples containing: the current record,
        the current page number, the total number of pages, the size of
        the pages, the total number of records
    """
    # Only send the filter when one was actually requested.
    query = {}
    if matching:
        query['matching'] = matching
    endpoint = _build_incoming_url(
        service_url, collection, label,f'records/p/{class_name}')
    return get_paginated(
        url=endpoint,
        token=token,
        first_page=page,
        page_size=size,
        last_page=last_page,
        parameters=query,
    )
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def incoming_write_record(
    service_url: str,
    collection: str,
    label: str,
    class_name: str,
    record: dict,
    token: str | None = None,
) -> list[JSON]:
    """Store a record of class `class_name` directly in the incoming area `label`

    Records are written without modification, i.e. there is no
    "Schema-Type-Layer", there is no extraction of inlined records, and
    there is no annotation-adding. The record is POSTed to
    `<service_url>/<collection>/incoming/<label>/record/<class_name>`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param class_name: the class of the record given in `record`
    :param record: dict: the record that should be written
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A given token must have
        curator-rights for the collection

    :return list[JSON]: a list containing the record that was written
    """
    endpoint = _build_incoming_url(
        service_url, collection, label, f'record/{class_name}')
    return _post_to_url(url=endpoint, token=token, json=record)
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def incoming_delete_record(
    service_url: str,
    collection: str,
    label: str,
    pid: str,
    token: str | None = None,
) -> bool:
    """Remove the record identified by `pid` from the incoming area `label`

    A DELETE request is sent to
    `<service_url>/<collection>/incoming/<label>/record` with the PID
    passed as query parameter `pid`.

    :param service_url: the base URL of the service, i.e., the URL up to
        `/<collection>/...` or `/server`
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param pid: the PID of the record that should be deleted
    :param token: [optional] token used to authenticate against the
        endpoint; if None, no token is sent. A given token must have
        curator-rights for the collection

    :return: True if the record was deleted, False otherwise
    """
    endpoint = _build_incoming_url(service_url, collection, label,'record')
    query = {'pid': pid}
    return _delete_url(url=endpoint, token=token, params=query)
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _get_from_url(
    url: str,
    token: str | None,
    params: dict[str, str] | None = None,
) -> JSON:
    """Send a GET request to `url` and return the decoded response

    :param url: the complete URL of the endpoint
    :param token: token to send with the request, None for no token
    :param params: [optional] query parameters of the request

    :return: the decoded response, as produced by `_do_request`
    """
    return _do_request(requests.get, url, token, params)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _post_to_url(
    url: str,
    token: str | None,
    json: JSON,
    params: dict[str, str] | None = None,
) -> JSON:
    """Send a POST request with a JSON payload to `url` and return the decoded response

    :param url: the complete URL of the endpoint
    :param token: token to send with the request, None for no token
    :param json: the payload, handed to `requests.post` as `json` argument
    :param params: [optional] query parameters of the request

    :return: the decoded response, as produced by `_do_request`
    """
    return _do_request(requests.post, url, token, params=params, json=json)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _delete_url(
    url: str,
    token: str | None,
    params: dict[str, str] | None = None,
) -> JSON:
    """Send a DELETE request to `url` and return the decoded response

    :param url: the complete URL of the endpoint
    :param token: token to send with the request, None for no token
    :param params: [optional] query parameters of the request

    :return: the decoded response, as produced by `_do_request`
    """
    return _do_request(requests.delete, url, token, params)
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def _do_request(
    method: Callable,
    url: str,
    token: str | object | None,
    params: dict[str, str] | None,
    **kwargs,
) -> JSON:
    """Perform an HTTP request via `method` and decode the response

    :param method: the callable that performs the request; it is invoked as
        `method(url, headers=..., params=..., **kwargs)`
    :param url: the complete URL of the endpoint
    :param token: if not None, sent in the `x-dumpthings-token` header;
        if None, no headers are added
    :param params: query parameters; None or an empty dict results in an
        empty parameter dict being passed to `method`
    :param kwargs: additional keyword arguments forwarded to `method`

    :return: the response text if the content type of the response starts
        with `text/turtle`, otherwise the result of `response.json()`
    :raises: whatever `response.raise_for_status()` raises for error status
        codes
    """
    headers = {}
    if token is not None:
        headers['x-dumpthings-token'] = token

    response = method(url, headers=headers, params=params or {}, **kwargs)
    response.raise_for_status()

    # Turtle responses are plain text; everything else is treated as JSON.
    content_type = response.headers.get('content-type', '').strip()
    if content_type.startswith('text/turtle'):
        return response.text
    return response.json()
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def _build_url(
    service_url: str,
    collection: str,
    tail: str,
) -> str:
    """Join service URL, collection name, and endpoint path with `/`

    At most one trailing `/` is dropped from `service_url` and from
    `collection` before joining; `tail` is appended unchanged.

    :param service_url: the base URL of the service
    :param collection: the name of the collection
    :param tail: the endpoint path below the collection

    :return: `<service_url>/<collection>/<tail>`
    """
    base = service_url.removesuffix('/')
    name = collection.removesuffix('/')
    return '/'.join((base, name, tail))
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def _build_incoming_url(
    service_url: str,
    collection: str,
    label: str,
    tail: str,
) -> str:
    """Build the URL of an endpoint below the incoming area `label`

    At most one trailing `/` is dropped from `label`; the result is
    `_build_url(service_url, collection, 'incoming/<label>/<tail>')`.

    :param service_url: the base URL of the service
    :param collection: the name of the collection
    :param label: the label of the incoming area in the collection
    :param tail: the endpoint path below the incoming area

    :return: the complete endpoint URL
    """
    area = label.removesuffix('/')
    incoming_tail = f'incoming/{area}/{tail}'
    return _build_url(service_url, collection, incoming_tail)
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def _get_page(
    url_base: str,
    token: str | None = None,
    first_page: int = 1,
    page_size: int = 100,
    parameters: dict | None = None,
) -> JSON:
    """Fetch a single page from a paginated endpoint

    The query sent to the endpoint consists of the entries of `parameters`
    plus `page` and `size`. Entries named `page` or `size` in `parameters`
    are overridden by `first_page` and `page_size`.

    :param url_base: the URL of the paginated endpoint
    :param token: [optional] token to send with the request, None for no
        token
    :param first_page: the number of the page to fetch (default: 1)
    :param page_size: the number of records per page (default: 100)
    :param parameters: [optional] additional query parameters; the given
        dict is not modified

    :return: the decoded response, as produced by `_get_from_url`
    """
    # Build a fresh dict so that the paging keys do not leak into the
    # caller's dict (the previous implementation wrote `page` and `size`
    # into a non-empty dict passed by the caller).
    query = {
        **(parameters or {}),
        'page': first_page,
        'size': page_size,
    }
    return _get_from_url(url_base, token, query)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dump-things-pyclient
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A client library and some CLI commands for dump-things-service
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: requests>=2.32.5
|
|
8
|
+
|
|
9
|
+
# Dump Things Python Client
|
|
10
|
+
|
|
11
|
+
A simple client library for dump-things-service in Python
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## Tech Stack
|
|
15
|
+
|
|
16
|
+
- Python >= 3.11
|
|
17
|
+
|
|
18
|
+
- [uv](https://astral.sh/uv) for dependency management
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
## Acknowledgements
|
|
22
|
+
|
|
23
|
+
This work was funded, in part, by:
|
|
24
|
+
|
|
25
|
+
- Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant TRR 379 (546006540, Q02 project)
|
|
26
|
+
|
|
27
|
+
- MKW-NRW: Ministerium für Kultur und Wissenschaft des Landes Nordrhein-Westfalen under the Kooperationsplattformen 2022 program, grant number: KP22-106A
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
dump_things_pyclient/__init__.py
|
|
4
|
+
dump_things_pyclient/communicate.py
|
|
5
|
+
dump_things_pyclient.egg-info/PKG-INFO
|
|
6
|
+
dump_things_pyclient.egg-info/SOURCES.txt
|
|
7
|
+
dump_things_pyclient.egg-info/dependency_links.txt
|
|
8
|
+
dump_things_pyclient.egg-info/entry_points.txt
|
|
9
|
+
dump_things_pyclient.egg-info/requires.txt
|
|
10
|
+
dump_things_pyclient.egg-info/top_level.txt
|
|
11
|
+
dump_things_pyclient/commands/__init__.py
|
|
12
|
+
dump_things_pyclient/commands/auto_curate.py
|
|
13
|
+
dump_things_pyclient/commands/get_records.py
|
|
14
|
+
dump_things_pyclient/commands/read_pages.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests>=2.32.5
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dump_things_pyclient
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dump-things-pyclient"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A client library and some CLI commands for dump-things-service"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"requests>=2.32.5",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[dependency-groups]
|
|
12
|
+
tests = [
|
|
13
|
+
"pytest>=9.0.1",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
auto-curate = "dump_things_pyclient.commands.auto_curate:main"
|
|
18
|
+
read-pages = "dump_things_pyclient.commands.read_pages:main"
|
|
19
|
+
get-records = "dump_things_pyclient.commands.get_records:main"
|