ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +8 -6
- ocrd/cli/bashlib.py +8 -114
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/mets_server.py +11 -15
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
- ocrd-3.7.0.dist-info/RECORD +123 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +31 -33
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +5 -5
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +15 -7
- ocrd_utils/os.py +17 -4
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd/cli/log.py +0 -51
- ocrd/lib.bash +0 -317
- ocrd-3.5.1.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd_network/cli/client.py
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
+
import sys
|
|
1
2
|
import click
|
|
2
3
|
from json import dumps
|
|
3
4
|
from typing import List, Optional, Tuple
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
from tempfile import NamedTemporaryFile
|
|
7
|
+
|
|
4
8
|
from ocrd.decorators.parameter_option import parameter_option, parameter_override_option
|
|
5
9
|
from ocrd_network.constants import JobState
|
|
6
10
|
from ocrd_utils import DEFAULT_METS_BASENAME
|
|
7
11
|
from ocrd_utils.introspect import set_json_key_value_overrides
|
|
8
12
|
from ocrd_utils.str import parse_json_string_or_file
|
|
9
13
|
from ..client import Client
|
|
14
|
+
from requests import RequestException
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
ADDRESS_HELP = 'The URL of the Processing Server. If not provided, ' + \
|
|
18
|
+
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" environment variable is used by default'
|
|
10
19
|
|
|
11
20
|
|
|
21
|
+
class URLType(click.types.StringParamType):
|
|
22
|
+
name = "url"
|
|
23
|
+
def convert(self, value, param, ctx):
|
|
24
|
+
try:
|
|
25
|
+
parsed = urlparse(value)
|
|
26
|
+
if parsed.scheme not in ("http", "https"):
|
|
27
|
+
self.fail(f"invalid URL scheme ({parsed.scheme}): only HTTP allowed",
|
|
28
|
+
param, ctx)
|
|
29
|
+
return value
|
|
30
|
+
except ValueError as err:
|
|
31
|
+
self.fail(err, param, ctx)
|
|
32
|
+
URL = URLType()
|
|
33
|
+
|
|
12
34
|
@click.group('client')
|
|
13
35
|
def client_cli():
|
|
14
36
|
"""
|
|
@@ -27,30 +49,40 @@ def discovery_cli():
|
|
|
27
49
|
|
|
28
50
|
|
|
29
51
|
@discovery_cli.command('processors')
|
|
30
|
-
@click.option('--address',
|
|
31
|
-
help='The address of the Processing Server. If not provided, '
|
|
32
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
52
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
33
53
|
def check_deployed_processors(address: Optional[str]):
|
|
34
54
|
"""
|
|
35
|
-
Get a list of deployed processing workers
|
|
55
|
+
Get a list of deployed processing workers.
|
|
36
56
|
Each processor is shown only once regardless of the amount of deployed instances.
|
|
37
57
|
"""
|
|
38
58
|
client = Client(server_addr_processing=address)
|
|
39
|
-
|
|
59
|
+
try:
|
|
60
|
+
processors_list = client.check_deployed_processors()
|
|
61
|
+
except RequestException as e:
|
|
62
|
+
print(
|
|
63
|
+
getattr(e, 'detail_message', str(e)),
|
|
64
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
65
|
+
)
|
|
66
|
+
sys.exit(1)
|
|
40
67
|
print(dumps(processors_list, indent=4))
|
|
41
68
|
|
|
42
69
|
|
|
43
70
|
@discovery_cli.command('processor')
|
|
44
|
-
@click.option('--address',
|
|
45
|
-
help='The address of the Processing Server. If not provided, '
|
|
46
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
71
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
47
72
|
@click.argument('processor_name', required=True, type=click.STRING)
|
|
48
73
|
def check_processor_ocrd_tool(address: Optional[str], processor_name: str):
|
|
49
74
|
"""
|
|
50
75
|
Get the json tool of a deployed processor specified with `processor_name`
|
|
51
76
|
"""
|
|
52
77
|
client = Client(server_addr_processing=address)
|
|
53
|
-
|
|
78
|
+
try:
|
|
79
|
+
ocrd_tool = client.check_deployed_processor_ocrd_tool(processor_name=processor_name)
|
|
80
|
+
except RequestException as e:
|
|
81
|
+
print(
|
|
82
|
+
getattr(e, 'detail_message', str(e)),
|
|
83
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
84
|
+
)
|
|
85
|
+
sys.exit(1)
|
|
54
86
|
print(dumps(ocrd_tool, indent=4))
|
|
55
87
|
|
|
56
88
|
|
|
@@ -63,39 +95,46 @@ def processing_cli():
|
|
|
63
95
|
|
|
64
96
|
|
|
65
97
|
@processing_cli.command('check-log')
|
|
66
|
-
@click.option('--address',
|
|
67
|
-
help='The address of the Processing Server. If not provided, '
|
|
68
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
98
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
69
99
|
@click.option('-j', '--processing-job-id', required=True)
|
|
70
|
-
def
|
|
100
|
+
def check_processing_job_log(address: Optional[str], processing_job_id: str):
|
|
71
101
|
"""
|
|
72
102
|
Check the log of a previously submitted processing job.
|
|
73
103
|
"""
|
|
74
104
|
client = Client(server_addr_processing=address)
|
|
75
|
-
|
|
105
|
+
try:
|
|
106
|
+
response = client.check_job_log(job_id=processing_job_id)
|
|
107
|
+
except RequestException as e:
|
|
108
|
+
print(
|
|
109
|
+
getattr(e, 'detail_message', str(e)),
|
|
110
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
111
|
+
)
|
|
112
|
+
sys.exit(1)
|
|
76
113
|
print(response._content.decode(encoding='utf-8'))
|
|
77
114
|
|
|
78
115
|
|
|
79
116
|
@processing_cli.command('check-status')
|
|
80
|
-
@click.option('--address',
|
|
81
|
-
help='The address of the Processing Server. If not provided, '
|
|
82
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
117
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
83
118
|
@click.option('-j', '--processing-job-id', required=True)
|
|
84
119
|
def check_processing_job_status(address: Optional[str], processing_job_id: str):
|
|
85
120
|
"""
|
|
86
121
|
Check the status of a previously submitted processing job.
|
|
87
122
|
"""
|
|
88
123
|
client = Client(server_addr_processing=address)
|
|
89
|
-
|
|
90
|
-
|
|
124
|
+
try:
|
|
125
|
+
job_status = client.check_job_status(processing_job_id)
|
|
126
|
+
except RequestException as e:
|
|
127
|
+
print(
|
|
128
|
+
getattr(e, 'detail_message', str(e)),
|
|
129
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
130
|
+
)
|
|
131
|
+
sys.exit(1)
|
|
91
132
|
print(f"Processing job status: {job_status}")
|
|
92
133
|
|
|
93
134
|
|
|
94
135
|
@processing_cli.command('run')
|
|
95
136
|
@click.argument('processor_name', required=True, type=click.STRING)
|
|
96
|
-
@click.option('--address',
|
|
97
|
-
help='The address of the Processing Server. If not provided, '
|
|
98
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
137
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
99
138
|
@click.option('-m', '--mets', required=True, default=DEFAULT_METS_BASENAME)
|
|
100
139
|
@click.option('-I', '--input-file-grp', default='OCR-D-INPUT')
|
|
101
140
|
@click.option('-O', '--output-file-grp', default='OCR-D-OUTPUT')
|
|
@@ -104,7 +143,6 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
|
|
|
104
143
|
@parameter_override_option
|
|
105
144
|
@click.option('--result-queue-name')
|
|
106
145
|
@click.option('--callback-url')
|
|
107
|
-
@click.option('--agent-type', default='worker')
|
|
108
146
|
@click.option('-b', '--block', default=False, is_flag=True,
|
|
109
147
|
help='If set, the client will block till job timeout, fail or success.')
|
|
110
148
|
@click.option('-p', '--print-state', default=False, is_flag=True,
|
|
@@ -120,9 +158,6 @@ def send_processing_job_request(
|
|
|
120
158
|
parameter_override: List[Tuple[str, str]],
|
|
121
159
|
result_queue_name: Optional[str],
|
|
122
160
|
callback_url: Optional[str],
|
|
123
|
-
# TODO: This is temporally available to toggle
|
|
124
|
-
# between the ProcessingWorker/ProcessorServer
|
|
125
|
-
agent_type: Optional[str],
|
|
126
161
|
block: Optional[bool],
|
|
127
162
|
print_state: Optional[bool]
|
|
128
163
|
):
|
|
@@ -133,7 +168,6 @@ def send_processing_job_request(
|
|
|
133
168
|
"path_to_mets": mets,
|
|
134
169
|
"description": "OCR-D Network client request",
|
|
135
170
|
"input_file_grps": input_file_grp.split(','),
|
|
136
|
-
"agent_type": agent_type
|
|
137
171
|
}
|
|
138
172
|
if output_file_grp:
|
|
139
173
|
req_params["output_file_grps"] = output_file_grp.split(',')
|
|
@@ -145,12 +179,26 @@ def send_processing_job_request(
|
|
|
145
179
|
if callback_url:
|
|
146
180
|
req_params["callback_url"] = callback_url
|
|
147
181
|
client = Client(server_addr_processing=address)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
182
|
+
try:
|
|
183
|
+
processing_job_id = client.send_processing_job_request(
|
|
184
|
+
processor_name=processor_name, req_params=req_params)
|
|
185
|
+
except RequestException as e:
|
|
186
|
+
print(
|
|
187
|
+
getattr(e, 'detail_message', str(e)),
|
|
188
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
189
|
+
)
|
|
190
|
+
sys.exit(1)
|
|
151
191
|
print(f"Processing job id: {processing_job_id}")
|
|
192
|
+
|
|
152
193
|
if block:
|
|
153
|
-
|
|
194
|
+
try:
|
|
195
|
+
client.poll_job_status(job_id=processing_job_id, print_state=print_state)
|
|
196
|
+
except RequestException as e:
|
|
197
|
+
print(
|
|
198
|
+
getattr(e, 'detail_message', str(e)),
|
|
199
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
200
|
+
)
|
|
201
|
+
sys.exit(1)
|
|
154
202
|
|
|
155
203
|
|
|
156
204
|
@client_cli.group('workflow')
|
|
@@ -162,58 +210,82 @@ def workflow_cli():
|
|
|
162
210
|
|
|
163
211
|
|
|
164
212
|
@workflow_cli.command('check-status')
|
|
165
|
-
@click.option('--address',
|
|
166
|
-
'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
|
|
213
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
167
214
|
@click.option('-j', '--workflow-job-id', required=True)
|
|
168
215
|
def check_workflow_job_status(address: Optional[str], workflow_job_id: str):
|
|
169
216
|
"""
|
|
170
217
|
Check the status of a previously submitted workflow job.
|
|
171
218
|
"""
|
|
172
219
|
client = Client(server_addr_processing=address)
|
|
173
|
-
|
|
174
|
-
|
|
220
|
+
try:
|
|
221
|
+
job_status = client.check_workflow_status(workflow_job_id)
|
|
222
|
+
except RequestException as e:
|
|
223
|
+
print(
|
|
224
|
+
getattr(e, 'detail_message', str(e)),
|
|
225
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
226
|
+
)
|
|
227
|
+
sys.exit(1)
|
|
175
228
|
print(f"Workflow job status: {job_status}")
|
|
176
229
|
|
|
177
230
|
|
|
178
231
|
@workflow_cli.command('run')
|
|
179
|
-
@click.option('--address',
|
|
180
|
-
|
|
181
|
-
@click.option('-
|
|
182
|
-
@click.option('-
|
|
183
|
-
@click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
|
|
232
|
+
@click.option('--address', type=URL, help=ADDRESS_HELP)
|
|
233
|
+
@click.option('-m', '--path-to-mets', required=True, help="path to METS file of workspace to be processed (server-side path)")
|
|
234
|
+
@click.option('-w', '--path-to-workflow', required=False, help="path to workflow file (server- or client-side path)")
|
|
235
|
+
@click.option('--page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
|
|
184
236
|
@click.option('-b', '--block', default=False, is_flag=True,
|
|
185
237
|
help='If set, the client will block till job timeout, fail or success.')
|
|
186
238
|
@click.option('-p', '--print-state', default=False, is_flag=True,
|
|
187
239
|
help='If set, the client will print job states by each iteration.')
|
|
240
|
+
@click.argument('tasks', nargs=-1)
|
|
188
241
|
def send_workflow_job_request(
|
|
189
242
|
address: Optional[str],
|
|
190
243
|
path_to_mets: str,
|
|
191
|
-
path_to_workflow: str,
|
|
244
|
+
path_to_workflow: Optional[str],
|
|
192
245
|
page_wise: bool,
|
|
193
246
|
block: bool,
|
|
194
|
-
print_state: bool
|
|
247
|
+
print_state: bool,
|
|
248
|
+
tasks: List[str]
|
|
195
249
|
):
|
|
196
250
|
"""
|
|
197
251
|
Submit a workflow job to the processing server.
|
|
252
|
+
|
|
253
|
+
Provide workflow either via `tasks` arguments (same syntax
|
|
254
|
+
as in ``ocrd process`` tasks arguments), or via `-w` file path
|
|
255
|
+
(same syntax, but newline separated).
|
|
198
256
|
"""
|
|
257
|
+
if (path_to_workflow) != bool(len(tasks)):
|
|
258
|
+
raise ValueError("either -w/path-to-workflow or task argument(s) is required")
|
|
259
|
+
|
|
199
260
|
client = Client(server_addr_processing=address)
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
261
|
+
with NamedTemporaryFile() as workflow_file:
|
|
262
|
+
for task in tasks:
|
|
263
|
+
workflow_file.write((task + '\n').encode('utf-8'))
|
|
264
|
+
workflow_file.flush()
|
|
265
|
+
workflow_job_id = client.send_workflow_job_request(
|
|
266
|
+
path_to_wf=path_to_workflow or workflow_file.name,
|
|
267
|
+
path_to_mets=path_to_mets,
|
|
268
|
+
page_wise=page_wise,
|
|
269
|
+
)
|
|
206
270
|
print(f"Workflow job id: {workflow_job_id}")
|
|
207
271
|
if block:
|
|
208
272
|
print(f"Polling state of workflow job {workflow_job_id}")
|
|
209
|
-
|
|
273
|
+
try:
|
|
274
|
+
state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state)
|
|
275
|
+
except RequestException as e:
|
|
276
|
+
print(
|
|
277
|
+
getattr(e, 'detail_message', str(e)),
|
|
278
|
+
f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
|
|
279
|
+
)
|
|
280
|
+
sys.exit(1)
|
|
210
281
|
if state != JobState.success:
|
|
211
282
|
print(f"Workflow failed with {state}")
|
|
212
283
|
exit(1)
|
|
213
284
|
else:
|
|
214
|
-
print(
|
|
285
|
+
print("Workflow succeeded")
|
|
215
286
|
exit(0)
|
|
216
287
|
|
|
288
|
+
|
|
217
289
|
@client_cli.group('workspace')
|
|
218
290
|
def workspace_cli():
|
|
219
291
|
"""
|
|
@@ -12,8 +12,7 @@ from ocrd_network import ProcessingServer, ServerAddressParamType
|
|
|
12
12
|
def processing_server_cli(path_to_config, address: str):
|
|
13
13
|
"""
|
|
14
14
|
Start the Processing Server
|
|
15
|
-
(proxy between the user and the
|
|
16
|
-
Processing Worker(s) / Processor Server(s))
|
|
15
|
+
(proxy between the user and the Processing Worker(s))
|
|
17
16
|
"""
|
|
18
17
|
|
|
19
18
|
# Note, the address is already validated with the type field
|
ocrd_network/client.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
|
-
from ocrd_utils import config, getLogger
|
|
2
|
+
from ocrd_utils import config, getLogger
|
|
3
3
|
from .client_utils import (
|
|
4
4
|
get_ps_deployed_processors,
|
|
5
5
|
get_ps_deployed_processor_ocrd_tool,
|
|
@@ -21,7 +21,7 @@ class Client:
|
|
|
21
21
|
timeout: int = config.OCRD_NETWORK_CLIENT_POLLING_TIMEOUT,
|
|
22
22
|
wait: int = config.OCRD_NETWORK_CLIENT_POLLING_SLEEP
|
|
23
23
|
):
|
|
24
|
-
self.log = getLogger(
|
|
24
|
+
self.log = getLogger("ocrd_network.client")
|
|
25
25
|
if not server_addr_processing:
|
|
26
26
|
server_addr_processing = config.OCRD_NETWORK_SERVER_ADDR_PROCESSING
|
|
27
27
|
self.server_addr_processing = server_addr_processing
|
ocrd_network/client_utils.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import json
|
|
2
|
-
|
|
2
|
+
import os
|
|
3
|
+
from requests import get as request_get, post as request_post, RequestException, Response
|
|
4
|
+
from requests.exceptions import JSONDecodeError
|
|
3
5
|
from time import sleep
|
|
4
6
|
from .constants import JobState, NETWORK_PROTOCOLS
|
|
5
7
|
|
|
6
8
|
|
|
7
|
-
def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int,
|
|
9
|
+
def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int,
|
|
10
|
+
print_state: bool = False) -> JobState:
|
|
8
11
|
if job_type not in ["workflow", "processor"]:
|
|
9
12
|
raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'")
|
|
10
13
|
job_state = JobState.unset
|
|
@@ -22,6 +25,19 @@ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries
|
|
|
22
25
|
return job_state
|
|
23
26
|
|
|
24
27
|
|
|
28
|
+
def _raise_if_error(response: Response) -> None:
|
|
29
|
+
"""Check the requests-response and raise an exception if its status code indicates an error"""
|
|
30
|
+
try:
|
|
31
|
+
response.raise_for_status()
|
|
32
|
+
except RequestException as e:
|
|
33
|
+
try:
|
|
34
|
+
message = response.json()["detail"]
|
|
35
|
+
except JSONDecodeError:
|
|
36
|
+
message = response.text
|
|
37
|
+
e.detail_message = message
|
|
38
|
+
raise e
|
|
39
|
+
|
|
40
|
+
|
|
25
41
|
def poll_job_status_till_timeout_fail_or_success(
|
|
26
42
|
ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
|
|
27
43
|
return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait, print_state)
|
|
@@ -35,14 +51,14 @@ def poll_wf_status_till_timeout_fail_or_success(
|
|
|
35
51
|
def get_ps_deployed_processors(ps_server_host: str):
|
|
36
52
|
request_url = f"{ps_server_host}/processor"
|
|
37
53
|
response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
|
|
38
|
-
|
|
54
|
+
_raise_if_error(response)
|
|
39
55
|
return response.json()
|
|
40
56
|
|
|
41
57
|
|
|
42
58
|
def get_ps_deployed_processor_ocrd_tool(ps_server_host: str, processor_name: str):
|
|
43
59
|
request_url = f"{ps_server_host}/processor/info/{processor_name}"
|
|
44
60
|
response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
|
|
45
|
-
|
|
61
|
+
_raise_if_error(response)
|
|
46
62
|
return response.json()
|
|
47
63
|
|
|
48
64
|
|
|
@@ -55,17 +71,18 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str):
|
|
|
55
71
|
def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState:
|
|
56
72
|
request_url = f"{ps_server_host}/processor/job/{processing_job_id}"
|
|
57
73
|
response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
|
|
58
|
-
|
|
74
|
+
_raise_if_error(response)
|
|
59
75
|
job_state = response.json()["state"]
|
|
60
|
-
assert job_state
|
|
76
|
+
assert job_state, "Propery 'state' is expected to always have a value"
|
|
61
77
|
return getattr(JobState, job_state.lower())
|
|
62
78
|
|
|
79
|
+
|
|
63
80
|
def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState:
|
|
64
81
|
request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}"
|
|
65
82
|
response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
|
|
66
|
-
|
|
83
|
+
_raise_if_error(response)
|
|
67
84
|
job_state = response.json()["state"]
|
|
68
|
-
assert job_state
|
|
85
|
+
assert job_state, "Property 'state' is expected to always have a value"
|
|
69
86
|
return getattr(JobState, job_state.lower())
|
|
70
87
|
|
|
71
88
|
|
|
@@ -76,9 +93,9 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d
|
|
|
76
93
|
headers={"accept": "application/json; charset=utf-8"},
|
|
77
94
|
json=job_input
|
|
78
95
|
)
|
|
79
|
-
|
|
96
|
+
_raise_if_error(response)
|
|
80
97
|
processing_job_id = response.json()["job_id"]
|
|
81
|
-
assert processing_job_id
|
|
98
|
+
assert processing_job_id, "Property 'job_id' is expected to always have a value"
|
|
82
99
|
return processing_job_id
|
|
83
100
|
|
|
84
101
|
|
|
@@ -92,16 +109,16 @@ def post_ps_workflow_request(
|
|
|
92
109
|
response = request_post(
|
|
93
110
|
url=request_url,
|
|
94
111
|
headers={"accept": "application/json; charset=utf-8"},
|
|
95
|
-
files={"workflow": open(path_to_wf, "rb")}
|
|
112
|
+
files={"workflow": open(path_to_wf, "rb") if os.path.exists(path_to_wf) else path_to_wf}
|
|
96
113
|
)
|
|
97
114
|
# print(response.json())
|
|
98
115
|
# print(response.__dict__)
|
|
99
116
|
json_resp_raw = response.text
|
|
100
117
|
# print(f'post_ps_workflow_request >> {response.status_code}')
|
|
101
118
|
# print(f'post_ps_workflow_request >> {json_resp_raw}')
|
|
102
|
-
|
|
119
|
+
_raise_if_error(response)
|
|
103
120
|
wf_job_id = json.loads(json_resp_raw)["job_id"]
|
|
104
|
-
assert wf_job_id
|
|
121
|
+
assert wf_job_id, "Property 'job_id' is expected to always have a value"
|
|
105
122
|
return wf_job_id
|
|
106
123
|
|
|
107
124
|
|
ocrd_network/constants.py
CHANGED
|
@@ -15,10 +15,6 @@ class StrEnum(str, Enum):
|
|
|
15
15
|
def __str__(self):
|
|
16
16
|
return self.value
|
|
17
17
|
|
|
18
|
-
class AgentType(StrEnum):
|
|
19
|
-
PROCESSING_WORKER = "worker"
|
|
20
|
-
PROCESSOR_SERVER = "server"
|
|
21
|
-
|
|
22
18
|
|
|
23
19
|
class DeployType(StrEnum):
|
|
24
20
|
# Deployed by the Processing Server config file
|
|
@@ -39,7 +35,7 @@ class JobState(StrEnum):
|
|
|
39
35
|
failed = "FAILED"
|
|
40
36
|
# The processing job is queued inside the RabbitMQ
|
|
41
37
|
queued = "QUEUED"
|
|
42
|
-
# Processing job is currently running
|
|
38
|
+
# Processing job is currently running on a Worker
|
|
43
39
|
running = "RUNNING"
|
|
44
40
|
# Processing job finished successfully
|
|
45
41
|
success = "SUCCESS"
|
|
@@ -52,7 +48,6 @@ class NetworkLoggingDirs(StrEnum):
|
|
|
52
48
|
PROCESSING_JOBS = "processing_jobs"
|
|
53
49
|
PROCESSING_SERVERS = "processing_servers"
|
|
54
50
|
PROCESSING_WORKERS = "processing_workers"
|
|
55
|
-
PROCESSOR_SERVERS = "processor_servers"
|
|
56
51
|
|
|
57
52
|
|
|
58
53
|
class ServerApiTags(StrEnum):
|
ocrd_network/database.py
CHANGED
|
@@ -65,7 +65,7 @@ async def sync_db_create_workspace(mets_path: str) -> DBWorkspace:
|
|
|
65
65
|
async def db_get_workspace(workspace_id: str = None, workspace_mets_path: str = None) -> DBWorkspace:
|
|
66
66
|
workspace = None
|
|
67
67
|
if not workspace_id and not workspace_mets_path:
|
|
68
|
-
raise ValueError(
|
|
68
|
+
raise ValueError('Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
|
|
69
69
|
if workspace_id:
|
|
70
70
|
workspace = await DBWorkspace.find_one(
|
|
71
71
|
DBWorkspace.workspace_id == workspace_id
|
|
@@ -89,7 +89,7 @@ async def sync_db_get_workspace(workspace_id: str = None, workspace_mets_path: s
|
|
|
89
89
|
async def db_update_workspace(workspace_id: str = None, workspace_mets_path: str = None, **kwargs) -> DBWorkspace:
|
|
90
90
|
workspace = None
|
|
91
91
|
if not workspace_id and not workspace_mets_path:
|
|
92
|
-
raise ValueError(
|
|
92
|
+
raise ValueError('Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
|
|
93
93
|
if workspace_id:
|
|
94
94
|
workspace = await DBWorkspace.find_one(DBWorkspace.workspace_id == workspace_id)
|
|
95
95
|
if not workspace:
|
|
@@ -274,4 +274,4 @@ def verify_mongodb_available(mongo_url: str) -> None:
|
|
|
274
274
|
client = MongoClient(mongo_url, serverSelectionTimeoutMS=60000.0)
|
|
275
275
|
client.admin.command("ismaster")
|
|
276
276
|
except Exception:
|
|
277
|
-
raise RuntimeError(f'Cannot connect to MongoDB: {re_sub(r":[^@]+@", ":****@", mongo_url)}')
|
|
277
|
+
raise RuntimeError(f'Cannot connect to MongoDB: {re_sub(r":[^@]+@", ":****@", mongo_url)}')
|
ocrd_network/logging_utils.py
CHANGED
|
@@ -2,7 +2,7 @@ from logging import FileHandler, Formatter, Logger
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
from ocrd_utils import config, LOG_FORMAT, safe_filename
|
|
5
|
-
from .constants import
|
|
5
|
+
from .constants import NetworkLoggingDirs
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def configure_file_handler_with_formatter(logger: Logger, log_file: Path, mode: str = "a") -> None:
|
|
@@ -54,10 +54,5 @@ def get_processing_server_logging_file_path(pid: int) -> Path:
|
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Path:
|
|
57
|
-
log_file: str = f"
|
|
57
|
+
log_file: str = f"worker.{pid}.{processor_name}.log"
|
|
58
58
|
return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSING_WORKERS), log_file)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path:
|
|
62
|
-
log_file: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log"
|
|
63
|
-
return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS), log_file)
|
ocrd_network/models/__init__.py
CHANGED
|
@@ -10,13 +10,11 @@ __all__ = [
|
|
|
10
10
|
'DBWorkflowScript',
|
|
11
11
|
'PYJobInput',
|
|
12
12
|
'PYJobOutput',
|
|
13
|
-
'PYOcrdTool',
|
|
14
13
|
'PYResultMessage',
|
|
15
14
|
'PYWorkflowJobOutput'
|
|
16
15
|
]
|
|
17
16
|
|
|
18
17
|
from .job import DBProcessorJob, DBWorkflowJob, PYJobInput, PYJobOutput, PYWorkflowJobOutput
|
|
19
18
|
from .messages import PYResultMessage
|
|
20
|
-
from .ocrd_tool import PYOcrdTool
|
|
21
19
|
from .workspace import DBWorkspace
|
|
22
20
|
from .workflow import DBWorkflowScript
|
ocrd_network/models/job.py
CHANGED
|
@@ -2,7 +2,7 @@ from beanie import Document
|
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
from typing import Dict, List, Optional
|
|
5
|
-
from ..constants import
|
|
5
|
+
from ..constants import JobState
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class PYJobInput(BaseModel):
|
|
@@ -13,30 +13,28 @@ class PYJobInput(BaseModel):
|
|
|
13
13
|
workspace_id: Optional[str] = None
|
|
14
14
|
description: Optional[str] = None
|
|
15
15
|
input_file_grps: List[str]
|
|
16
|
-
output_file_grps: Optional[List[str]]
|
|
16
|
+
output_file_grps: Optional[List[str]] = None
|
|
17
17
|
page_id: Optional[str] = None
|
|
18
18
|
parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation
|
|
19
19
|
result_queue_name: Optional[str] = None
|
|
20
20
|
callback_url: Optional[str] = None
|
|
21
|
-
#
|
|
22
|
-
agent_type: AgentType = AgentType.PROCESSING_WORKER
|
|
23
|
-
# Auto generated by the Processing Server when forwarding to the Processor Server
|
|
21
|
+
# Auto generated by the Processing Server when forwarding to the Processing-Worker
|
|
24
22
|
job_id: Optional[str] = None
|
|
25
23
|
# If set, specifies a list of job ids this job depends on
|
|
26
24
|
depends_on: Optional[List[str]] = None
|
|
27
25
|
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
model_config = {
|
|
27
|
+
'json_schema_extra': {
|
|
30
28
|
'example': {
|
|
31
29
|
'path_to_mets': '/path/to/mets.xml',
|
|
32
30
|
'description': 'The description of this execution',
|
|
33
31
|
'input_file_grps': ['DEFAULT'],
|
|
34
32
|
'output_file_grps': ['OCR-D-BIN'],
|
|
35
|
-
'agent_type': AgentType.PROCESSING_WORKER,
|
|
36
33
|
'page_id': 'PHYS_0001..PHYS_0003',
|
|
37
34
|
'parameters': {}
|
|
38
35
|
}
|
|
39
36
|
}
|
|
37
|
+
}
|
|
40
38
|
|
|
41
39
|
|
|
42
40
|
class PYJobOutput(BaseModel):
|
|
@@ -45,12 +43,12 @@ class PYJobOutput(BaseModel):
|
|
|
45
43
|
job_id: str
|
|
46
44
|
processor_name: str
|
|
47
45
|
state: JobState = JobState.unset
|
|
48
|
-
path_to_mets: Optional[str]
|
|
49
|
-
workspace_id: Optional[str]
|
|
46
|
+
path_to_mets: Optional[str] = None
|
|
47
|
+
workspace_id: Optional[str] = None
|
|
50
48
|
input_file_grps: List[str]
|
|
51
|
-
output_file_grps: Optional[List[str]]
|
|
49
|
+
output_file_grps: Optional[List[str]] = None
|
|
52
50
|
page_id: Optional[str] = None
|
|
53
|
-
log_file_path: Optional[str]
|
|
51
|
+
log_file_path: Optional[str] = None
|
|
54
52
|
|
|
55
53
|
|
|
56
54
|
class DBProcessorJob(Document):
|
|
@@ -58,22 +56,22 @@ class DBProcessorJob(Document):
|
|
|
58
56
|
"""
|
|
59
57
|
job_id: str
|
|
60
58
|
processor_name: str
|
|
61
|
-
path_to_mets: Optional[str]
|
|
62
|
-
workspace_id: Optional[str]
|
|
63
|
-
description: Optional[str]
|
|
59
|
+
path_to_mets: Optional[str] = None
|
|
60
|
+
workspace_id: Optional[str] = None
|
|
61
|
+
description: Optional[str] = None
|
|
64
62
|
state: JobState = JobState.unset
|
|
65
63
|
input_file_grps: List[str]
|
|
66
|
-
output_file_grps: Optional[List[str]]
|
|
67
|
-
page_id: Optional[str]
|
|
68
|
-
parameters: Optional[dict]
|
|
69
|
-
depends_on: Optional[List[str]]
|
|
70
|
-
result_queue_name: Optional[str]
|
|
71
|
-
callback_url: Optional[str]
|
|
72
|
-
internal_callback_url: Optional[str]
|
|
73
|
-
start_time: Optional[datetime]
|
|
74
|
-
end_time: Optional[datetime]
|
|
75
|
-
exec_time: Optional[str]
|
|
76
|
-
log_file_path: Optional[str]
|
|
64
|
+
output_file_grps: Optional[List[str]] = None
|
|
65
|
+
page_id: Optional[str] = None
|
|
66
|
+
parameters: Optional[dict] = None
|
|
67
|
+
depends_on: Optional[List[str]] = None
|
|
68
|
+
result_queue_name: Optional[str] = None
|
|
69
|
+
callback_url: Optional[str] = None
|
|
70
|
+
internal_callback_url: Optional[str] = None
|
|
71
|
+
start_time: Optional[datetime] = None
|
|
72
|
+
end_time: Optional[datetime] = None
|
|
73
|
+
exec_time: Optional[str] = None
|
|
74
|
+
log_file_path: Optional[str] = None
|
|
77
75
|
|
|
78
76
|
class Settings:
|
|
79
77
|
use_enum_values = True
|
|
@@ -102,9 +100,9 @@ class PYWorkflowJobOutput(BaseModel):
|
|
|
102
100
|
page_id: str
|
|
103
101
|
page_wise: bool = False
|
|
104
102
|
job_id: str
|
|
105
|
-
path_to_mets: Optional[str]
|
|
106
|
-
workspace_id: Optional[str]
|
|
107
|
-
description: Optional[str]
|
|
103
|
+
path_to_mets: Optional[str] = None
|
|
104
|
+
workspace_id: Optional[str] = None
|
|
105
|
+
description: Optional[str] = None
|
|
108
106
|
|
|
109
107
|
|
|
110
108
|
class DBWorkflowJob(Document):
|
|
@@ -117,10 +115,10 @@ class DBWorkflowJob(Document):
|
|
|
117
115
|
# key: page_id
|
|
118
116
|
# value: List of and processing job ids sorted in dependency order
|
|
119
117
|
processing_job_ids: Dict
|
|
120
|
-
path_to_mets: Optional[str]
|
|
121
|
-
workspace_id: Optional[str]
|
|
122
|
-
description: Optional[str]
|
|
123
|
-
workflow_callback_url: Optional[str]
|
|
118
|
+
path_to_mets: Optional[str] = None
|
|
119
|
+
workspace_id: Optional[str] = None
|
|
120
|
+
description: Optional[str] = None
|
|
121
|
+
workflow_callback_url: Optional[str] = None
|
|
124
122
|
|
|
125
123
|
class Settings:
|
|
126
124
|
use_enum_values = True
|