ocrd 3.0.0b7-py3-none-any.whl → 3.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +3 -1
- ocrd/decorators/__init__.py +3 -2
- ocrd/mets_server.py +62 -42
- ocrd/processor/base.py +7 -6
- ocrd/processor/builtin/dummy/ocrd-tool.json +20 -0
- ocrd/processor/builtin/dummy_processor.py +0 -3
- ocrd/processor/builtin/filter_processor.py +108 -0
- ocrd/resource_manager.py +4 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/METADATA +2 -1
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/RECORD +32 -31
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/entry_points.txt +1 -0
- ocrd_modelfactory/__init__.py +7 -1
- ocrd_models/ocrd_exif.py +2 -2
- ocrd_models/ocrd_page.py +22 -3
- ocrd_models/ocrd_page_generateds.py +2813 -1438
- ocrd_models/xpath_functions.py +51 -0
- ocrd_network/cli/client.py +27 -8
- ocrd_network/client.py +9 -6
- ocrd_network/client_utils.py +25 -14
- ocrd_network/processing_server.py +27 -15
- ocrd_network/processing_worker.py +7 -4
- ocrd_network/processor_server.py +2 -1
- ocrd_network/rabbitmq_utils/connector.py +2 -2
- ocrd_network/runtime_data/deployer.py +28 -18
- ocrd_network/server_cache.py +26 -23
- ocrd_network/server_utils.py +40 -4
- ocrd_network/tcp_to_uds_mets_proxy.py +8 -5
- ocrd_network/utils.py +19 -15
- ocrd_utils/config.py +38 -16
- ocrd/processor/concurrent.py +0 -909
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/LICENSE +0 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/WHEEL +0 -0
- {ocrd-3.0.0b7.dist-info → ocrd-3.0.1.dist-info}/top_level.txt +0 -0
ocrd_network/server_cache.py
CHANGED
@@ -31,7 +31,7 @@ class CacheLockedPages:
         self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]
     ) -> bool:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
             return False
         debug_message = f"Caching the received request due to locked output file grp pages."
         for file_group in output_file_grps:
@@ -46,46 +46,45 @@ class CacheLockedPages:

     def get_locked_pages(self, workspace_key: str) -> Dict[str, List[str]]:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No locked pages available for workspace key: {workspace_key}")
             return {}
         return self.locked_pages[workspace_key]

     def lock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
+            self.log.info(f"Creating an entry in the locked pages cache for workspace key: {workspace_key}")
             self.locked_pages[workspace_key] = {}
         for file_group in output_file_grps:
             if file_group not in self.locked_pages[workspace_key]:
-                self.log.
+                self.log.info(f"Creating an empty list for output file grp: {file_group}")
                 self.locked_pages[workspace_key][file_group] = []
             # The page id list is not empty - only some pages are in the request
             if page_ids:
-                self.log.
+                self.log.info(f"Locking pages for '{file_group}': {page_ids}")
                 self.locked_pages[workspace_key][file_group].extend(page_ids)
-                self.log.
-                    f"{self.locked_pages[workspace_key][file_group]}")
+                self.log.info(f"Locked pages of '{file_group}': {self.locked_pages[workspace_key][file_group]}")
             else:
                 # Lock all pages with a single value
-                self.log.
+                self.log.info(f"Locking pages for '{file_group}': {self.placeholder_all_pages}")
                 self.locked_pages[workspace_key][file_group].append(self.placeholder_all_pages)

     def unlock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
             return
         for file_group in output_file_grps:
             if file_group in self.locked_pages[workspace_key]:
                 if page_ids:
                     # Unlock the previously locked pages
-                    self.log.
+                    self.log.info(f"Unlocking pages of '{file_group}': {page_ids}")
                     self.locked_pages[workspace_key][file_group] = \
                         [x for x in self.locked_pages[workspace_key][file_group] if x not in page_ids]
-                    self.log.
-
+                    self.log.info(f"Remaining locked pages of '{file_group}': "
+                                  f"{self.locked_pages[workspace_key][file_group]}")
                 else:
                     # Remove the single variable used to indicate all pages are locked
-                    self.log.
+                    self.log.info(f"Unlocking all pages for: {file_group}")
                     self.locked_pages[workspace_key][file_group].remove(self.placeholder_all_pages)


@@ -127,11 +126,11 @@ class CacheProcessingRequests:
         debug_message += f", page ids: {job_input.page_id}"
         debug_message += f", job id: {job_input.job_id}"
         debug_message += f", job depends on: {job_input.depends_on}"
-        self.log.
+        self.log.info(debug_message)

     async def consume_cached_requests(self, workspace_key: str) -> List[PYJobInput]:
         if not self.has_workspace_cached_requests(workspace_key=workspace_key):
-            self.log.
+            self.log.info(f"No jobs to be consumed for workspace key: {workspace_key}")
             return []
         found_consume_requests = []
         for current_element in self.processing_requests[workspace_key]:
@@ -165,25 +164,27 @@ class CacheProcessingRequests:
         # If a record counter of this workspace key does not exist
         # in the requests counter cache yet, create one and assign 0
         if not self.processing_counter.get(workspace_key, None):
-            self.log.
+            self.log.info(f"Creating an internal request counter for workspace key: {workspace_key}")
             self.processing_counter[workspace_key] = 0
         self.processing_counter[workspace_key] = self.processing_counter[workspace_key] + by_value
+        self.log.info(f"The new request counter of {workspace_key}: {self.processing_counter[workspace_key]}")
         return self.processing_counter[workspace_key]

     def cache_request(self, workspace_key: str, data: PYJobInput):
         # If a record queue of this workspace key does not exist in the requests cache
         if not self.processing_requests.get(workspace_key, None):
-            self.log.
+            self.log.info(f"Creating an internal request queue for workspace_key: {workspace_key}")
             self.processing_requests[workspace_key] = []
         self.__print_job_input_debug_message(job_input=data)
         # Add the processing request to the end of the internal queue
+        self.log.info(f"Caching a processing request of {workspace_key}: {data.job_id}")
         self.processing_requests[workspace_key].append(data)

     async def cancel_dependent_jobs(self, workspace_key: str, processing_job_id: str) -> List[PYJobInput]:
         if not self.has_workspace_cached_requests(workspace_key=workspace_key):
-            self.log.
+            self.log.info(f"No jobs to be cancelled for workspace key: {workspace_key}")
             return []
-        self.log.
+        self.log.info(f"Cancelling jobs dependent on job id: {processing_job_id}")
         found_cancel_requests = []
         for i, current_element in enumerate(self.processing_requests[workspace_key]):
             if processing_job_id in current_element.depends_on:
@@ -192,7 +193,7 @@ class CacheProcessingRequests:
         for cancel_element in found_cancel_requests:
             try:
                 self.processing_requests[workspace_key].remove(cancel_element)
-                self.log.
+                self.log.info(f"For job id: '{processing_job_id}', cancelling job id: '{cancel_element.job_id}'")
                 cancelled_jobs.append(cancel_element)
                 await db_update_processing_job(job_id=cancel_element.job_id, state=JobState.cancelled)
                 # Recursively cancel dependent jobs for the cancelled job
@@ -225,9 +226,11 @@ class CacheProcessingRequests:

     def has_workspace_cached_requests(self, workspace_key: str) -> bool:
         if not self.processing_requests.get(workspace_key, None):
-            self.log.
+            self.log.info(f"In processing requests cache, no workspace key found: {workspace_key}")
             return False
         if not len(self.processing_requests[workspace_key]):
-            self.log.
+            self.log.info(f"The processing requests cache is empty for workspace key: {workspace_key}")
             return False
+        self.log.info(f"The processing requests cache has {len(self.processing_requests[workspace_key])} "
+                      f"entries for workspace key: {workspace_key} ")
         return True
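The cache logging in CacheLockedPages is now done at INFO level throughout. A minimal sketch of the locking flow these methods implement (illustrative only, not part of the diff; it assumes an already constructed CacheLockedPages instance, and workspace key and fileGrp names are made up):

    from typing import List

    def lock_report_unlock(cache, workspace_key: str, file_grps: List[str], page_ids: List[str]) -> None:
        # lock_pages() records the given page ids per output fileGrp; with an
        # empty page_ids list it appends a single "all pages" placeholder instead.
        cache.lock_pages(workspace_key, file_grps, page_ids)
        # get_locked_pages() returns the {fileGrp: [page_id, ...]} mapping,
        # or {} if nothing is locked for this workspace.
        print(cache.get_locked_pages(workspace_key))
        # unlock_pages() removes exactly those page ids (or the placeholder) again.
        cache.unlock_pages(workspace_key, file_grps, page_ids)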
ocrd_network/server_utils.py
CHANGED
@@ -1,12 +1,18 @@
+import os
+import re
+import signal
+from pathlib import Path
+from json import dumps, loads
+from urllib.parse import urljoin
+from typing import Dict, List, Optional, Union
+from time import time
+
 from fastapi import HTTPException, status, UploadFile
 from fastapi.responses import FileResponse
 from httpx import AsyncClient, Timeout
-from json import dumps, loads
 from logging import Logger
-from pathlib import Path
 from requests import get as requests_get
-from
-from urllib.parse import urljoin
+from requests_unixsocket import sys

 from ocrd.resolver import Resolver
 from ocrd.task_sequence import ProcessorTask
@@ -241,3 +247,33 @@ def validate_first_task_input_file_groups_existence(logger: Logger, mets_path: s
         if group not in available_groups:
             message = f"Input file group '{group}' of the first processor not found: {input_file_grps}"
             raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
+
+
+def kill_mets_server_zombies(minutes_ago : Optional[int], dry_run : Optional[bool]) -> List[int]:
+    if minutes_ago == None:
+        minutes_ago = 90
+    if dry_run == None:
+        dry_run = False
+
+    now = time()
+    cmdline_pat = r'.*ocrd workspace -U.*server start $'
+    ret = []
+    for procdir in sorted(Path('/proc').glob('*'), key=os.path.getctime):
+        if not procdir.is_dir():
+            continue
+        cmdline_file = procdir.joinpath('cmdline')
+        if not cmdline_file.is_file():
+            continue
+        ctime_ago = int((now - procdir.stat().st_ctime) / 60)
+        if ctime_ago < minutes_ago:
+            continue
+        cmdline = cmdline_file.read_text().replace('\x00', ' ')
+        if re.match(cmdline_pat, cmdline):
+            pid = int(procdir.name)
+            ret.append(pid)
+            print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, so killing (cmdline="{cmdline})', file=sys.stderr)
+            if dry_run:
+                print(f'[dry_run is active] kill {pid}')
+            else:
+                os.kill(pid, signal.SIGTERM)
+    return ret
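The new kill_mets_server_zombies() helper scans /proc for stale `ocrd workspace -U ... server start` processes and sends them SIGTERM. A short usage sketch, assuming the function is imported from ocrd_network.server_utils (passing None for either argument falls back to the defaults of 90 minutes and dry_run=False):

    from ocrd_network.server_utils import kill_mets_server_zombies

    # First only report METS Server processes older than two hours ...
    zombie_pids = kill_mets_server_zombies(minutes_ago=120, dry_run=True)
    # ... then actually terminate them once the reported PIDs look plausible.
    if zombie_pids:
        kill_mets_server_zombies(minutes_ago=120, dry_run=False)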
ocrd_network/tcp_to_uds_mets_proxy.py
CHANGED
@@ -1,5 +1,5 @@
 from requests_unixsocket import Session as requests_unixsocket_session
-from .utils import get_uds_path
+from .utils import get_uds_path, convert_url_to_uds_format
 from typing import Dict
 from ocrd_utils import getLogger

@@ -31,9 +31,13 @@ class MetsServerProxy:
         if method_type not in SUPPORTED_METHOD_TYPES:
             raise NotImplementedError(f"Method type: {method_type} not recognized")
         ws_socket_file = str(get_uds_path(ws_dir_path=ws_dir_path))
-        ws_unix_socket_url =
+        ws_unix_socket_url = convert_url_to_uds_format(ws_socket_file)
         uds_request_url = f"{ws_unix_socket_url}/{request_url}"

+        self.log.info(f"Forwarding TCP mets server request to UDS url: {uds_request_url}")
+        self.log.info(f"Forwarding method type {method_type}, request data: {request_data}, "
+                      f"expected response type: {response_type}")
+
         if not request_data:
             response = self.session.request(method_type, uds_request_url)
         elif "params" in request_data:
@@ -45,12 +49,11 @@ class MetsServerProxy:
         else:
             raise ValueError("Expecting request_data to be empty or containing single key: params,"
                              f"form, or class but not {request_data.keys}")
-
+        if response_type == "empty":
+            return {}
         if not response:
             self.log.error(f"Uds-Mets-Server gives unexpected error. Response: {response.__dict__}")
             return {"error": response.text}
-        elif response_type == "empty":
-            return {}
         elif response_type == "text":
             return {"text": response.text}
         elif response_type == "class" or response_type == "dict":
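MetsServerProxy now builds the UDS request URL via convert_url_to_uds_format() instead of inline string formatting. A minimal sketch of that mapping, assuming both helpers are imported from ocrd_network.utils (the workspace path is illustrative, and the printed URL scheme depends entirely on convert_url_to_uds_format):

    from ocrd_network.utils import convert_url_to_uds_format, get_uds_path

    # Workspace directory -> socket file under OCRD_NETWORK_SOCKETS_ROOT_DIR
    # -> URL in the scheme that requests_unixsocket understands.
    ws_dir_path = "/data/workspace-1"
    ws_socket_file = str(get_uds_path(ws_dir_path=ws_dir_path))
    ws_unix_socket_url = convert_url_to_uds_format(ws_socket_file)
    print(ws_socket_file, ws_unix_socket_url)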
ocrd_network/utils.py
CHANGED
@@ -4,6 +4,7 @@ from fastapi import UploadFile
 from functools import wraps
 from hashlib import md5
 from json import loads
+from logging import Logger
 from pathlib import Path
 from re import compile as re_compile, split as re_split
 from requests import get as requests_get, Session as Session_TCP
@@ -151,22 +152,25 @@ def is_mets_server_running(mets_server_url: str, ws_dir_path: str = None) -> boo
     return False


-def stop_mets_server(mets_server_url: str, ws_dir_path: str
+def stop_mets_server(logger: Logger, mets_server_url: str, ws_dir_path: str) -> bool:
     protocol = "tcp" if (mets_server_url.startswith("http://") or mets_server_url.startswith("https://")) else "uds"
-
-    if protocol == "
-
-
-
-
-
-
-
-
-
-
-
+    # If the mets server URL is the proxy endpoint
+    if protocol == "tcp" and "tcp_mets" in mets_server_url:
+        # Convert the mets server url to UDS format
+        ws_socket_file = str(get_uds_path(ws_dir_path))
+        mets_server_url = convert_url_to_uds_format(ws_socket_file)
+        protocol = "uds"
+    if protocol == "tcp":
+        request_json = MpxReq.stop(ws_dir_path)
+        logger.info(f"Sending POST request to: {mets_server_url}, request_json: {request_json}")
+        response = Session_TCP().post(url=f"{mets_server_url}", json=request_json)
+        return response.status_code == 200
+    elif protocol == "uds":
+        logger.info(f"Sending DELETE request to: {mets_server_url}/")
+        response = Session_UDS().delete(url=f"{mets_server_url}/")
+        return response.status_code == 200
+    else:
+        ValueError(f"Unexpected protocol type: {protocol}")

 def get_uds_path(ws_dir_path: str) -> Path:
     return Path(config.OCRD_NETWORK_SOCKETS_ROOT_DIR, f"{safe_filename(ws_dir_path)}.sock")
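stop_mets_server() now takes a logger as its first argument and, when given the Processing Server's /tcp_mets proxy endpoint, converts it to the workspace's UDS socket before sending the stop request. A usage sketch with illustrative URL and workspace path:

    from ocrd_utils import getLogger
    from ocrd_network.utils import stop_mets_server

    logger = getLogger("ocrd_network.client")  # any configured logger works here
    stopped = stop_mets_server(
        logger,
        mets_server_url="http://localhost:8000/tcp_mets",  # illustrative proxy URL
        ws_dir_path="/data/workspace-1",
    )
    print("METS Server stopped:", stopped)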
ocrd_utils/config.py
CHANGED
@@ -21,7 +21,7 @@ def _parser_boolean(val):

 class OcrdEnvVariable():

-    def __init__(self, name, description, parser=str, validator=lambda
+    def __init__(self, name, description, parser=str, validator=lambda _: True, default=[False, None]):
         """
         An environment variable for use in OCR-D.

@@ -47,10 +47,19 @@ class OcrdEnvVariable():
         return f'{self.name}: {self.description}'

     def describe(self, wrap_text=True, indent_text=True):
+        """
+        Output help information on a config option.
+
+        If ``option.description`` is a multiline string with complex formatting
+        (e.g. markdown lists), replace empty lines with ``\b`` and set
+        ``wrap_text`` to ``False``.
+        """
         desc = self.description
         if self.has_default:
             default = self.default() if callable(self.default) else self.default
-
+            if not desc.endswith('\n'):
+                desc += ' '
+            desc += f'(Default: "{default}")'
         ret = ''
         ret = f'{self.name}\n'
         if wrap_text:
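The new describe() docstring spells out the convention used by the option descriptions below: multiline descriptions containing markdown lists separate their paragraphs with \b lines and are rendered with wrap_text=False. A sketch with a purely hypothetical option name, only to illustrate that pattern (it assumes OcrdEnvVariable stores the default tuple the way __init__ and describe() above suggest):

    from ocrd_utils.config import OcrdEnvVariable

    # OCRD_EXAMPLE_OPTION is hypothetical; it only demonstrates the
    # \b / wrap_text=False convention documented in describe().
    desc = "What to do on example events:\n\b\n- `SKIP`: ignore the event\n- `ABORT`: raise an error\n\b\n"
    opt = OcrdEnvVariable("OCRD_EXAMPLE_OPTION", desc,
                          validator=lambda val: val in ["SKIP", "ABORT"],
                          default=(True, "SKIP"))
    print(opt.describe(wrap_text=False))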
@@ -146,11 +155,11 @@ config.add("OCRD_PROFILE",
     description="""\
 Whether to enable gathering runtime statistics
 on the `ocrd.profile` logger (comma-separated):
-
+\b
 - `CPU`: yields CPU and wall-time,
 - `RSS`: also yields peak memory (resident set size)
 - `PSS`: also yields peak memory (proportional set size)
-
+\b
 """,
     validator=lambda val : all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
     default=(True, ''))
@@ -183,11 +192,12 @@ config.add("OCRD_DOWNLOAD_INPUT",

 config.add("OCRD_MISSING_INPUT",
     description="""\
-How to deal with missing input files
-
+How to deal with missing input files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed with next page's input
 - `ABORT`: throw :py:class:`.MissingInputFile`
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'ABORT'],
@@ -195,12 +205,13 @@ How to deal with missing input files (for some fileGrp/pageId) during processing

 config.add("OCRD_MISSING_OUTPUT",
     description="""\
-How to deal with missing output files
-
+How to deal with missing output files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed processing next page
 - `COPY`: fall back to copying input PAGE to output fileGrp for page
 - `ABORT`: re-throw whatever caused processing to fail
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'],
@@ -213,12 +224,13 @@ config.add("OCRD_MAX_MISSING_OUTPUTS",

 config.add("OCRD_EXISTING_OUTPUT",
     description="""\
-How to deal with already existing output files
-
+How to deal with already existing output files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed processing next page
 - `OVERWRITE`: force writing result to output fileGrp for page
 - `ABORT`: re-throw :py:class:`FileExistsError`
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'],
@@ -231,7 +243,7 @@ config.add("OCRD_NETWORK_SERVER_ADDR_PROCESSING",
 config.add("OCRD_NETWORK_CLIENT_POLLING_SLEEP",
     description="How many seconds to sleep before trying again.",
     parser=int,
-    default=(True,
+    default=(True, 10))

 config.add("OCRD_NETWORK_CLIENT_POLLING_TIMEOUT",
     description="Timeout for a blocking ocrd network client (in seconds).",
@@ -247,9 +259,19 @@ config.add("OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
     default=(True, ''))

 config.add("OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS",
-
+    description="Number of attempts for a RabbitMQ client to connect before failing.",
+    parser=int,
+    default=(True, 3))
+
+config.add(
+    name="OCRD_NETWORK_RABBITMQ_HEARTBEAT",
+    description="""
+Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value
+proposed by broker. Use 0 to deactivate heartbeat.
+    """,
     parser=int,
-    default=(True,
+    default=(True, 0)
+)

 config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
     description="The root directory where all mets server related socket files are created",