ocrd 3.0.0b6__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +3 -1
- ocrd/decorators/__init__.py +3 -2
- ocrd/mets_server.py +62 -42
- ocrd/processor/base.py +25 -9
- ocrd/processor/builtin/dummy/ocrd-tool.json +20 -0
- ocrd/processor/builtin/dummy_processor.py +0 -3
- ocrd/processor/builtin/filter_processor.py +108 -0
- ocrd/resource_manager.py +4 -0
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/METADATA +2 -1
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/RECORD +34 -32
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/entry_points.txt +1 -0
- ocrd_modelfactory/__init__.py +7 -1
- ocrd_models/ocrd_exif.py +2 -2
- ocrd_models/ocrd_page.py +22 -3
- ocrd_models/ocrd_page_generateds.py +2813 -1438
- ocrd_models/xpath_functions.py +51 -0
- ocrd_network/cli/client.py +27 -8
- ocrd_network/client.py +9 -6
- ocrd_network/client_utils.py +25 -14
- ocrd_network/processing_server.py +27 -15
- ocrd_network/processing_worker.py +7 -4
- ocrd_network/processor_server.py +2 -1
- ocrd_network/rabbitmq_utils/connector.py +2 -2
- ocrd_network/runtime_data/deployer.py +28 -18
- ocrd_network/server_cache.py +26 -23
- ocrd_network/server_utils.py +40 -4
- ocrd_network/tcp_to_uds_mets_proxy.py +8 -5
- ocrd_network/utils.py +19 -15
- ocrd_utils/config.py +38 -16
- ocrd_utils/logging.py +27 -56
- ocrd_utils/ocrd_logging.conf +14 -16
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/LICENSE +0 -0
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/WHEEL +0 -0
- {ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/top_level.txt +0 -0
ocrd_network/server_cache.py
CHANGED
@@ -31,7 +31,7 @@ class CacheLockedPages:
         self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]
     ) -> bool:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
             return False
         debug_message = f"Caching the received request due to locked output file grp pages."
         for file_group in output_file_grps:
@@ -46,46 +46,45 @@ class CacheLockedPages:
 
     def get_locked_pages(self, workspace_key: str) -> Dict[str, List[str]]:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No locked pages available for workspace key: {workspace_key}")
             return {}
         return self.locked_pages[workspace_key]
 
     def lock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
+            self.log.info(f"Creating an entry in the locked pages cache for workspace key: {workspace_key}")
             self.locked_pages[workspace_key] = {}
         for file_group in output_file_grps:
             if file_group not in self.locked_pages[workspace_key]:
-                self.log.
+                self.log.info(f"Creating an empty list for output file grp: {file_group}")
                 self.locked_pages[workspace_key][file_group] = []
             # The page id list is not empty - only some pages are in the request
             if page_ids:
-                self.log.
+                self.log.info(f"Locking pages for '{file_group}': {page_ids}")
                 self.locked_pages[workspace_key][file_group].extend(page_ids)
-                self.log.
-                              f"{self.locked_pages[workspace_key][file_group]}")
+                self.log.info(f"Locked pages of '{file_group}': {self.locked_pages[workspace_key][file_group]}")
            else:
                 # Lock all pages with a single value
-                self.log.
+                self.log.info(f"Locking pages for '{file_group}': {self.placeholder_all_pages}")
                 self.locked_pages[workspace_key][file_group].append(self.placeholder_all_pages)
 
     def unlock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None:
         if not self.locked_pages.get(workspace_key, None):
-            self.log.
+            self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
             return
         for file_group in output_file_grps:
             if file_group in self.locked_pages[workspace_key]:
                 if page_ids:
                     # Unlock the previously locked pages
-                    self.log.
+                    self.log.info(f"Unlocking pages of '{file_group}': {page_ids}")
                     self.locked_pages[workspace_key][file_group] = \
                         [x for x in self.locked_pages[workspace_key][file_group] if x not in page_ids]
-                    self.log.
-
+                    self.log.info(f"Remaining locked pages of '{file_group}': "
+                                  f"{self.locked_pages[workspace_key][file_group]}")
                 else:
                     # Remove the single variable used to indicate all pages are locked
-                    self.log.
+                    self.log.info(f"Unlocking all pages for: {file_group}")
                     self.locked_pages[workspace_key][file_group].remove(self.placeholder_all_pages)
 
 
@@ -127,11 +126,11 @@ class CacheProcessingRequests:
         debug_message += f", page ids: {job_input.page_id}"
         debug_message += f", job id: {job_input.job_id}"
         debug_message += f", job depends on: {job_input.depends_on}"
-        self.log.
+        self.log.info(debug_message)
 
     async def consume_cached_requests(self, workspace_key: str) -> List[PYJobInput]:
         if not self.has_workspace_cached_requests(workspace_key=workspace_key):
-            self.log.
+            self.log.info(f"No jobs to be consumed for workspace key: {workspace_key}")
             return []
         found_consume_requests = []
         for current_element in self.processing_requests[workspace_key]:
@@ -165,25 +164,27 @@ class CacheProcessingRequests:
         # If a record counter of this workspace key does not exist
         # in the requests counter cache yet, create one and assign 0
         if not self.processing_counter.get(workspace_key, None):
-            self.log.
+            self.log.info(f"Creating an internal request counter for workspace key: {workspace_key}")
             self.processing_counter[workspace_key] = 0
         self.processing_counter[workspace_key] = self.processing_counter[workspace_key] + by_value
+        self.log.info(f"The new request counter of {workspace_key}: {self.processing_counter[workspace_key]}")
         return self.processing_counter[workspace_key]
 
     def cache_request(self, workspace_key: str, data: PYJobInput):
         # If a record queue of this workspace key does not exist in the requests cache
         if not self.processing_requests.get(workspace_key, None):
-            self.log.
+            self.log.info(f"Creating an internal request queue for workspace_key: {workspace_key}")
             self.processing_requests[workspace_key] = []
         self.__print_job_input_debug_message(job_input=data)
         # Add the processing request to the end of the internal queue
+        self.log.info(f"Caching a processing request of {workspace_key}: {data.job_id}")
         self.processing_requests[workspace_key].append(data)
 
     async def cancel_dependent_jobs(self, workspace_key: str, processing_job_id: str) -> List[PYJobInput]:
         if not self.has_workspace_cached_requests(workspace_key=workspace_key):
-            self.log.
+            self.log.info(f"No jobs to be cancelled for workspace key: {workspace_key}")
             return []
-        self.log.
+        self.log.info(f"Cancelling jobs dependent on job id: {processing_job_id}")
         found_cancel_requests = []
         for i, current_element in enumerate(self.processing_requests[workspace_key]):
             if processing_job_id in current_element.depends_on:
@@ -192,7 +193,7 @@ class CacheProcessingRequests:
         for cancel_element in found_cancel_requests:
             try:
                 self.processing_requests[workspace_key].remove(cancel_element)
-                self.log.
+                self.log.info(f"For job id: '{processing_job_id}', cancelling job id: '{cancel_element.job_id}'")
                 cancelled_jobs.append(cancel_element)
                 await db_update_processing_job(job_id=cancel_element.job_id, state=JobState.cancelled)
                 # Recursively cancel dependent jobs for the cancelled job
@@ -225,9 +226,11 @@ class CacheProcessingRequests:
 
     def has_workspace_cached_requests(self, workspace_key: str) -> bool:
         if not self.processing_requests.get(workspace_key, None):
-            self.log.
+            self.log.info(f"In processing requests cache, no workspace key found: {workspace_key}")
             return False
         if not len(self.processing_requests[workspace_key]):
-            self.log.
+            self.log.info(f"The processing requests cache is empty for workspace key: {workspace_key}")
             return False
+        self.log.info(f"The processing requests cache has {len(self.processing_requests[workspace_key])} "
+                      f"entries for workspace key: {workspace_key} ")
         return True
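The logging calls above sit inside a simple lock/unlock cycle keyed by workspace directory and output fileGrp. A rough usage sketch, assuming `CacheLockedPages` can be constructed without arguments (its constructor is not part of this diff):

    from ocrd_utils import initLogging
    from ocrd_network.server_cache import CacheLockedPages

    initLogging()
    cache = CacheLockedPages()   # assumption: default constructor, not shown in this diff
    ws_key = "/data/workspace1"

    # lock two pages of an output fileGrp for this workspace
    cache.lock_pages(workspace_key=ws_key, output_file_grps=["OCR-D-SEG"], page_ids=["PHYS_0001", "PHYS_0002"])
    print(cache.get_locked_pages(ws_key))

    # release them again once the job has finished
    cache.unlock_pages(workspace_key=ws_key, output_file_grps=["OCR-D-SEG"], page_ids=["PHYS_0001", "PHYS_0002"])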
ocrd_network/server_utils.py
CHANGED
@@ -1,12 +1,18 @@
+import os
+import re
+import signal
+from pathlib import Path
+from json import dumps, loads
+from urllib.parse import urljoin
+from typing import Dict, List, Optional, Union
+from time import time
+
 from fastapi import HTTPException, status, UploadFile
 from fastapi.responses import FileResponse
 from httpx import AsyncClient, Timeout
-from json import dumps, loads
 from logging import Logger
-from pathlib import Path
 from requests import get as requests_get
-from
-from urllib.parse import urljoin
+from requests_unixsocket import sys
 
 from ocrd.resolver import Resolver
 from ocrd.task_sequence import ProcessorTask
@@ -241,3 +247,33 @@ def validate_first_task_input_file_groups_existence(logger: Logger, mets_path: s
         if group not in available_groups:
             message = f"Input file group '{group}' of the first processor not found: {input_file_grps}"
             raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
+
+
+def kill_mets_server_zombies(minutes_ago : Optional[int], dry_run : Optional[bool]) -> List[int]:
+    if minutes_ago == None:
+        minutes_ago = 90
+    if dry_run == None:
+        dry_run = False
+
+    now = time()
+    cmdline_pat = r'.*ocrd workspace -U.*server start $'
+    ret = []
+    for procdir in sorted(Path('/proc').glob('*'), key=os.path.getctime):
+        if not procdir.is_dir():
+            continue
+        cmdline_file = procdir.joinpath('cmdline')
+        if not cmdline_file.is_file():
+            continue
+        ctime_ago = int((now - procdir.stat().st_ctime) / 60)
+        if ctime_ago < minutes_ago:
+            continue
+        cmdline = cmdline_file.read_text().replace('\x00', ' ')
+        if re.match(cmdline_pat, cmdline):
+            pid = int(procdir.name)
+            ret.append(pid)
+            print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, so killing (cmdline="{cmdline})', file=sys.stderr)
+            if dry_run:
+                print(f'[dry_run is active] kill {pid}')
+            else:
+                os.kill(pid, signal.SIGTERM)
+    return ret
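The new `kill_mets_server_zombies` helper scans `/proc` for stale `ocrd workspace ... server start` processes and terminates those older than the given age. A small caller sketch based on the signature above (the age and dry-run values are just examples):

    from ocrd_network.server_utils import kill_mets_server_zombies

    # report METS Servers older than 60 minutes without actually killing them
    pids = kill_mets_server_zombies(minutes_ago=60, dry_run=True)
    print(f"would terminate: {pids}")

    # None for both arguments falls back to the defaults (90 minutes, no dry run)
    kill_mets_server_zombies(minutes_ago=None, dry_run=None)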
ocrd_network/tcp_to_uds_mets_proxy.py
CHANGED
@@ -1,5 +1,5 @@
 from requests_unixsocket import Session as requests_unixsocket_session
-from .utils import get_uds_path
+from .utils import get_uds_path, convert_url_to_uds_format
 from typing import Dict
 from ocrd_utils import getLogger
 
@@ -31,9 +31,13 @@ class MetsServerProxy:
         if method_type not in SUPPORTED_METHOD_TYPES:
             raise NotImplementedError(f"Method type: {method_type} not recognized")
         ws_socket_file = str(get_uds_path(ws_dir_path=ws_dir_path))
-        ws_unix_socket_url =
+        ws_unix_socket_url = convert_url_to_uds_format(ws_socket_file)
         uds_request_url = f"{ws_unix_socket_url}/{request_url}"
 
+        self.log.info(f"Forwarding TCP mets server request to UDS url: {uds_request_url}")
+        self.log.info(f"Forwarding method type {method_type}, request data: {request_data}, "
+                      f"expected response type: {response_type}")
+
         if not request_data:
             response = self.session.request(method_type, uds_request_url)
         elif "params" in request_data:
@@ -45,12 +49,11 @@ class MetsServerProxy:
         else:
             raise ValueError("Expecting request_data to be empty or containing single key: params,"
                              f"form, or class but not {request_data.keys}")
-
+        if response_type == "empty":
+            return {}
         if not response:
             self.log.error(f"Uds-Mets-Server gives unexpected error. Response: {response.__dict__}")
             return {"error": response.text}
-        elif response_type == "empty":
-            return {}
         elif response_type == "text":
             return {"text": response.text}
         elif response_type == "class" or response_type == "dict":
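The proxy now delegates URL construction to `convert_url_to_uds_format` from `ocrd_network.utils`, whose body is not part of this diff. Under the `requests_unixsocket` convention used by `self.session`, such a conversion typically percent-encodes the socket path into an `http+unix://` URL; the following is only an illustrative sketch of that idea, not the package's implementation:

    from urllib.parse import quote

    def convert_socket_path_to_uds_url(socket_path: str) -> str:
        # requests_unixsocket expects the socket path percent-encoded in the host part
        return f"http+unix://{quote(socket_path, safe='')}"

    print(convert_socket_path_to_uds_url("/tmp/ocrd_workspace.sock"))
    # http+unix://%2Ftmp%2Focrd_workspace.sock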
ocrd_network/utils.py
CHANGED
@@ -4,6 +4,7 @@ from fastapi import UploadFile
 from functools import wraps
 from hashlib import md5
 from json import loads
+from logging import Logger
 from pathlib import Path
 from re import compile as re_compile, split as re_split
 from requests import get as requests_get, Session as Session_TCP
@@ -151,22 +152,25 @@ def is_mets_server_running(mets_server_url: str, ws_dir_path: str = None) -> boo
         return False
 
 
-def stop_mets_server(mets_server_url: str, ws_dir_path: str
+def stop_mets_server(logger: Logger, mets_server_url: str, ws_dir_path: str) -> bool:
     protocol = "tcp" if (mets_server_url.startswith("http://") or mets_server_url.startswith("https://")) else "uds"
-
-    if protocol == "
-
-
-
-
-
-
-
-
-
-
-
+    # If the mets server URL is the proxy endpoint
+    if protocol == "tcp" and "tcp_mets" in mets_server_url:
+        # Convert the mets server url to UDS format
+        ws_socket_file = str(get_uds_path(ws_dir_path))
+        mets_server_url = convert_url_to_uds_format(ws_socket_file)
+        protocol = "uds"
+    if protocol == "tcp":
+        request_json = MpxReq.stop(ws_dir_path)
+        logger.info(f"Sending POST request to: {mets_server_url}, request_json: {request_json}")
+        response = Session_TCP().post(url=f"{mets_server_url}", json=request_json)
+        return response.status_code == 200
+    elif protocol == "uds":
+        logger.info(f"Sending DELETE request to: {mets_server_url}/")
+        response = Session_UDS().delete(url=f"{mets_server_url}/")
+        return response.status_code == 200
+    else:
+        ValueError(f"Unexpected protocol type: {protocol}")
 
 def get_uds_path(ws_dir_path: str) -> Path:
     return Path(config.OCRD_NETWORK_SOCKETS_ROOT_DIR, f"{safe_filename(ws_dir_path)}.sock")
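`stop_mets_server` now takes a `Logger` first and returns whether the shutdown request succeeded; when given the Processing Server's `tcp_mets` proxy URL it falls back to the workspace's UDS socket. A hedged caller sketch (URL and paths are placeholders):

    from ocrd_utils import getLogger, initLogging
    from ocrd_network.utils import stop_mets_server

    initLogging()
    logger = getLogger("ocrd_network.client")

    # placeholders: a Processing Server proxy URL and the workspace it serves
    ok = stop_mets_server(
        logger,
        mets_server_url="http://localhost:8000/tcp_mets",
        ws_dir_path="/data/workspace1",
    )
    print("METS Server stopped" if ok else "shutdown request failed")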
ocrd_utils/config.py
CHANGED
@@ -21,7 +21,7 @@ def _parser_boolean(val):
 
 class OcrdEnvVariable():
 
-    def __init__(self, name, description, parser=str, validator=lambda
+    def __init__(self, name, description, parser=str, validator=lambda _: True, default=[False, None]):
         """
         An environment variable for use in OCR-D.
 
@@ -47,10 +47,19 @@ class OcrdEnvVariable():
         return f'{self.name}: {self.description}'
 
     def describe(self, wrap_text=True, indent_text=True):
+        """
+        Output help information on a config option.
+
+        If ``option.description`` is a multiline string with complex formatting
+        (e.g. markdown lists), replace empty lines with ``\b`` and set
+        ``wrap_text`` to ``False``.
+        """
         desc = self.description
         if self.has_default:
             default = self.default() if callable(self.default) else self.default
-
+            if not desc.endswith('\n'):
+                desc += ' '
+            desc += f'(Default: "{default}")'
         ret = ''
         ret = f'{self.name}\n'
         if wrap_text:
@@ -146,11 +155,11 @@ config.add("OCRD_PROFILE",
     description="""\
 Whether to enable gathering runtime statistics
 on the `ocrd.profile` logger (comma-separated):
-
+\b
 - `CPU`: yields CPU and wall-time,
 - `RSS`: also yields peak memory (resident set size)
 - `PSS`: also yields peak memory (proportional set size)
-
+\b
 """,
     validator=lambda val : all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
     default=(True, ''))
@@ -183,11 +192,12 @@ config.add("OCRD_DOWNLOAD_INPUT",
 
 config.add("OCRD_MISSING_INPUT",
     description="""\
-How to deal with missing input files
-
+How to deal with missing input files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed with next page's input
 - `ABORT`: throw :py:class:`.MissingInputFile`
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'ABORT'],
@@ -195,12 +205,13 @@ How to deal with missing input files (for some fileGrp/pageId) during processing
 
 config.add("OCRD_MISSING_OUTPUT",
     description="""\
-How to deal with missing output files
-
+How to deal with missing output files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed processing next page
 - `COPY`: fall back to copying input PAGE to output fileGrp for page
 - `ABORT`: re-throw whatever caused processing to fail
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'],
@@ -213,12 +224,13 @@ config.add("OCRD_MAX_MISSING_OUTPUTS",
 
 config.add("OCRD_EXISTING_OUTPUT",
     description="""\
-How to deal with already existing output files
-
+How to deal with already existing output files
+(for some fileGrp/pageId) during processing:
+\b
 - `SKIP`: ignore and proceed processing next page
 - `OVERWRITE`: force writing result to output fileGrp for page
 - `ABORT`: re-throw :py:class:`FileExistsError`
-
+\b
 """,
     default=(True, 'SKIP'),
     validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'],
@@ -231,7 +243,7 @@ config.add("OCRD_NETWORK_SERVER_ADDR_PROCESSING",
 config.add("OCRD_NETWORK_CLIENT_POLLING_SLEEP",
     description="How many seconds to sleep before trying again.",
     parser=int,
-    default=(True,
+    default=(True, 10))
 
 config.add("OCRD_NETWORK_CLIENT_POLLING_TIMEOUT",
     description="Timeout for a blocking ocrd network client (in seconds).",
@@ -247,9 +259,19 @@ config.add("OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
     default=(True, ''))
 
 config.add("OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS",
-
+    description="Number of attempts for a RabbitMQ client to connect before failing.",
+    parser=int,
+    default=(True, 3))
+
+config.add(
+    name="OCRD_NETWORK_RABBITMQ_HEARTBEAT",
+    description="""
+Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value
+proposed by broker. Use 0 to deactivate heartbeat.
+    """,
     parser=int,
-    default=(True,
+    default=(True, 0)
+)
 
 config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
     description="The root directory where all mets server related socket files are created",
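The two new RabbitMQ variables are registered on the same `config` object as the existing ones, so their parsed integer values should be readable as attributes once the environment variables are set. A small sketch (values are examples; whether they are read lazily or at import time is not shown in this diff, so exporting them before process start is the safe route):

    import os

    # example values only; 0 would disable the AMQP heartbeat entirely
    os.environ["OCRD_NETWORK_RABBITMQ_HEARTBEAT"] = "30"
    os.environ["OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS"] = "5"

    from ocrd_utils import config

    # both variables are declared with parser=int above, so they come back as integers
    print(config.OCRD_NETWORK_RABBITMQ_HEARTBEAT)
    print(config.OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS)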
ocrd_utils/logging.py
CHANGED
@@ -46,14 +46,8 @@ __all__ = [
     'setOverrideLogLevel',
 ]
 
-# These are the loggers we add handlers to
-ROOT_OCRD_LOGGERS = [
-    '',
-    'ocrd',
-    'ocrd_network'
-]
-
 LOGGING_DEFAULTS = {
+    '': logging.WARNING,
     'ocrd': logging.INFO,
     'ocrd_network': logging.INFO,
     # 'ocrd.resolver': logging.INFO,
@@ -114,18 +108,15 @@ def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG):
         lvl (string): Log level name.
         silent (boolean): Whether to log the override call
     """
-    if not
-
-
-
-
-
-
-
-        if not silent:
-            print(f'[LOGGING] Overriding ocrd log level to {lvl}', file=sys.stderr)
-        ocrd_logger.setLevel(lvl)
+    if lvl is not None:
+        lvl = getLevelName(lvl)
+    if not _initialized_flag:
+        initLogging(silent=silent)
+    # affect all configured loggers
+    for logger_name in logging.root.manager.loggerDict:
+        if not silent:
+            print(f'[LOGGING] Overriding {logger_name} log level to {lvl}', file=sys.stderr)
+        logging.getLogger(logger_name).setLevel(lvl)
 
 def get_logging_config_files():
     """
@@ -159,20 +150,11 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
     - silent (bool): Whether to log logging behavior by printing to stderr
     """
     global _initialized_flag
-    if _initialized_flag
-
-
-
-
-    # If logging.disable(logging.NOTSET) is called, it effectively removes this
-    # overriding level, so that logging output again depends on the effective
-    # levels of individual loggers.
-    logging.disable(logging.NOTSET)
-
-    # remove all handlers for the ocrd root loggers
-    for logger_name in ROOT_OCRD_LOGGERS:
-        for handler in logging.getLogger(logger_name).handlers[:]:
-            logging.getLogger(logger_name).removeHandler(handler)
+    if _initialized_flag:
+        if force_reinit:
+            disableLogging(silent=silent)
+        else:
+            return
 
     config_file = None
     if not builtin_only:
@@ -191,11 +173,8 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
     ocrd_handler = logging.StreamHandler(stream=sys.stderr)
     ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT))
     ocrd_handler.setLevel(logging.DEBUG)
-
-
-        logger.addHandler(ocrd_handler)
-        if logger_name:
-            logger.propagate = False # avoid duplication (from root handler)
+    root_logger = logging.getLogger('')
+    root_logger.addHandler(ocrd_handler)
     for logger_name, logger_level in LOGGING_DEFAULTS.items():
         logging.getLogger(logger_name).setLevel(logger_level)
     _initialized_flag = True
@@ -211,24 +190,16 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG):
     if _initialized_flag and not silent:
         print("[LOGGING] Disabling logging", file=sys.stderr)
     _initialized_flag = False
-    #
-
-
-
-
-
-
-
+    # remove all handlers we might have added (via initLogging on builtin or file config)
+    for logger_name in logging.root.manager.loggerDict:
+        if not silent:
+            print(f'[LOGGING] Resetting {logger_name} log level and handlers')
+        logger = logging.getLogger(logger_name)
+        logger.setLevel(logging.NOTSET)
+        for handler in logger.handlers[:]:
+            logger.removeHandler(handler)
+    for handler in logging.root.handlers[:]:
+        logging.root.removeHandler(handler)
     # Python default log level is WARNING
     logging.root.setLevel(logging.WARNING)
 
-# Initializing stream handlers at module level
-# would cause message output in all runtime contexts,
-# including those which are already run for std output
-# (--dump-json, --version, ocrd-tool, bashlib etc).
-# So this needs to be an opt-in from the CLIs/decorators:
-#initLogging()
-# Also, we even have to block log output for libraries
-# (like matplotlib/tensorflow) which set up logging
-# themselves already:
-disableLogging()
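With `ROOT_OCRD_LOGGERS` gone, the single stderr handler is attached to the root logger and `setOverrideLogLevel` now walks every registered logger. A minimal sketch of the intended call sequence from a CLI entry point (logger name and messages are examples):

    from ocrd_utils import initLogging, getLogger, setOverrideLogLevel, disableLogging

    initLogging()                      # attaches the single stderr handler to the root logger
    log = getLogger("ocrd.example")    # example name below the 'ocrd' default (INFO)
    log.info("visible at the default INFO level")

    setOverrideLogLevel("DEBUG")       # e.g. in response to --log-level DEBUG
    log.debug("now visible as well")

    disableLogging()                   # strips handlers again, e.g. between test cases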
ocrd_utils/ocrd_logging.conf
CHANGED
@@ -34,7 +34,7 @@ keys=defaultFormatter,detailedFormatter
 # default logger "root" using consoleHandler
 #
 [logger_root]
-level=
+level=WARNING
 handlers=consoleHandler,fileHandler
 
 
@@ -56,22 +56,22 @@ handlers=consoleHandler,fileHandler
 # ocrd loggers
 [logger_ocrd]
 level=INFO
-handlers=
+handlers=
 qualname=ocrd
-propagate=0
 
 [logger_ocrd_network]
 level=INFO
-handlers=consoleHandler,processingServerHandler
+#handlers=consoleHandler,processingServerHandler
+handlers=processingServerHandler
 qualname=ocrd_network
-propagate=0
+#propagate=0
 
 #
 # logger tensorflow
 #
 [logger_ocrd_tensorflow]
 level=ERROR
-handlers=
+handlers=
 qualname=tensorflow
 
 #
@@ -79,7 +79,7 @@ qualname=tensorflow
 #
 [logger_ocrd_shapely_geos]
 level=ERROR
-handlers=
+handlers=
 qualname=shapely.geos
 
 
@@ -88,7 +88,7 @@ qualname=shapely.geos
 #
 [logger_ocrd_PIL]
 level=INFO
-handlers=
+handlers=
 qualname=PIL
 
 #
@@ -96,34 +96,32 @@ qualname=PIL
 #
 [logger_paramiko]
 level=INFO
-handlers=
+handlers=
 qualname=paramiko
-propagate=0
 
 [logger_paramiko_transport]
 level=INFO
-handlers=
+handlers=
 qualname=paramiko.transport
-propagate=0
 
 #
 # uvicorn loggers
 #
 [logger_uvicorn]
 level=INFO
-handlers=
+handlers=
 qualname=uvicorn
 [logger_uvicorn_access]
 level=WARN
-handlers=
+handlers=
 qualname=uvicorn.access
 [logger_uvicorn_error]
 level=INFO
-handlers=
+handlers=
 qualname=uvicorn.error
 [logger_multipart]
 level=INFO
-handlers=
+handlers=
 qualname=multipart
 
 
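The shipped configuration stays a standard `logging.config.fileConfig` INI file, so the effect of routing everything through the root handlers can be previewed with the standard library alone (the path is an example; OCR-D normally loads the file via `initLogging` when it finds an `ocrd_logging.conf` in its search path):

    import logging
    import logging.config

    # preview the shipped config with the stdlib (path is an example)
    logging.config.fileConfig("ocrd_utils/ocrd_logging.conf", disable_existing_loggers=False)

    logging.getLogger("ocrd").info("propagates to the root console and file handlers")
    logging.getLogger("tensorflow").info("suppressed: this logger is capped at ERROR")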
{ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/LICENSE
File without changes
{ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/WHEEL
File without changes
{ocrd-3.0.0b6.dist-info → ocrd-3.0.1.dist-info}/top_level.txt
File without changes