ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +8 -6
- ocrd/cli/bashlib.py +8 -114
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/mets_server.py +11 -15
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
- ocrd-3.7.0.dist-info/RECORD +123 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +31 -33
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +5 -5
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +15 -7
- ocrd_utils/os.py +17 -4
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd/cli/log.py +0 -51
- ocrd/lib.bash +0 -317
- ocrd-3.5.1.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd_utils/__init__.py
CHANGED
|
@@ -8,11 +8,11 @@ Utility functions and constants usable in various circumstances.
|
|
|
8
8
|
levels below page (i.e. region, line, word, glyph) between relative coordinates
|
|
9
9
|
w.r.t. a corresponding image and absolute coordinates w.r.t. the top-level image.
|
|
10
10
|
This includes rotation and offset correction, based on affine transformations.
|
|
11
|
-
(Used by :py:class:`ocrd.workspace.Workspace` methods
|
|
12
|
-
:py:meth:`ocrd.workspace.Workspace.image_from_page` and
|
|
11
|
+
(Used by :py:class:`ocrd.workspace.Workspace` methods
|
|
12
|
+
:py:meth:`ocrd.workspace.Workspace.image_from_page` and
|
|
13
13
|
:py:meth:`ocrd.workspace.Workspace.image_from_segment`.)
|
|
14
14
|
|
|
15
|
-
* :py:func:`rotate_coordinates`,
|
|
15
|
+
* :py:func:`rotate_coordinates`,
|
|
16
16
|
:py:func:`scale_coordinates`,
|
|
17
17
|
:py:func:`shift_coordinates`,
|
|
18
18
|
:py:func:`transpose_coordinates`,
|
|
@@ -23,7 +23,7 @@ Utility functions and constants usable in various circumstances.
|
|
|
23
23
|
used to pass down the coordinate system along with images (both invariably sharing
|
|
24
24
|
the same operations context) when traversing the element hierarchy top to bottom.
|
|
25
25
|
(Used by :py:class:`ocrd.workspace.Workspace` methods
|
|
26
|
-
:py:meth:`ocrd.workspace.Workspace.image_from_page` and
|
|
26
|
+
:py:meth:`ocrd.workspace.Workspace.image_from_page` and
|
|
27
27
|
:py:meth:`ocrd.workspace.Workspace.image_from_segment`.)
|
|
28
28
|
|
|
29
29
|
* :py:func:`rotate_image`,
|
ocrd_utils/config.py
CHANGED
|
@@ -16,9 +16,11 @@ from textwrap import fill, indent
|
|
|
16
16
|
def _validator_boolean(val):
|
|
17
17
|
return isinstance(val, bool) or str.lower(val) in ('true', 'false', '0', '1')
|
|
18
18
|
|
|
19
|
+
|
|
19
20
|
def _parser_boolean(val):
|
|
20
21
|
return bool(val) if isinstance(val, (int, bool)) else str.lower(val) in ('true', '1')
|
|
21
22
|
|
|
23
|
+
|
|
22
24
|
class OcrdEnvVariable():
|
|
23
25
|
|
|
24
26
|
def __init__(self, name, description, parser=str, validator=lambda _: True, default=[False, None]):
|
|
@@ -61,7 +63,7 @@ class OcrdEnvVariable():
|
|
|
61
63
|
desc += ' '
|
|
62
64
|
desc += f'(Default: "{default}")'
|
|
63
65
|
ret = ''
|
|
64
|
-
ret
|
|
66
|
+
ret = f'{self.name}\n'
|
|
65
67
|
if wrap_text:
|
|
66
68
|
desc = fill(desc, width=50)
|
|
67
69
|
if indent_text:
|
|
@@ -69,6 +71,7 @@ class OcrdEnvVariable():
|
|
|
69
71
|
desc = indent(desc, ' ')
|
|
70
72
|
return ret + desc
|
|
71
73
|
|
|
74
|
+
|
|
72
75
|
class OcrdEnvConfig():
|
|
73
76
|
|
|
74
77
|
def __init__(self):
|
|
@@ -83,7 +86,7 @@ class OcrdEnvConfig():
|
|
|
83
86
|
return self._variables[name]
|
|
84
87
|
|
|
85
88
|
def has_default(self, name):
|
|
86
|
-
if not
|
|
89
|
+
if name not in self._variables:
|
|
87
90
|
raise ValueError(f"Unregistered env variable {name}")
|
|
88
91
|
return self._variables[name].has_default
|
|
89
92
|
|
|
@@ -99,13 +102,13 @@ class OcrdEnvConfig():
|
|
|
99
102
|
pass
|
|
100
103
|
|
|
101
104
|
def describe(self, name, *args, **kwargs):
|
|
102
|
-
if not
|
|
105
|
+
if name not in self._variables:
|
|
103
106
|
raise ValueError(f"Unregistered env variable {name}")
|
|
104
107
|
return self._variables[name].describe(*args, **kwargs)
|
|
105
108
|
|
|
106
109
|
def __getattr__(self, name):
|
|
107
110
|
# will be called if name is not accessible (has not been added directly yet)
|
|
108
|
-
if not
|
|
111
|
+
if name not in self._variables:
|
|
109
112
|
raise AttributeError(f"Unregistered env variable {name}")
|
|
110
113
|
var_obj = self._variables[name]
|
|
111
114
|
try:
|
|
@@ -120,39 +123,44 @@ class OcrdEnvConfig():
|
|
|
120
123
|
return var_obj.parser(raw_value)
|
|
121
124
|
|
|
122
125
|
def is_set(self, name):
|
|
123
|
-
if not
|
|
126
|
+
if name not in self._variables:
|
|
124
127
|
raise ValueError(f"Unregistered env variable {name}")
|
|
125
128
|
return name in environ
|
|
126
129
|
|
|
127
130
|
def raw_value(self, name):
|
|
128
|
-
if not
|
|
131
|
+
if name not in self._variables:
|
|
129
132
|
raise ValueError(f"Unregistered env variable {name}")
|
|
130
133
|
return environ[name]
|
|
131
134
|
|
|
135
|
+
|
|
132
136
|
config = OcrdEnvConfig()
|
|
133
137
|
|
|
134
138
|
config.add('OCRD_METS_CACHING',
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
139
|
+
description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
|
|
140
|
+
validator=_validator_boolean,
|
|
141
|
+
parser=_parser_boolean)
|
|
138
142
|
|
|
139
143
|
config.add('OCRD_MAX_PROCESSOR_CACHE',
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
144
|
+
description="Maximum number of processor instances (for each set of parameters) to be kept in memory "
|
|
145
|
+
"(including loaded models) for processing workers.",
|
|
146
|
+
parser=int,
|
|
147
|
+
default=(True, 128))
|
|
143
148
|
|
|
144
149
|
config.add('OCRD_MAX_PARALLEL_PAGES',
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
150
|
+
description="Maximum number of processor workers for page-parallel processing (within "
|
|
151
|
+
"each Processor's selected page range, independent of the number of Processing "
|
|
152
|
+
"Workers). If set >1, then a METS Server must be used for METS synchronisation.",
|
|
153
|
+
parser=int,
|
|
154
|
+
default=(True, 1))
|
|
148
155
|
|
|
149
156
|
config.add('OCRD_PROCESSING_PAGE_TIMEOUT',
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
157
|
+
description="Timeout in seconds for processing a single page. If set >0, when exceeded, "
|
|
158
|
+
"the same as OCRD_MISSING_OUTPUT applies.",
|
|
159
|
+
parser=int,
|
|
160
|
+
default=(True, 0))
|
|
153
161
|
|
|
154
162
|
config.add("OCRD_PROFILE",
|
|
155
|
-
|
|
163
|
+
description="""\
|
|
156
164
|
Whether to enable gathering runtime statistics
|
|
157
165
|
on the `ocrd.profile` logger (comma-separated):
|
|
158
166
|
\b
|
|
@@ -161,16 +169,18 @@ on the `ocrd.profile` logger (comma-separated):
|
|
|
161
169
|
- `PSS`: also yields peak memory (proportional set size)
|
|
162
170
|
\b
|
|
163
171
|
""",
|
|
164
|
-
|
|
165
|
-
|
|
172
|
+
validator=lambda val: all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
|
|
173
|
+
default=(True, ''))
|
|
166
174
|
|
|
167
175
|
config.add("OCRD_PROFILE_FILE",
|
|
168
|
-
|
|
176
|
+
description="If set, then the CPU profile is written to this file for later peruse "
|
|
177
|
+
"with a analysis tools like snakeviz")
|
|
169
178
|
|
|
170
179
|
config.add("OCRD_DOWNLOAD_RETRIES",
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
180
|
+
description="Number of times to retry failed attempts for downloads of resources or workspace files.",
|
|
181
|
+
validator=int,
|
|
182
|
+
parser=int)
|
|
183
|
+
|
|
174
184
|
|
|
175
185
|
def _ocrd_download_timeout_parser(val):
|
|
176
186
|
timeout = val.split(',')
|
|
@@ -180,18 +190,19 @@ def _ocrd_download_timeout_parser(val):
|
|
|
180
190
|
timeout = float(timeout[0])
|
|
181
191
|
return timeout
|
|
182
192
|
|
|
193
|
+
|
|
183
194
|
config.add("OCRD_DOWNLOAD_TIMEOUT",
|
|
184
|
-
|
|
185
|
-
|
|
195
|
+
description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
|
|
196
|
+
parser=_ocrd_download_timeout_parser)
|
|
186
197
|
|
|
187
198
|
config.add("OCRD_DOWNLOAD_INPUT",
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
199
|
+
description="Whether to download files not present locally during processing",
|
|
200
|
+
default=(True, True),
|
|
201
|
+
validator=_validator_boolean,
|
|
202
|
+
parser=_parser_boolean)
|
|
192
203
|
|
|
193
204
|
config.add("OCRD_MISSING_INPUT",
|
|
194
|
-
|
|
205
|
+
description="""\
|
|
195
206
|
How to deal with missing input files
|
|
196
207
|
(for some fileGrp/pageId) during processing:
|
|
197
208
|
\b
|
|
@@ -199,12 +210,12 @@ How to deal with missing input files
|
|
|
199
210
|
- `ABORT`: throw :py:class:`.MissingInputFile`
|
|
200
211
|
\b
|
|
201
212
|
""",
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
213
|
+
default=(True, 'SKIP'),
|
|
214
|
+
validator=lambda val: val in ['SKIP', 'ABORT'],
|
|
215
|
+
parser=str)
|
|
205
216
|
|
|
206
217
|
config.add("OCRD_MISSING_OUTPUT",
|
|
207
|
-
|
|
218
|
+
description="""\
|
|
208
219
|
How to deal with missing output files
|
|
209
220
|
(for some fileGrp/pageId) during processing:
|
|
210
221
|
\b
|
|
@@ -213,17 +224,18 @@ How to deal with missing output files
|
|
|
213
224
|
- `ABORT`: re-throw whatever caused processing to fail
|
|
214
225
|
\b
|
|
215
226
|
""",
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
227
|
+
default=(True, 'SKIP'),
|
|
228
|
+
validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'],
|
|
229
|
+
parser=str)
|
|
219
230
|
|
|
220
231
|
config.add("OCRD_MAX_MISSING_OUTPUTS",
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
232
|
+
description="Maximal rate of skipped/fallback pages among all processed pages before aborting "
|
|
233
|
+
"(decimal fraction, ignored if negative).",
|
|
234
|
+
default=(True, 0.1),
|
|
235
|
+
parser=float)
|
|
224
236
|
|
|
225
237
|
config.add("OCRD_EXISTING_OUTPUT",
|
|
226
|
-
|
|
238
|
+
description="""\
|
|
227
239
|
How to deal with already existing output files
|
|
228
240
|
(for some fileGrp/pageId) during processing:
|
|
229
241
|
\b
|
|
@@ -232,13 +244,13 @@ How to deal with already existing output files
|
|
|
232
244
|
- `ABORT`: re-throw :py:class:`FileExistsError`
|
|
233
245
|
\b
|
|
234
246
|
""",
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
247
|
+
default=(True, 'SKIP'),
|
|
248
|
+
validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'],
|
|
249
|
+
parser=str)
|
|
238
250
|
|
|
239
251
|
config.add("OCRD_NETWORK_SERVER_ADDR_PROCESSING",
|
|
240
|
-
|
|
241
|
-
|
|
252
|
+
description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
|
|
253
|
+
default=(True, ''))
|
|
242
254
|
|
|
243
255
|
config.add("OCRD_NETWORK_CLIENT_POLLING_SLEEP",
|
|
244
256
|
description="How many seconds to sleep before trying again.",
|
|
@@ -251,27 +263,25 @@ config.add("OCRD_NETWORK_CLIENT_POLLING_TIMEOUT",
|
|
|
251
263
|
default=(True, 3600))
|
|
252
264
|
|
|
253
265
|
config.add("OCRD_NETWORK_SERVER_ADDR_WORKFLOW",
|
|
254
|
-
|
|
255
|
-
|
|
266
|
+
description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
|
|
267
|
+
default=(True, ''))
|
|
256
268
|
|
|
257
269
|
config.add("OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
|
|
258
|
-
|
|
259
|
-
|
|
270
|
+
description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
|
|
271
|
+
default=(True, ''))
|
|
260
272
|
|
|
261
273
|
config.add("OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS",
|
|
262
274
|
description="Number of attempts for a RabbitMQ client to connect before failing.",
|
|
263
275
|
parser=int,
|
|
264
276
|
default=(True, 3))
|
|
265
277
|
|
|
266
|
-
config.add(
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
default=(True, 0)
|
|
274
|
-
)
|
|
278
|
+
config.add(name="OCRD_NETWORK_RABBITMQ_HEARTBEAT",
|
|
279
|
+
description="""
|
|
280
|
+
Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value
|
|
281
|
+
proposed by broker. Use 0 to deactivate heartbeat.
|
|
282
|
+
""",
|
|
283
|
+
parser=int,
|
|
284
|
+
default=(True, 0))
|
|
275
285
|
|
|
276
286
|
config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
|
|
277
287
|
description="The root directory where all mets server related socket files are created",
|
|
@@ -296,24 +306,24 @@ except PermissionError:
|
|
|
296
306
|
pass
|
|
297
307
|
|
|
298
308
|
config.add("HOME",
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
309
|
+
description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
|
|
310
|
+
# description="HOME directory, cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html",
|
|
311
|
+
validator=lambda val: Path(val).is_dir(),
|
|
312
|
+
parser=lambda val: Path(val),
|
|
313
|
+
default=(True, lambda: Path.home()))
|
|
304
314
|
|
|
305
315
|
config.add("XDG_DATA_HOME",
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
316
|
+
description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
|
|
317
|
+
parser=lambda val: Path(val),
|
|
318
|
+
default=(True, lambda: Path(config.HOME, '.local/share')))
|
|
309
319
|
|
|
310
320
|
config.add("XDG_CONFIG_HOME",
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
321
|
+
description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
|
|
322
|
+
parser=lambda val: Path(val),
|
|
323
|
+
default=(True, lambda: Path(config.HOME, '.config')))
|
|
314
324
|
|
|
315
325
|
config.add("OCRD_LOGGING_DEBUG",
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
326
|
+
description="Print information about the logging setup to STDERR",
|
|
327
|
+
default=(True, False),
|
|
328
|
+
validator=_validator_boolean,
|
|
329
|
+
parser=_parser_boolean)
|
ocrd_utils/deprecate.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import warnings
|
|
3
3
|
|
|
4
|
+
|
|
4
5
|
def deprecation_warning(msg, stacklevel=2):
|
|
5
6
|
warnings.warn(msg, DeprecationWarning, stacklevel)
|
|
6
7
|
|
|
8
|
+
|
|
7
9
|
def deprecated_alias(**aliases):
|
|
8
10
|
"""
|
|
9
11
|
Deprecate a kwarg in favor of another kwarg
|
|
@@ -16,6 +18,7 @@ def deprecated_alias(**aliases):
|
|
|
16
18
|
return wrapper
|
|
17
19
|
return deco
|
|
18
20
|
|
|
21
|
+
|
|
19
22
|
def rename_kwargs(func_name, kwargs, aliases):
|
|
20
23
|
"""
|
|
21
24
|
https://stackoverflow.com/questions/49802412/how-to-implement-deprecation-in-python-with-argument-alias
|