ocrd 3.0.2__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- ocrd/processor/base.py +9 -3
- ocrd/processor/helpers.py +9 -3
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/METADATA +1 -1
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/RECORD +8 -8
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/LICENSE +0 -0
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/WHEEL +0 -0
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/entry_points.txt +0 -0
- {ocrd-3.0.2.dist-info → ocrd-3.0.3.dist-info}/top_level.txt +0 -0
ocrd/processor/base.py
CHANGED
|
@@ -534,15 +534,20 @@ class Processor():
|
|
|
534
534
|
# forward messages from log queue (in subprocesses) to all root handlers
|
|
535
535
|
log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
|
|
536
536
|
log_listener.start()
|
|
537
|
+
tasks = None
|
|
537
538
|
try:
|
|
538
539
|
self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
|
|
539
540
|
tasks = self.process_workspace_submit_tasks(executor, max_seconds)
|
|
540
541
|
stats = self.process_workspace_handle_tasks(tasks)
|
|
541
542
|
finally:
|
|
542
543
|
executor.shutdown(kill_workers=True, wait=False)
|
|
544
|
+
self._base_logger.debug("stopped executor %s after %d tasks", str(executor), len(tasks) if tasks else -1)
|
|
543
545
|
if max_workers > 1:
|
|
544
|
-
|
|
545
|
-
|
|
546
|
+
# can cause deadlock:
|
|
547
|
+
#log_listener.stop()
|
|
548
|
+
# not much better:
|
|
549
|
+
#log_listener.enqueue_sentinel()
|
|
550
|
+
pass
|
|
546
551
|
|
|
547
552
|
except NotImplementedError:
|
|
548
553
|
# fall back to deprecated method
|
|
@@ -670,11 +675,12 @@ class Processor():
|
|
|
670
675
|
nr_succeeded += 1
|
|
671
676
|
# else skipped - already exists
|
|
672
677
|
nr_errors = dict(nr_errors)
|
|
678
|
+
nr_all = nr_succeeded + nr_failed
|
|
673
679
|
if nr_failed > 0:
|
|
674
|
-
nr_all = nr_succeeded + nr_failed
|
|
675
680
|
if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS:
|
|
676
681
|
raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})")
|
|
677
682
|
self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors))
|
|
683
|
+
self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", nr_succeeded, nr_failed, nr_all, str(nr_errors))
|
|
678
684
|
return nr_succeeded, nr_failed, nr_errors, len(tasks)
|
|
679
685
|
|
|
680
686
|
def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]:
|
ocrd/processor/helpers.py
CHANGED
|
@@ -56,13 +56,19 @@ def run_processor(
|
|
|
56
56
|
- :py:attr:`output_file_grp`
|
|
57
57
|
- :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)
|
|
58
58
|
|
|
59
|
-
Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
|
|
60
|
-
This flag is used for an experimental feature we would like to adopt in future.
|
|
61
|
-
|
|
62
59
|
Run the processor on the workspace (creating output files in the filesystem).
|
|
63
60
|
|
|
64
61
|
Finally, write back the workspace (updating the METS in the filesystem).
|
|
65
62
|
|
|
63
|
+
If :py:attr:`instance_caching` is True, then processor instances (for the same set
|
|
64
|
+
of :py:attr:`parameter` values) will be cached internally. Thus, these objects (and
|
|
65
|
+
all their memory resources, like loaded models) get re-used instead of re-instantiated
|
|
66
|
+
when a match occurs - as long as the program is being run. They only get deleted (and
|
|
67
|
+
their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE`
|
|
68
|
+
instances have already been cached while this particular parameter set was re-used
|
|
69
|
+
least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and
|
|
70
|
+
:py:class:`~ocrd_network.ProcessorServer` for use-cases.)
|
|
71
|
+
|
|
66
72
|
Args:
|
|
67
73
|
processorClass (object): Python class of the module processor.
|
|
68
74
|
"""
|
|
@@ -26,8 +26,8 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
|
|
|
26
26
|
ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
|
|
27
27
|
ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
|
|
28
28
|
ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
|
|
29
|
-
ocrd/processor/base.py,sha256=
|
|
30
|
-
ocrd/processor/helpers.py,sha256=
|
|
29
|
+
ocrd/processor/base.py,sha256=wAu3d5E2Vt-rWBejzgZOOnxuI0njgShuLfRZPxyuUGk,59731
|
|
30
|
+
ocrd/processor/helpers.py,sha256=rk7OYucvUgS0fTsxNpYaiPmuyD2l0hVg9uSXb4Jdgow,10975
|
|
31
31
|
ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
|
|
32
32
|
ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
|
|
@@ -120,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
|
|
|
120
120
|
ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
|
|
121
121
|
ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
|
|
122
122
|
ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
|
|
123
|
-
ocrd-3.0.
|
|
124
|
-
ocrd-3.0.
|
|
125
|
-
ocrd-3.0.
|
|
126
|
-
ocrd-3.0.
|
|
127
|
-
ocrd-3.0.
|
|
128
|
-
ocrd-3.0.
|
|
123
|
+
ocrd-3.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
124
|
+
ocrd-3.0.3.dist-info/METADATA,sha256=gZiKn6AO1tPL9NAfVsX0f3UTUZipDtEUOIXmMLXr76k,10442
|
|
125
|
+
ocrd-3.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
126
|
+
ocrd-3.0.3.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
|
|
127
|
+
ocrd-3.0.3.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
128
|
+
ocrd-3.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|