ocrd-3.0.1-py3-none-any.whl → ocrd-3.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/decorators/ocrd_cli_options.py +5 -5
- ocrd/processor/base.py +10 -4
- ocrd/processor/helpers.py +9 -3
- ocrd/workspace.py +0 -2
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/METADATA +6 -6
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/RECORD +12 -12
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/WHEEL +1 -1
- ocrd_network/cli/client.py +1 -1
- ocrd_utils/config.py +1 -1
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/LICENSE +0 -0
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/entry_points.txt +0 -0
- {ocrd-3.0.1.dist-info → ocrd-3.0.3.dist-info}/top_level.txt +0 -0
|
@@ -13,16 +13,16 @@ from ocrd_network import (
|
|
|
13
13
|
|
|
14
14
|
def ocrd_cli_options(f):
|
|
15
15
|
"""
|
|
16
|
-
Implement
|
|
16
|
+
Implement Processor CLI.
|
|
17
17
|
|
|
18
18
|
Usage::
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
from ocrd.decorators import ocrd_cli_options
|
|
21
21
|
|
|
22
22
|
@click.command()
|
|
23
|
-
@
|
|
24
|
-
def cli(
|
|
25
|
-
print(mets_url)
|
|
23
|
+
@ocrd_cli_options
|
|
24
|
+
def cli(**kwargs):
|
|
25
|
+
print(kwargs['mets_url'])
|
|
26
26
|
"""
|
|
27
27
|
# XXX Note that the `--help` output is statically generate_processor_help
|
|
28
28
|
params = [
|
ocrd/processor/base.py
CHANGED
|
@@ -518,7 +518,7 @@ class Processor():
|
|
|
518
518
|
|
|
519
519
|
if max_workers > 1:
|
|
520
520
|
executor_cls = ProcessPoolExecutor
|
|
521
|
-
log_queue = mp.Queue()
|
|
521
|
+
log_queue = mp.get_context('fork').Queue()
|
|
522
522
|
else:
|
|
523
523
|
executor_cls = DummyExecutor
|
|
524
524
|
log_queue = None
|
|
@@ -534,15 +534,20 @@ class Processor():
|
|
|
534
534
|
# forward messages from log queue (in subprocesses) to all root handlers
|
|
535
535
|
log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
|
|
536
536
|
log_listener.start()
|
|
537
|
+
tasks = None
|
|
537
538
|
try:
|
|
538
539
|
self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
|
|
539
540
|
tasks = self.process_workspace_submit_tasks(executor, max_seconds)
|
|
540
541
|
stats = self.process_workspace_handle_tasks(tasks)
|
|
541
542
|
finally:
|
|
542
543
|
executor.shutdown(kill_workers=True, wait=False)
|
|
544
|
+
self._base_logger.debug("stopped executor %s after %d tasks", str(executor), len(tasks) if tasks else -1)
|
|
543
545
|
if max_workers > 1:
|
|
544
|
-
|
|
545
|
-
|
|
546
|
+
# can cause deadlock:
|
|
547
|
+
#log_listener.stop()
|
|
548
|
+
# not much better:
|
|
549
|
+
#log_listener.enqueue_sentinel()
|
|
550
|
+
pass
|
|
546
551
|
|
|
547
552
|
except NotImplementedError:
|
|
548
553
|
# fall back to deprecated method
|
|
@@ -670,11 +675,12 @@ class Processor():
|
|
|
670
675
|
nr_succeeded += 1
|
|
671
676
|
# else skipped - already exists
|
|
672
677
|
nr_errors = dict(nr_errors)
|
|
678
|
+
nr_all = nr_succeeded + nr_failed
|
|
673
679
|
if nr_failed > 0:
|
|
674
|
-
nr_all = nr_succeeded + nr_failed
|
|
675
680
|
if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS:
|
|
676
681
|
raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})")
|
|
677
682
|
self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors))
|
|
683
|
+
self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", nr_succeeded, nr_failed, nr_all, str(nr_errors))
|
|
678
684
|
return nr_succeeded, nr_failed, nr_errors, len(tasks)
|
|
679
685
|
|
|
680
686
|
def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]:
|
ocrd/processor/helpers.py
CHANGED
|
@@ -56,13 +56,19 @@ def run_processor(
|
|
|
56
56
|
- :py:attr:`output_file_grp`
|
|
57
57
|
- :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)
|
|
58
58
|
|
|
59
|
-
Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
|
|
60
|
-
This flag is used for an experimental feature we would like to adopt in future.
|
|
61
|
-
|
|
62
59
|
Run the processor on the workspace (creating output files in the filesystem).
|
|
63
60
|
|
|
64
61
|
Finally, write back the workspace (updating the METS in the filesystem).
|
|
65
62
|
|
|
63
|
+
If :py:attr:`instance_caching` is True, then processor instances (for the same set
|
|
64
|
+
of :py:attr:`parameter` values) will be cached internally. Thus, these objects (and
|
|
65
|
+
all their memory resources, like loaded models) get re-used instead of re-instantiated
|
|
66
|
+
when a match occurs - as long as the program is being run. They only get deleted (and
|
|
67
|
+
their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE`
|
|
68
|
+
instances have already been cached while this particular parameter set was re-used
|
|
69
|
+
least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and
|
|
70
|
+
:py:class:`~ocrd_network.ProcessorServer` for use-cases.)
|
|
71
|
+
|
|
66
72
|
Args:
|
|
67
73
|
processorClass (object): Python class of the module processor.
|
|
68
74
|
"""
|
ocrd/workspace.py
CHANGED
|
@@ -798,7 +798,6 @@ class Workspace():
|
|
|
798
798
|
raise Exception('Found no AlternativeImage that satisfies all requirements ' +
|
|
799
799
|
'filter="%s" in page "%s"' % (
|
|
800
800
|
feature_filter, page_id))
|
|
801
|
-
page_image.format = 'PNG' # workaround for tesserocr#194
|
|
802
801
|
# ensure DPI will be set in image meta-data again
|
|
803
802
|
if 'DPI' in page_coords:
|
|
804
803
|
dpi = page_coords['DPI']
|
|
@@ -1060,7 +1059,6 @@ class Workspace():
|
|
|
1060
1059
|
raise Exception('Found no AlternativeImage that satisfies all requirements ' +
|
|
1061
1060
|
'filter="%s" in segment "%s"' % (
|
|
1062
1061
|
feature_filter, segment.id))
|
|
1063
|
-
segment_image.format = 'PNG' # workaround for tesserocr#194
|
|
1064
1062
|
# ensure DPI will be set in image meta-data again
|
|
1065
1063
|
if 'DPI' in segment_coords:
|
|
1066
1064
|
dpi = segment_coords['DPI']
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: ocrd
|
|
3
|
-
Version: 3.0.1
|
|
3
|
+
Version: 3.0.3
|
|
4
4
|
Summary: OCR-D framework
|
|
5
5
|
Author-email: Konstantin Baierer <unixprog@gmail.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -24,13 +24,15 @@ Requires-Dist: Flask
|
|
|
24
24
|
Requires-Dist: frozendict>=2.3.4
|
|
25
25
|
Requires-Dist: gdown
|
|
26
26
|
Requires-Dist: httpx>=0.22.0
|
|
27
|
+
Requires-Dist: importlib_metadata; python_version < "3.8"
|
|
28
|
+
Requires-Dist: importlib_resources; python_version < "3.10"
|
|
27
29
|
Requires-Dist: jsonschema>=4
|
|
28
30
|
Requires-Dist: loky
|
|
29
31
|
Requires-Dist: lxml
|
|
30
32
|
Requires-Dist: memory-profiler>=0.58.0
|
|
31
33
|
Requires-Dist: numpy
|
|
32
34
|
Requires-Dist: ocrd-fork-bagit>=1.8.1.post2
|
|
33
|
-
Requires-Dist: ocrd-fork-
|
|
35
|
+
Requires-Dist: ocrd-fork-bagit_profile>=1.3.0.post1
|
|
34
36
|
Requires-Dist: opencv-python-headless
|
|
35
37
|
Requires-Dist: paramiko
|
|
36
38
|
Requires-Dist: pika>=1.2.0
|
|
@@ -40,12 +42,10 @@ Requires-Dist: python-magic
|
|
|
40
42
|
Requires-Dist: python-multipart
|
|
41
43
|
Requires-Dist: pyyaml
|
|
42
44
|
Requires-Dist: requests
|
|
43
|
-
Requires-Dist:
|
|
45
|
+
Requires-Dist: requests_unixsocket2
|
|
44
46
|
Requires-Dist: shapely
|
|
45
47
|
Requires-Dist: uvicorn
|
|
46
48
|
Requires-Dist: uvicorn>=0.17.6
|
|
47
|
-
Requires-Dist: importlib-resources; python_version < "3.10"
|
|
48
|
-
Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
49
49
|
|
|
50
50
|
# OCR-D/core
|
|
51
51
|
|
|
@@ -7,7 +7,7 @@ ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
|
|
|
7
7
|
ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
|
|
8
8
|
ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
|
|
9
9
|
ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
|
|
10
|
-
ocrd/workspace.py,sha256
|
|
10
|
+
ocrd/workspace.py,sha256=-j3X83K0f4vtd5jwfu6_R53RJ2R8gt1HYpyrv8YP2bg,65661
|
|
11
11
|
ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
|
|
12
12
|
ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
|
|
13
13
|
ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
|
|
@@ -23,11 +23,11 @@ ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
|
|
|
23
23
|
ocrd/decorators/__init__.py,sha256=PyXX7vxdWkRHixas9dWUtyO3YLczcly8ZEpfZDSMVp8,7639
|
|
24
24
|
ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
|
|
25
25
|
ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
|
|
26
|
-
ocrd/decorators/ocrd_cli_options.py,sha256=
|
|
26
|
+
ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
|
|
27
27
|
ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
|
|
28
28
|
ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
|
|
29
|
-
ocrd/processor/base.py,sha256=
|
|
30
|
-
ocrd/processor/helpers.py,sha256=
|
|
29
|
+
ocrd/processor/base.py,sha256=wAu3d5E2Vt-rWBejzgZOOnxuI0njgShuLfRZPxyuUGk,59731
|
|
30
|
+
ocrd/processor/helpers.py,sha256=rk7OYucvUgS0fTsxNpYaiPmuyD2l0hVg9uSXb4Jdgow,10975
|
|
31
31
|
ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
|
|
32
32
|
ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
|
|
@@ -64,7 +64,7 @@ ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,
|
|
|
64
64
|
ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
|
|
65
65
|
ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
|
|
66
66
|
ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
|
|
67
|
-
ocrd_network/cli/client.py,sha256=
|
|
67
|
+
ocrd_network/cli/client.py,sha256=gFEXjz-d074CpvimqaM4kJRbJVNYRAOK-jsUl2EAUVs,8424
|
|
68
68
|
ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
|
|
69
69
|
ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
|
|
70
70
|
ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
|
|
@@ -89,7 +89,7 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
|
|
|
89
89
|
ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
|
|
90
90
|
ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
|
|
91
91
|
ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
|
|
92
|
-
ocrd_utils/config.py,sha256=
|
|
92
|
+
ocrd_utils/config.py,sha256=1_8j4kpKK5gxhFKObbBEzmq4JazTtKEkGe14Ch_tpw0,11796
|
|
93
93
|
ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
|
|
94
94
|
ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
|
|
95
95
|
ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
|
|
@@ -120,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
|
|
|
120
120
|
ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
|
|
121
121
|
ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
|
|
122
122
|
ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
|
|
123
|
-
ocrd-3.0.
|
|
124
|
-
ocrd-3.0.
|
|
125
|
-
ocrd-3.0.
|
|
126
|
-
ocrd-3.0.
|
|
127
|
-
ocrd-3.0.
|
|
128
|
-
ocrd-3.0.
|
|
123
|
+
ocrd-3.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
124
|
+
ocrd-3.0.3.dist-info/METADATA,sha256=gZiKn6AO1tPL9NAfVsX0f3UTUZipDtEUOIXmMLXr76k,10442
|
|
125
|
+
ocrd-3.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
126
|
+
ocrd-3.0.3.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
|
|
127
|
+
ocrd-3.0.3.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
128
|
+
ocrd-3.0.3.dist-info/RECORD,,
|
ocrd_network/cli/client.py
CHANGED
|
@@ -110,8 +110,8 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
|
|
|
110
110
|
@click.option('-p', '--print-state', default=False, is_flag=True,
|
|
111
111
|
help='If set, the client will print job states by each iteration.')
|
|
112
112
|
def send_processing_job_request(
|
|
113
|
-
address: Optional[str],
|
|
114
113
|
processor_name: str,
|
|
114
|
+
address: Optional[str],
|
|
115
115
|
mets: str,
|
|
116
116
|
input_file_grp: str,
|
|
117
117
|
output_file_grp: Optional[str],
|
ocrd_utils/config.py
CHANGED
|
@@ -142,7 +142,7 @@ config.add('OCRD_MAX_PROCESSOR_CACHE',
|
|
|
142
142
|
default=(True, 128))
|
|
143
143
|
|
|
144
144
|
config.add('OCRD_MAX_PARALLEL_PAGES',
|
|
145
|
-
description="Maximum number of processor
|
|
145
|
+
description="Maximum number of processor workers for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set >1, then a METS Server must be used for METS synchronisation.",
|
|
146
146
|
parser=int,
|
|
147
147
|
default=(True, 1))
|
|
148
148
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|