ocrd 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,16 +13,16 @@ from ocrd_network import (
13
13
 
14
14
  def ocrd_cli_options(f):
15
15
  """
16
- Implement MP CLI.
16
+ Implement Processor CLI.
17
17
 
18
18
  Usage::
19
19
 
20
- import ocrd_click_cli from ocrd.utils
20
+ from ocrd.decorators import ocrd_cli_options
21
21
 
22
22
  @click.command()
23
- @ocrd_click_cli
24
- def cli(mets_url):
25
- print(mets_url)
23
+ @ocrd_cli_options
24
+ def cli(**kwargs):
25
+ print(kwargs['mets_url'])
26
26
  """
27
27
  # XXX Note that the `--help` output is statically generate_processor_help
28
28
  params = [
ocrd/processor/base.py CHANGED
@@ -518,7 +518,7 @@ class Processor():
518
518
 
519
519
  if max_workers > 1:
520
520
  executor_cls = ProcessPoolExecutor
521
- log_queue = mp.Queue()
521
+ log_queue = mp.get_context('fork').Queue()
522
522
  else:
523
523
  executor_cls = DummyExecutor
524
524
  log_queue = None
@@ -534,15 +534,20 @@ class Processor():
534
534
  # forward messages from log queue (in subprocesses) to all root handlers
535
535
  log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
536
536
  log_listener.start()
537
+ tasks = None
537
538
  try:
538
539
  self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
539
540
  tasks = self.process_workspace_submit_tasks(executor, max_seconds)
540
541
  stats = self.process_workspace_handle_tasks(tasks)
541
542
  finally:
542
543
  executor.shutdown(kill_workers=True, wait=False)
544
+ self._base_logger.debug("stopped executor %s after %d tasks", str(executor), len(tasks) if tasks else -1)
543
545
  if max_workers > 1:
544
- log_listener.stop()
545
- del log_listener
546
+ # can cause deadlock:
547
+ #log_listener.stop()
548
+ # not much better:
549
+ #log_listener.enqueue_sentinel()
550
+ pass
546
551
 
547
552
  except NotImplementedError:
548
553
  # fall back to deprecated method
@@ -670,11 +675,12 @@ class Processor():
670
675
  nr_succeeded += 1
671
676
  # else skipped - already exists
672
677
  nr_errors = dict(nr_errors)
678
+ nr_all = nr_succeeded + nr_failed
673
679
  if nr_failed > 0:
674
- nr_all = nr_succeeded + nr_failed
675
680
  if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS:
676
681
  raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})")
677
682
  self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors))
683
+ self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", nr_succeeded, nr_failed, nr_all, str(nr_errors))
678
684
  return nr_succeeded, nr_failed, nr_errors, len(tasks)
679
685
 
680
686
  def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]:
ocrd/processor/helpers.py CHANGED
@@ -56,13 +56,19 @@ def run_processor(
56
56
  - :py:attr:`output_file_grp`
57
57
  - :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)
58
58
 
59
- Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
60
- This flag is used for an experimental feature we would like to adopt in future.
61
-
62
59
  Run the processor on the workspace (creating output files in the filesystem).
63
60
 
64
61
  Finally, write back the workspace (updating the METS in the filesystem).
65
62
 
63
+ If :py:attr:`instance_caching` is True, then processor instances (for the same set
64
+ of :py:attr:`parameter` values) will be cached internally. Thus, these objects (and
65
+ all their memory resources, like loaded models) get re-used instead of re-instantiated
66
+ when a match occurs - as long as the program is being run. They only get deleted (and
67
+ their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE`
68
+ instances have already been cached while this particular parameter set was re-used
69
+ least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and
70
+ :py:class:`~ocrd_network.ProcessorServer` for use-cases.)
71
+
66
72
  Args:
67
73
  processorClass (object): Python class of the module processor.
68
74
  """
ocrd/workspace.py CHANGED
@@ -798,7 +798,6 @@ class Workspace():
798
798
  raise Exception('Found no AlternativeImage that satisfies all requirements ' +
799
799
  'filter="%s" in page "%s"' % (
800
800
  feature_filter, page_id))
801
- page_image.format = 'PNG' # workaround for tesserocr#194
802
801
  # ensure DPI will be set in image meta-data again
803
802
  if 'DPI' in page_coords:
804
803
  dpi = page_coords['DPI']
@@ -1060,7 +1059,6 @@ class Workspace():
1060
1059
  raise Exception('Found no AlternativeImage that satisfies all requirements ' +
1061
1060
  'filter="%s" in segment "%s"' % (
1062
1061
  feature_filter, segment.id))
1063
- segment_image.format = 'PNG' # workaround for tesserocr#194
1064
1062
  # ensure DPI will be set in image meta-data again
1065
1063
  if 'DPI' in segment_coords:
1066
1064
  dpi = segment_coords['DPI']
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: ocrd
3
- Version: 3.0.1
3
+ Version: 3.0.3
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -24,13 +24,15 @@ Requires-Dist: Flask
24
24
  Requires-Dist: frozendict>=2.3.4
25
25
  Requires-Dist: gdown
26
26
  Requires-Dist: httpx>=0.22.0
27
+ Requires-Dist: importlib_metadata; python_version < "3.8"
28
+ Requires-Dist: importlib_resources; python_version < "3.10"
27
29
  Requires-Dist: jsonschema>=4
28
30
  Requires-Dist: loky
29
31
  Requires-Dist: lxml
30
32
  Requires-Dist: memory-profiler>=0.58.0
31
33
  Requires-Dist: numpy
32
34
  Requires-Dist: ocrd-fork-bagit>=1.8.1.post2
33
- Requires-Dist: ocrd-fork-bagit-profile>=1.3.0.post1
35
+ Requires-Dist: ocrd-fork-bagit_profile>=1.3.0.post1
34
36
  Requires-Dist: opencv-python-headless
35
37
  Requires-Dist: paramiko
36
38
  Requires-Dist: pika>=1.2.0
@@ -40,12 +42,10 @@ Requires-Dist: python-magic
40
42
  Requires-Dist: python-multipart
41
43
  Requires-Dist: pyyaml
42
44
  Requires-Dist: requests
43
- Requires-Dist: requests-unixsocket2
45
+ Requires-Dist: requests_unixsocket2
44
46
  Requires-Dist: shapely
45
47
  Requires-Dist: uvicorn
46
48
  Requires-Dist: uvicorn>=0.17.6
47
- Requires-Dist: importlib-resources; python_version < "3.10"
48
- Requires-Dist: importlib-metadata; python_version < "3.8"
49
49
 
50
50
  # OCR-D/core
51
51
 
@@ -7,7 +7,7 @@ ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
8
  ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
- ocrd/workspace.py,sha256=cedqK7es2i2nwQCiUiVyWk3j4-nH7bsi6TF7v8siTio,65794
10
+ ocrd/workspace.py,sha256=-j3X83K0f4vtd5jwfu6_R53RJ2R8gt1HYpyrv8YP2bg,65661
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
12
  ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
13
  ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
@@ -23,11 +23,11 @@ ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
23
  ocrd/decorators/__init__.py,sha256=PyXX7vxdWkRHixas9dWUtyO3YLczcly8ZEpfZDSMVp8,7639
24
24
  ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
25
25
  ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
26
- ocrd/decorators/ocrd_cli_options.py,sha256=hr2EugwAY_-GJ7F7g77Od9o9eAqhfLBHSpfmCql2OCU,2665
26
+ ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=rVTQeUpZR_rBDh44Q7Xtl5TGcEdyBhMTDfpAgx4eLPg,59295
30
- ocrd/processor/helpers.py,sha256=8ngrqAJ01BSoSJNsIoK_YfA8QdryM5y0MqaqA9f7ELM,10483
29
+ ocrd/processor/base.py,sha256=wAu3d5E2Vt-rWBejzgZOOnxuI0njgShuLfRZPxyuUGk,59731
30
+ ocrd/processor/helpers.py,sha256=rk7OYucvUgS0fTsxNpYaiPmuyD2l0hVg9uSXb4Jdgow,10975
31
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
@@ -64,7 +64,7 @@ ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,
64
64
  ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
65
65
  ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
66
66
  ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
67
- ocrd_network/cli/client.py,sha256=XYWbeSiPK4BQXuyTq_FTOXEKljXVLkukWfx07aKbthY,8424
67
+ ocrd_network/cli/client.py,sha256=gFEXjz-d074CpvimqaM4kJRbJVNYRAOK-jsUl2EAUVs,8424
68
68
  ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
69
69
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
70
70
  ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
@@ -89,7 +89,7 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
89
89
  ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
90
90
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
91
91
  ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
92
- ocrd_utils/config.py,sha256=smjUAGK5n0iKZCs4dZAtrZurelcaavlFqhIUJWNMOi0,11796
92
+ ocrd_utils/config.py,sha256=1_8j4kpKK5gxhFKObbBEzmq4JazTtKEkGe14Ch_tpw0,11796
93
93
  ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
94
94
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
95
95
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
@@ -120,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
120
120
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
121
121
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
122
122
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
123
- ocrd-3.0.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
- ocrd-3.0.1.dist-info/METADATA,sha256=y6mdBSjKmkTMSJ7F1LAvjCWeagTBRAeiYaka9_Z2Djc,10442
125
- ocrd-3.0.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
126
- ocrd-3.0.1.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
- ocrd-3.0.1.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
- ocrd-3.0.1.dist-info/RECORD,,
123
+ ocrd-3.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
+ ocrd-3.0.3.dist-info/METADATA,sha256=gZiKn6AO1tPL9NAfVsX0f3UTUZipDtEUOIXmMLXr76k,10442
125
+ ocrd-3.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
126
+ ocrd-3.0.3.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
+ ocrd-3.0.3.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
+ ocrd-3.0.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -110,8 +110,8 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
110
110
  @click.option('-p', '--print-state', default=False, is_flag=True,
111
111
  help='If set, the client will print job states by each iteration.')
112
112
  def send_processing_job_request(
113
- address: Optional[str],
114
113
  processor_name: str,
114
+ address: Optional[str],
115
115
  mets: str,
116
116
  input_file_grp: str,
117
117
  output_file_grp: Optional[str],
ocrd_utils/config.py CHANGED
@@ -142,7 +142,7 @@ config.add('OCRD_MAX_PROCESSOR_CACHE',
142
142
  default=(True, 128))
143
143
 
144
144
  config.add('OCRD_MAX_PARALLEL_PAGES',
145
- description="Maximum number of processor threads for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set >1, then a METS Server must be used for METS synchronisation.",
145
+ description="Maximum number of processor workers for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set >1, then a METS Server must be used for METS synchronisation.",
146
146
  parser=int,
147
147
  default=(True, 1))
148
148
 
File without changes