ocrd 3.0.2__py3-none-any.whl → 3.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/mets_server.py CHANGED
@@ -474,7 +474,7 @@ class OcrdMetsServer:
474
474
  """
475
475
  workspace.save_mets()
476
476
  response = Response(content="The Mets Server is writing changes to disk.", media_type='text/plain')
477
- self.log.info(f"PUT / -> {response.__dict__}")
477
+ self.log.debug(f"PUT / -> {response.__dict__}")
478
478
  return response
479
479
 
480
480
  @app.delete(path='/')
@@ -485,7 +485,7 @@ class OcrdMetsServer:
485
485
  workspace.save_mets()
486
486
  response = Response(content="The Mets Server will shut down soon...", media_type='text/plain')
487
487
  self.shutdown()
488
- self.log.info(f"DELETE / -> {response.__dict__}")
488
+ self.log.debug(f"DELETE / -> {response.__dict__}")
489
489
  return response
490
490
 
491
491
  @app.post(path='/reload')
@@ -495,25 +495,25 @@ class OcrdMetsServer:
495
495
  """
496
496
  workspace.reload_mets()
497
497
  response = Response(content=f"Reloaded from {workspace.directory}", media_type='text/plain')
498
- self.log.info(f"POST /reload -> {response.__dict__}")
498
+ self.log.debug(f"POST /reload -> {response.__dict__}")
499
499
  return response
500
500
 
501
501
  @app.get(path='/unique_identifier', response_model=str)
502
502
  async def unique_identifier():
503
503
  response = Response(content=workspace.mets.unique_identifier, media_type='text/plain')
504
- self.log.info(f"GET /unique_identifier -> {response.__dict__}")
504
+ self.log.debug(f"GET /unique_identifier -> {response.__dict__}")
505
505
  return response
506
506
 
507
507
  @app.get(path='/workspace_path', response_model=str)
508
508
  async def workspace_path():
509
509
  response = Response(content=workspace.directory, media_type="text/plain")
510
- self.log.info(f"GET /workspace_path -> {response.__dict__}")
510
+ self.log.debug(f"GET /workspace_path -> {response.__dict__}")
511
511
  return response
512
512
 
513
513
  @app.get(path='/physical_pages', response_model=OcrdPageListModel)
514
514
  async def physical_pages():
515
515
  response = {'physical_pages': workspace.mets.physical_pages}
516
- self.log.info(f"GET /physical_pages -> {response}")
516
+ self.log.debug(f"GET /physical_pages -> {response}")
517
517
  return response
518
518
 
519
519
  @app.get(path='/physical_pages', response_model=OcrdPageListModel)
@@ -523,13 +523,13 @@ class OcrdMetsServer:
523
523
  @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
524
524
  async def file_groups():
525
525
  response = {'file_groups': workspace.mets.file_groups}
526
- self.log.info(f"GET /file_groups -> {response}")
526
+ self.log.debug(f"GET /file_groups -> {response}")
527
527
  return response
528
528
 
529
529
  @app.get(path='/agent', response_model=OcrdAgentListModel)
530
530
  async def agents():
531
531
  response = OcrdAgentListModel.create(workspace.mets.agents)
532
- self.log.info(f"GET /agent -> {response.__dict__}")
532
+ self.log.debug(f"GET /agent -> {response.__dict__}")
533
533
  return response
534
534
 
535
535
  @app.post(path='/agent', response_model=OcrdAgentModel)
@@ -538,7 +538,7 @@ class OcrdMetsServer:
538
538
  kwargs['_type'] = kwargs.pop('type')
539
539
  workspace.mets.add_agent(**kwargs)
540
540
  response = agent
541
- self.log.info(f"POST /agent -> {response.__dict__}")
541
+ self.log.debug(f"POST /agent -> {response.__dict__}")
542
542
  return response
543
543
 
544
544
  @app.get(path="/file", response_model=OcrdFileListModel)
@@ -557,14 +557,14 @@ class OcrdMetsServer:
557
557
  fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype, local_filename=local_filename, url=url
558
558
  )
559
559
  response = OcrdFileListModel.create(found)
560
- self.log.info(f"GET /file -> {response.__dict__}")
560
+ self.log.debug(f"GET /file -> {response.__dict__}")
561
561
  return response
562
562
 
563
563
  @app.post(path='/file', response_model=OcrdFileModel)
564
564
  async def add_file(
565
565
  file_grp: str = Form(),
566
566
  file_id: str = Form(),
567
- page_id: Optional[str] = Form(),
567
+ page_id: Optional[str] = Form(None),
568
568
  mimetype: str = Form(),
569
569
  url: Optional[str] = Form(None),
570
570
  local_filename: Optional[str] = Form(None),
@@ -582,7 +582,7 @@ class OcrdMetsServer:
582
582
  kwargs = file_resource.dict()
583
583
  workspace.add_file(**kwargs, force=force)
584
584
  response = file_resource
585
- self.log.info(f"POST /file -> {response.__dict__}")
585
+ self.log.debug(f"POST /file -> {response.__dict__}")
586
586
  return response
587
587
 
588
588
  # ------------- #
ocrd/processor/base.py CHANGED
@@ -534,15 +534,20 @@ class Processor():
534
534
  # forward messages from log queue (in subprocesses) to all root handlers
535
535
  log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True)
536
536
  log_listener.start()
537
+ tasks = None
537
538
  try:
538
539
  self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
539
540
  tasks = self.process_workspace_submit_tasks(executor, max_seconds)
540
541
  stats = self.process_workspace_handle_tasks(tasks)
541
542
  finally:
542
543
  executor.shutdown(kill_workers=True, wait=False)
544
+ self._base_logger.debug("stopped executor %s after %d tasks", str(executor), len(tasks) if tasks else -1)
543
545
  if max_workers > 1:
544
- log_listener.stop()
545
- del log_listener
546
+ # can cause deadlock:
547
+ #log_listener.stop()
548
+ # not much better:
549
+ #log_listener.enqueue_sentinel()
550
+ pass
546
551
 
547
552
  except NotImplementedError:
548
553
  # fall back to deprecated method
@@ -670,11 +675,12 @@ class Processor():
670
675
  nr_succeeded += 1
671
676
  # else skipped - already exists
672
677
  nr_errors = dict(nr_errors)
678
+ nr_all = nr_succeeded + nr_failed
673
679
  if nr_failed > 0:
674
- nr_all = nr_succeeded + nr_failed
675
680
  if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS:
676
681
  raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})")
677
682
  self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors))
683
+ self._base_logger.debug("succeeded %d, missed %d of %d pages due to %s", nr_succeeded, nr_failed, nr_all, str(nr_errors))
678
684
  return nr_succeeded, nr_failed, nr_errors, len(tasks)
679
685
 
680
686
  def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]:
ocrd/processor/helpers.py CHANGED
@@ -56,13 +56,19 @@ def run_processor(
56
56
  - :py:attr:`output_file_grp`
57
57
  - :py:attr:`parameter` (after applying any :py:attr:`parameter_override` settings)
58
58
 
59
- Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
60
- This flag is used for an experimental feature we would like to adopt in future.
61
-
62
59
  Run the processor on the workspace (creating output files in the filesystem).
63
60
 
64
61
  Finally, write back the workspace (updating the METS in the filesystem).
65
62
 
63
+ If :py:attr:`instance_caching` is True, then processor instances (for the same set
64
+ of :py:attr:`parameter` values) will be cached internally. Thus, these objects (and
65
+ all their memory resources, like loaded models) get re-used instead of re-instantiated
66
+ when a match occurs - as long as the program is being run. They only get deleted (and
67
+ their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE`
68
+ instances have already been cached while this particular parameter set was re-used
69
+ least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and
70
+ :py:class:`~ocrd_network.ProcessorServer` for use-cases.)
71
+
66
72
  Args:
67
73
  processorClass (object): Python class of the module processor.
68
74
  """
@@ -137,7 +143,7 @@ def run_processor(
137
143
  t1_cpu,
138
144
  processor.input_file_grp or '',
139
145
  processor.output_file_grp or '',
140
- json.dumps(processor.parameter) or '',
146
+ json.dumps(dict(processor.parameter or {})),
141
147
  processor.page_id or ''
142
148
  )
143
149
  workspace.mets.add_agent(
@@ -148,7 +154,7 @@ def run_processor(
148
154
  otherrole=otherrole,
149
155
  notes=[({'option': 'input-file-grp'}, processor.input_file_grp or ''),
150
156
  ({'option': 'output-file-grp'}, processor.output_file_grp or ''),
151
- ({'option': 'parameter'}, json.dumps(processor.parameter or '')),
157
+ ({'option': 'parameter'}, json.dumps(dict(processor.parameter or {}))),
152
158
  ({'option': 'page-id'}, processor.page_id or '')]
153
159
  )
154
160
  workspace.save_mets()
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -24,15 +24,13 @@ Requires-Dist: Flask
24
24
  Requires-Dist: frozendict>=2.3.4
25
25
  Requires-Dist: gdown
26
26
  Requires-Dist: httpx>=0.22.0
27
- Requires-Dist: importlib_metadata; python_version < "3.8"
28
- Requires-Dist: importlib_resources; python_version < "3.10"
29
27
  Requires-Dist: jsonschema>=4
30
28
  Requires-Dist: loky
31
29
  Requires-Dist: lxml
32
30
  Requires-Dist: memory-profiler>=0.58.0
33
31
  Requires-Dist: numpy
34
32
  Requires-Dist: ocrd-fork-bagit>=1.8.1.post2
35
- Requires-Dist: ocrd-fork-bagit_profile>=1.3.0.post1
33
+ Requires-Dist: ocrd-fork-bagit-profile>=1.3.0.post1
36
34
  Requires-Dist: opencv-python-headless
37
35
  Requires-Dist: paramiko
38
36
  Requires-Dist: pika>=1.2.0
@@ -42,10 +40,12 @@ Requires-Dist: python-magic
42
40
  Requires-Dist: python-multipart
43
41
  Requires-Dist: pyyaml
44
42
  Requires-Dist: requests
45
- Requires-Dist: requests_unixsocket2
43
+ Requires-Dist: requests-unixsocket2
46
44
  Requires-Dist: shapely
47
45
  Requires-Dist: uvicorn
48
46
  Requires-Dist: uvicorn>=0.17.6
47
+ Requires-Dist: importlib-resources; python_version < "3.10"
48
+ Requires-Dist: importlib-metadata; python_version < "3.8"
49
49
 
50
50
  # OCR-D/core
51
51
 
@@ -1,7 +1,7 @@
1
1
  ocrd/__init__.py,sha256=ZswMVmlqFhAEIzMR3my6IKPq9XLH21aDPC_m_8Jh4dA,1076
2
2
  ocrd/constants.py,sha256=6dn3mG54WqHsKInmLZp4kJjNqqPtBoFoSuLUuRbOps0,740
3
3
  ocrd/lib.bash,sha256=le6XqAOEacdjP3JNSlPkxwRH1y0oVjNQM2tX5d6QFO4,10901
4
- ocrd/mets_server.py,sha256=wiOvSSYZMicjQC958dD2i7uHMwfZCaDZQd7E5l50ayU,22436
4
+ ocrd/mets_server.py,sha256=EL6CMtVjmVPs9--3Vn-JyhU2VDMNw-6QN_XWRKFp6uk,22451
5
5
  ocrd/ocrd-all-tool.json,sha256=9bX2VYnUwhTAzAvKaoT77BFzbgBGgyIt7qBqARpwWNc,586
6
6
  ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
@@ -26,8 +26,8 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=L396ON0p77D8r34CESJ2SIYspTZ6mYFqaBq6DYg6ywM,59315
30
- ocrd/processor/helpers.py,sha256=8ngrqAJ01BSoSJNsIoK_YfA8QdryM5y0MqaqA9f7ELM,10483
29
+ ocrd/processor/base.py,sha256=wAu3d5E2Vt-rWBejzgZOOnxuI0njgShuLfRZPxyuUGk,59731
30
+ ocrd/processor/helpers.py,sha256=gIc6PdvOS1sR0UkYlrdZopImAXxXglDBNpgNZGWHO7Y,10987
31
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ocrd/processor/builtin/dummy_processor.py,sha256=a-4kKJ1JeXQuBIyyN8w2R3s7ov-wAfyEdEz3nxrf0sU,3479
@@ -43,7 +43,7 @@ ocrd_models/ocrd_exif.py,sha256=wRSprHxCy9LCXw41Fi9kp-CbFc5NFX9ZFIFNszB41qk,4585
43
43
  ocrd_models/ocrd_file.py,sha256=7lyHezuNnl2FEYV1lV35-QTCrgYAL-3wO2ulFUNq2Ak,9717
44
44
  ocrd_models/ocrd_mets.py,sha256=cRBEnvRNmRUhcrKouqygTQqYA-XdgyA-FNLwt86V0vg,42878
45
45
  ocrd_models/ocrd_page.py,sha256=TTCnvpKGyZx1dqH8LnDiVVVPjU6emWGVLO_4o9rQHtw,6233
46
- ocrd_models/ocrd_page_generateds.py,sha256=vjXPob5UMxYxPI6un8SYTL673Nhph0E2WEZexBgbvY4,841794
46
+ ocrd_models/ocrd_page_generateds.py,sha256=IWoN3V-v3C4JgyPaFh9OQC87ob__wUP1Q6ELBxhLA1w,841794
47
47
  ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,1641
48
48
  ocrd_models/report.py,sha256=luZxvzAAQyGYOlRNSJQUIUIANG81iGmBW5ag-uXxKCA,2026
49
49
  ocrd_models/utils.py,sha256=0_WHf5NEn1WC8MKJc6X_RK8gW-70Z09_mslkKOj7uF8,2369
@@ -120,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
120
120
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
121
121
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
122
122
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
123
- ocrd-3.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
- ocrd-3.0.2.dist-info/METADATA,sha256=e6sO3z4kPE8qe8Ne-aPLX5Tl03M_NcrORBnlAQodLDE,10442
125
- ocrd-3.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
126
- ocrd-3.0.2.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
- ocrd-3.0.2.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
- ocrd-3.0.2.dist-info/RECORD,,
123
+ ocrd-3.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
+ ocrd-3.0.4.dist-info/METADATA,sha256=BRwaS739FiiA7T2QsmeVSBSddJu_FyGyh7Jzxy2D1_g,10442
125
+ ocrd-3.0.4.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
126
+ ocrd-3.0.4.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
+ ocrd-3.0.4.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
+ ocrd-3.0.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -2,7 +2,7 @@
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
4
  #
5
- # Generated Sun Sep 15 21:49:27 2024 by generateDS.py version 2.44.1.
5
+ # Generated Mon Feb 17 10:32:54 2025 by generateDS.py version 2.44.1.
6
6
  # Python 3.8.17+ (heads/3.8-dirty:1663f8ba84, Aug 15 2023, 18:13:01) [GCC 8.3.0]
7
7
  #
8
8
  # Command line options:
@@ -7112,7 +7112,7 @@ class OrderedGroupIndexedType(GeneratedsSuper):
7112
7112
  else:
7113
7113
  cleaned.append(entry)
7114
7114
  for entry in cleaned:
7115
- entry.export(outfile, level, entry.ns_prefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
7115
+ entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
7116
7116
  # end class OrderedGroupIndexedType
7117
7117
 
7118
7118
 
@@ -8075,7 +8075,7 @@ class OrderedGroupType(GeneratedsSuper):
8075
8075
  else:
8076
8076
  cleaned.append(entry)
8077
8077
  for entry in cleaned:
8078
- entry.export(outfile, level, entry.ns_prefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
8078
+ entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print)
8079
8079
  # end class OrderedGroupType
8080
8080
 
8081
8081
 
File without changes