ocrd 3.8.0__tar.gz → 3.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. {ocrd-3.8.0/src/ocrd.egg-info → ocrd-3.9.0}/PKG-INFO +1 -1
  2. ocrd-3.9.0/VERSION +1 -0
  3. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/ocrd-all-tool.json +25 -0
  4. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/base.py +51 -43
  5. ocrd-3.9.0/src/ocrd/processor/ocrd_page_result.py +93 -0
  6. {ocrd-3.8.0 → ocrd-3.9.0/src/ocrd.egg-info}/PKG-INFO +1 -1
  7. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_page.py +20 -0
  8. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_page_generateds.py +1273 -69
  9. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/cli/client.py +1 -1
  10. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/client.py +4 -0
  11. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/client_utils.py +9 -2
  12. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/constants.py +1 -1
  13. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/helpers.py +1 -1
  14. ocrd-3.8.0/VERSION +0 -1
  15. ocrd-3.8.0/src/ocrd/processor/ocrd_page_result.py +0 -19
  16. {ocrd-3.8.0 → ocrd-3.9.0}/LICENSE +0 -0
  17. {ocrd-3.8.0 → ocrd-3.9.0}/MANIFEST.in +0 -0
  18. {ocrd-3.8.0 → ocrd-3.9.0}/README.md +0 -0
  19. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd.md +0 -0
  20. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd_modelfactory.md +0 -0
  21. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd_models.md +0 -0
  22. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd_network.md +0 -0
  23. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd_utils.md +0 -0
  24. {ocrd-3.8.0 → ocrd-3.9.0}/README_ocrd_validators.md +0 -0
  25. {ocrd-3.8.0 → ocrd-3.9.0}/pyproject.toml +0 -0
  26. {ocrd-3.8.0 → ocrd-3.9.0}/requirements.txt +0 -0
  27. {ocrd-3.8.0 → ocrd-3.9.0}/setup.cfg +0 -0
  28. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/__init__.py +0 -0
  29. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/__init__.py +0 -0
  30. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/bashlib.py +0 -0
  31. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/network.py +0 -0
  32. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/ocrd_tool.py +0 -0
  33. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/process.py +0 -0
  34. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/resmgr.py +0 -0
  35. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/validate.py +0 -0
  36. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/workspace.py +0 -0
  37. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/cli/zip.py +0 -0
  38. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/constants.py +0 -0
  39. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/decorators/__init__.py +0 -0
  40. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/decorators/loglevel_option.py +0 -0
  41. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/decorators/mets_find_options.py +0 -0
  42. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
  43. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/decorators/parameter_option.py +0 -0
  44. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/mets_server.py +0 -0
  45. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/__init__.py +0 -0
  46. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/__init__.py +0 -0
  47. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
  48. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
  49. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
  50. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/filter_processor.py +0 -0
  51. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/merge_processor.py +0 -0
  52. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/param_command_header2unordered.json +0 -0
  53. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/param_command_heading2unordered.json +0 -0
  54. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/param_command_lines2orientation.json +0 -0
  55. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/param_command_page-update-version.json +0 -0
  56. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/param_command_transkribus-to-prima.json +0 -0
  57. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/builtin/shell_processor.py +0 -0
  58. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/processor/helpers.py +0 -0
  59. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/resolver.py +0 -0
  60. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/resource_manager.py +0 -0
  61. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/task_sequence.py +0 -0
  62. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/workspace.py +0 -0
  63. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/workspace_backup.py +0 -0
  64. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd/workspace_bagger.py +0 -0
  65. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd.egg-info/SOURCES.txt +0 -0
  66. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd.egg-info/dependency_links.txt +0 -0
  67. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd.egg-info/entry_points.txt +0 -0
  68. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd.egg-info/requires.txt +0 -0
  69. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd.egg-info/top_level.txt +0 -0
  70. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_modelfactory/__init__.py +0 -0
  71. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/__init__.py +0 -0
  72. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/constants.py +0 -0
  73. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/mets-empty.xml +0 -0
  74. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_agent.py +0 -0
  75. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_exif.py +0 -0
  76. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_file.py +0 -0
  77. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_mets.py +0 -0
  78. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/ocrd_xml_base.py +0 -0
  79. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/report.py +0 -0
  80. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/utils.py +0 -0
  81. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_models/xpath_functions.py +0 -0
  82. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/__init__.py +0 -0
  83. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/cli/__init__.py +0 -0
  84. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/cli/processing_server.py +0 -0
  85. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/cli/processing_worker.py +0 -0
  86. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/cli/resmgr_server.py +0 -0
  87. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/database.py +0 -0
  88. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/logging_utils.py +0 -0
  89. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/models/__init__.py +0 -0
  90. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/models/job.py +0 -0
  91. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/models/messages.py +0 -0
  92. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/models/workflow.py +0 -0
  93. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/models/workspace.py +0 -0
  94. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/param_validators.py +0 -0
  95. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/process_helpers.py +0 -0
  96. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/processing_server.py +0 -0
  97. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/processing_worker.py +0 -0
  98. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
  99. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
  100. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
  101. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
  102. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
  103. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
  104. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/resource_manager_server.py +0 -0
  105. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/__init__.py +0 -0
  106. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/config_parser.py +0 -0
  107. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
  108. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/deployer.py +0 -0
  109. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/hosts.py +0 -0
  110. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/network_agents.py +0 -0
  111. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/runtime_data/network_services.py +0 -0
  112. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/server_cache.py +0 -0
  113. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/server_utils.py +0 -0
  114. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
  115. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_network/utils.py +0 -0
  116. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/__init__.py +0 -0
  117. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/config.py +0 -0
  118. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/constants.py +0 -0
  119. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/deprecate.py +0 -0
  120. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/image.py +0 -0
  121. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/introspect.py +0 -0
  122. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/logging.py +0 -0
  123. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/ocrd_logging.conf +0 -0
  124. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/os.py +0 -0
  125. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_utils/str.py +0 -0
  126. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/__init__.py +0 -0
  127. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/bagit-profile.yml +0 -0
  128. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/constants.py +0 -0
  129. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/json_validator.py +0 -0
  130. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/message_processing.schema.yml +0 -0
  131. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/message_result.schema.yml +0 -0
  132. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/mets.xsd +0 -0
  133. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
  134. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
  135. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
  136. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
  137. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/page.xsd +0 -0
  138. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/page_validator.py +0 -0
  139. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/parameter_validator.py +0 -0
  140. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
  141. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/processing_server_config_validator.py +0 -0
  142. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/resource_list_validator.py +0 -0
  143. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/workspace_validator.py +0 -0
  144. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/xlink.xsd +0 -0
  145. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/xsd_mets_validator.py +0 -0
  146. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/xsd_page_validator.py +0 -0
  147. {ocrd-3.8.0 → ocrd-3.9.0}/src/ocrd_validators/xsd_validator.py +0 -0
  148. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_decorators.py +0 -0
  149. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_logging.py +0 -0
  150. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_logging_conf.py +0 -0
  151. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_mets_server.py +0 -0
  152. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_model_factory.py +0 -0
  153. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_resolver.py +0 -0
  154. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_resolver_oai.py +0 -0
  155. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_resource_manager.py +0 -0
  156. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_task_sequence.py +0 -0
  157. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_utils.py +0 -0
  158. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_version.py +0 -0
  159. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_workspace.py +0 -0
  160. {ocrd-3.8.0 → ocrd-3.9.0}/tests/test_workspace_remove.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.8.0
3
+ Version: 3.9.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
ocrd-3.9.0/VERSION ADDED
@@ -0,0 +1 @@
1
+ 3.9.0
@@ -41,5 +41,30 @@
41
41
  "description": "Whether to extract an image for each filtered segment and write to the output fileGrp."
42
42
  }
43
43
  }
44
+ },
45
+ "ocrd-command": {
46
+ "executable": "ocrd-command",
47
+ "description": "Bare-bones processor runs shell commands to process PAGE files",
48
+ "steps": ["recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/analysis"],
49
+ "categories": [],
50
+ "input_file_grp_cardinality": [1, -1],
51
+ "output_file_grp_cardinality": 1,
52
+ "parameters": {
53
+ "command": {
54
+ "type": "string",
55
+ "default": "cat @INFILE > @OUTFILE",
56
+ "description": "Shell command to operate on PAGE files, with @INFILE as place-holder for the input file path(s), and @OUTFILE as place-holder for the output file path. If running on multiple input fileGrps, then @INFILE must be repeated as many times."
57
+ }
58
+ }
59
+ },
60
+ "ocrd-merge": {
61
+ "executable": "ocrd-merge",
62
+ "description": "Bare-bones processor merges annotations from multiple fileGrps",
63
+ "steps": ["layout/segmentation"],
64
+ "categories": [],
65
+ "input_file_grp_cardinality": [1, -1],
66
+ "output_file_grp_cardinality": 1,
67
+ "parameters": {
68
+ }
44
69
  }
45
70
  }
@@ -824,51 +824,59 @@ class Processor():
824
824
  if not any(input_pcgts):
825
825
  self._base_logger.warning(f'skipping page {page_id}')
826
826
  return
827
- output_file_id = make_file_id(input_files[input_pos], self.output_file_grp)
828
- if input_files[input_pos].fileGrp == self.output_file_grp:
829
- # input=output fileGrp: re-use ID exactly
830
- output_file_id = input_files[input_pos].ID
831
- output_file = next(self.workspace.mets.find_files(ID=output_file_id), None)
832
- if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE':
833
- # short-cut avoiding useless computation:
834
- raise FileExistsError(
835
- f"A file with ID=={output_file_id} already exists {output_file} and neither force nor ignore are set"
836
- )
837
- result = self.process_page_pcgts(*input_pcgts, page_id=page_id)
838
- for image_result in result.images:
839
- image_file_id = f'{output_file_id}_{image_result.file_id_suffix}'
840
- image_file_path = join(self.output_file_grp, f'{image_file_id}.png')
841
- if isinstance(image_result.alternative_image, PageType):
842
- # special case: not an alternative image, but replacing the original image
843
- # (this is needed by certain processors when the original's coordinate system
844
- # cannot or must not be kept)
845
- image_result.alternative_image.set_imageFilename(image_file_path)
846
- image_result.alternative_image.set_imageWidth(image_result.pil.width)
847
- image_result.alternative_image.set_imageHeight(image_result.pil.height)
848
- elif isinstance(image_result.alternative_image, AlternativeImageType):
849
- image_result.alternative_image.set_filename(image_file_path)
850
- elif image_result.alternative_image is None:
851
- pass # do not reference in PAGE result
852
- else:
853
- raise ValueError(f"process_page_pcgts returned an OcrdPageResultImage of unknown type "
854
- f"{type(image_result.alternative_image)}")
855
- self.workspace.save_image_file(
856
- image_result.pil,
857
- image_file_id,
858
- self.output_file_grp,
827
+ output_file_grps = self.output_file_grp.split(',')
828
+ output_file_ids = [make_file_id(input_files[input_pos], output_file_grp)
829
+ if input_files[input_pos].fileGrp != output_file_grp else
830
+ # input=output fileGrp: re-use ID exactly
831
+ input_files[input_pos].ID
832
+ for output_file_grp in output_file_grps]
833
+ if config.OCRD_EXISTING_OUTPUT != 'OVERWRITE':
834
+ for output_file_id in output_file_ids:
835
+ if output_file := next(self.workspace.mets.find_files(ID=output_file_id), None):
836
+ # short-cut avoiding useless computation:
837
+ raise FileExistsError(
838
+ f"A file with ID=={output_file_id} already exists {output_file}"
839
+ " and OCRD_EXISTING_OUTPUT != OVERWRITE"
840
+ )
841
+ results = self.process_page_pcgts(*input_pcgts, page_id=page_id)
842
+ if len(results) > len(output_file_grps):
843
+ self._base_logger.error(f"processor returned {len(results) - len(output_file_grps)} "
844
+ f"more results than specified output fileGrps for page {page_id}")
845
+ for result, output_file_id, output_file_grp in zip(results, output_file_ids, output_file_grps):
846
+ for image_result in result.images:
847
+ image_file_id = f'{output_file_id}_{image_result.file_id_suffix}'
848
+ image_file_path = join(output_file_grp, f'{image_file_id}.png')
849
+ if isinstance(image_result.alternative_image, PageType):
850
+ # special case: not an alternative image, but replacing the original image
851
+ # (this is needed by certain processors when the original's coordinate system
852
+ # cannot or must not be kept, e.g. dewarping)
853
+ image_result.alternative_image.set_imageFilename(image_file_path)
854
+ image_result.alternative_image.set_imageWidth(image_result.pil.width)
855
+ image_result.alternative_image.set_imageHeight(image_result.pil.height)
856
+ elif isinstance(image_result.alternative_image, AlternativeImageType):
857
+ image_result.alternative_image.set_filename(image_file_path)
858
+ elif image_result.alternative_image is None:
859
+ pass # do not reference in PAGE result
860
+ else:
861
+ raise ValueError(f"process_page_pcgts returned an OcrdPageResultImage of unknown type "
862
+ f"{type(image_result.alternative_image)}")
863
+ self.workspace.save_image_file(
864
+ image_result.pil,
865
+ image_file_id,
866
+ output_file_grp,
867
+ page_id=page_id,
868
+ file_path=image_file_path,
869
+ )
870
+ result.pcgts.set_pcGtsId(output_file_id)
871
+ self.add_metadata(result.pcgts)
872
+ self.workspace.add_file(
873
+ file_id=output_file_id,
874
+ file_grp=output_file_grp,
859
875
  page_id=page_id,
860
- file_path=image_file_path,
876
+ local_filename=os.path.join(output_file_grp, output_file_id + '.xml'),
877
+ mimetype=MIMETYPE_PAGE,
878
+ content=to_xml(result.pcgts),
861
879
  )
862
- result.pcgts.set_pcGtsId(output_file_id)
863
- self.add_metadata(result.pcgts)
864
- self.workspace.add_file(
865
- file_id=output_file_id,
866
- file_grp=self.output_file_grp,
867
- page_id=page_id,
868
- local_filename=os.path.join(self.output_file_grp, output_file_id + '.xml'),
869
- mimetype=MIMETYPE_PAGE,
870
- content=to_xml(result.pcgts),
871
- )
872
880
 
873
881
  def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult:
874
882
  """
@@ -0,0 +1,93 @@
1
+ from dataclasses import dataclass, field
2
+ import copy
3
+ from typing import List, Union, Optional
4
+ from ocrd_models.ocrd_page import OcrdPage
5
+ from PIL.Image import Image
6
+
7
+ from ocrd_models.ocrd_page_generateds import AlternativeImageType, PageType
8
+
9
+
10
+ @dataclass
11
+ class OcrdPageResultImage():
12
+ """
13
+ Encapsulates a single ``AlternativeImage`` reference to be persisted
14
+ as image file to the :py:class:`ocrd.Workspace`.
15
+ """
16
+ pil: Image
17
+ """
18
+ image data to be saved
19
+ """
20
+ file_id_suffix: str
21
+ """
22
+ a suffix to append to the file name when saving
23
+ (something like ``.IMG`` according to OCR-D
24
+ conventions for PAGE-XML)
25
+ """
26
+ alternative_image: Optional[Union[AlternativeImageType, PageType]]
27
+ """
28
+ the ``AlternativeImage`` instance that references this image;
29
+ to be amended with the actual (final) ``@filename`` when saving
30
+
31
+ alternatively, can be a ``Page`` instance: in that case,
32
+ amend its ``@imageFilename`` (i.e. replace the original image
33
+ of the PAGE-XML)
34
+ """
35
+
36
+
37
+ @dataclass
38
+ class OcrdPageResult():
39
+ """
40
+ Encapsulates the return type of :py:func:`ocrd.Processor.process_page_pcgts`,
41
+ i.e. an instance of :py:class:`ocrd_models.ocrd_page.OcrdPage` and an
42
+ accompanying list of :py:class:`OcrdPageResultImage` that contain all
43
+ image files referenced via ``AlternativeImage`` to be persisted into the
44
+ :py:class:`ocrd.Workspace` along with the PAGE-XML itself.
45
+ """
46
+ pcgts: OcrdPage
47
+ images: List[OcrdPageResultImage] = field(default_factory=list)
48
+
49
+ class OcrdPageResultVariadicListWrapper():
50
+ """
51
+ Proxy object for :py:class:`ocrd.SingleOcrdPageResult` allowing
52
+ list semantics (i.e. multi-valued return from
53
+ :py:func:`ocrd.Processor.process_page_pcgts`) without changing
54
+ the API introduced in version 3.0.
55
+
56
+ Everything but list access will yield the old (singular valued)
57
+ semantics.
58
+ """
59
+ def __init__(
60
+ self,
61
+ pcgts: OcrdPage,
62
+ *args):
63
+ self._results = [SingleOcrdPageResult(pcgts)] + [
64
+ SingleOcrdPageResult(arg) for arg in args]
65
+
66
+ def __getitem__(self, key):
67
+ return self._results[key]
68
+
69
+ def __contains__(self, key):
70
+ return key in self._results
71
+
72
+ def __len__(self):
73
+ return len(self._results)
74
+
75
+ def __iter__(self):
76
+ return iter(self._results)
77
+
78
+ def __repr__(self):
79
+ return repr(self._results)
80
+
81
+ # allow copy() without infinite recursion
82
+ def __copy__(self):
83
+ return OcrdPageResultVariadicListWrapper(*copy.copy(self._results))
84
+
85
+ # allow deepcopy() without infinite recursion
86
+ def __deepcopy__(self, memo):
87
+ return OcrdPageResultVariadicListWrapper(*copy.deepcopy(self._results))
88
+
89
+ # delegate to all members of first result
90
+ def __getattr__(self, name):
91
+ return getattr(self._results[0], name)
92
+
93
+ SingleOcrdPageResult, OcrdPageResult = OcrdPageResult, OcrdPageResultVariadicListWrapper
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.8.0
3
+ Version: 3.9.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -2,6 +2,7 @@
2
2
  API to PAGE-XML, generated with generateDS from XML schema.
3
3
  """
4
4
  from io import StringIO
5
+ import copy
5
6
  from typing import Dict, Union, Any
6
7
  from lxml import etree as ET
7
8
  from elementpath import XPath2Parser, XPathContext
@@ -212,6 +213,25 @@ class OcrdPage():
212
213
  self.xpath_context = XPathContext(self.etree)
213
214
  self.xpath = lambda expression: self.xpath_parser.parse(expression).get_results(self.xpath_context)
214
215
 
216
+ # allow copy() without infinite recursion
217
+ def __copy__(self):
218
+ return OcrdPage(
219
+ copy.copy(self._pcgts),
220
+ copy.copy(self.etree),
221
+ copy.copy(self.mapping),
222
+ copy.copy(self.revmap),
223
+ )
224
+
225
+ # allow deepcopy() without infinite recursion
226
+ def __deepcopy__(self, memo):
227
+ return OcrdPage(
228
+ copy.deepcopy(self._pcgts, memo),
229
+ copy.deepcopy(self.etree, memo),
230
+ copy.deepcopy(self.mapping, memo),
231
+ copy.deepcopy(self.revmap, memo),
232
+ )
233
+
234
+ # delegate to all members of ._pcgts
215
235
  def __getattr__(self, name):
216
236
  return getattr(self._pcgts, name)
217
237