ocrd 3.1.2__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. {ocrd-3.1.2/src/ocrd.egg-info → ocrd-3.3.0}/PKG-INFO +1 -1
  2. ocrd-3.3.0/VERSION +1 -0
  3. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/workspace.py +2 -2
  4. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/base.py +10 -3
  5. {ocrd-3.1.2 → ocrd-3.3.0/src/ocrd.egg-info}/PKG-INFO +1 -1
  6. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/constants.py +16 -0
  7. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/workspace_validator.py +45 -10
  8. ocrd-3.1.2/VERSION +0 -1
  9. {ocrd-3.1.2 → ocrd-3.3.0}/LICENSE +0 -0
  10. {ocrd-3.1.2 → ocrd-3.3.0}/MANIFEST.in +0 -0
  11. {ocrd-3.1.2 → ocrd-3.3.0}/README.md +0 -0
  12. {ocrd-3.1.2 → ocrd-3.3.0}/README_bashlib.md +0 -0
  13. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd.md +0 -0
  14. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_modelfactory.md +0 -0
  15. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_models.md +0 -0
  16. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_network.md +0 -0
  17. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_utils.md +0 -0
  18. {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_validators.md +0 -0
  19. {ocrd-3.1.2 → ocrd-3.3.0}/pyproject.toml +0 -0
  20. {ocrd-3.1.2 → ocrd-3.3.0}/requirements.txt +0 -0
  21. {ocrd-3.1.2 → ocrd-3.3.0}/setup.cfg +0 -0
  22. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/__init__.py +0 -0
  23. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/__init__.py +0 -0
  24. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/bashlib.py +0 -0
  25. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/log.py +0 -0
  26. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/network.py +0 -0
  27. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/ocrd_tool.py +0 -0
  28. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/process.py +0 -0
  29. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/resmgr.py +0 -0
  30. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/validate.py +0 -0
  31. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/zip.py +0 -0
  32. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/constants.py +0 -0
  33. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/__init__.py +0 -0
  34. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/loglevel_option.py +0 -0
  35. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/mets_find_options.py +0 -0
  36. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
  37. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/parameter_option.py +0 -0
  38. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/lib.bash +0 -0
  39. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/mets_server.py +0 -0
  40. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/ocrd-all-tool.json +0 -0
  41. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/__init__.py +0 -0
  42. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/__init__.py +0 -0
  43. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
  44. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
  45. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
  46. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/filter_processor.py +0 -0
  47. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/helpers.py +0 -0
  48. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/ocrd_page_result.py +0 -0
  49. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resolver.py +0 -0
  50. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resource_list.yml +0 -0
  51. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resource_manager.py +0 -0
  52. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/task_sequence.py +0 -0
  53. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace.py +0 -0
  54. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace_backup.py +0 -0
  55. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace_bagger.py +0 -0
  56. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/SOURCES.txt +0 -0
  57. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/dependency_links.txt +0 -0
  58. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/entry_points.txt +0 -0
  59. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/requires.txt +0 -0
  60. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/top_level.txt +0 -0
  61. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_modelfactory/__init__.py +0 -0
  62. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/__init__.py +0 -0
  63. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/mets-empty.xml +0 -0
  64. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_agent.py +0 -0
  65. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_exif.py +0 -0
  66. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_file.py +0 -0
  67. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_mets.py +0 -0
  68. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_page.py +0 -0
  69. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_page_generateds.py +0 -0
  70. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_xml_base.py +0 -0
  71. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/report.py +0 -0
  72. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/utils.py +0 -0
  73. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/xpath_functions.py +0 -0
  74. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/__init__.py +0 -0
  75. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/__init__.py +0 -0
  76. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/client.py +0 -0
  77. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processing_server.py +0 -0
  78. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processing_worker.py +0 -0
  79. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processor_server.py +0 -0
  80. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/client.py +0 -0
  81. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/client_utils.py +0 -0
  82. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/constants.py +0 -0
  83. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/database.py +0 -0
  84. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/logging_utils.py +0 -0
  85. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/__init__.py +0 -0
  86. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/job.py +0 -0
  87. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/messages.py +0 -0
  88. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/ocrd_tool.py +0 -0
  89. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/workflow.py +0 -0
  90. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/workspace.py +0 -0
  91. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/param_validators.py +0 -0
  92. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/process_helpers.py +0 -0
  93. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processing_server.py +0 -0
  94. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processing_worker.py +0 -0
  95. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processor_server.py +0 -0
  96. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
  97. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
  98. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
  99. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
  100. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
  101. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
  102. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
  103. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/__init__.py +0 -0
  104. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/config_parser.py +0 -0
  105. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
  106. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/deployer.py +0 -0
  107. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/hosts.py +0 -0
  108. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/network_agents.py +0 -0
  109. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/network_services.py +0 -0
  110. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/server_cache.py +0 -0
  111. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/server_utils.py +0 -0
  112. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
  113. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/utils.py +0 -0
  114. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/__init__.py +0 -0
  115. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/config.py +0 -0
  116. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/constants.py +0 -0
  117. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/deprecate.py +0 -0
  118. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/image.py +0 -0
  119. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/introspect.py +0 -0
  120. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/logging.py +0 -0
  121. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/ocrd_logging.conf +0 -0
  122. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/os.py +0 -0
  123. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/str.py +0 -0
  124. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/__init__.py +0 -0
  125. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/bagit-profile.yml +0 -0
  126. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/constants.py +0 -0
  127. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/json_validator.py +0 -0
  128. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/message_processing.schema.yml +0 -0
  129. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/message_result.schema.yml +0 -0
  130. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/mets.xsd +0 -0
  131. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
  132. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
  133. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
  134. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
  135. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/page.xsd +0 -0
  136. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/page_validator.py +0 -0
  137. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/parameter_validator.py +0 -0
  138. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
  139. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/processing_server_config_validator.py +0 -0
  140. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/resource_list_validator.py +0 -0
  141. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xlink.xsd +0 -0
  142. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_mets_validator.py +0 -0
  143. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_page_validator.py +0 -0
  144. {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_validator.py +0 -0
  145. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_decorators.py +0 -0
  146. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_logging.py +0 -0
  147. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_logging_conf.py +0 -0
  148. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_mets_server.py +0 -0
  149. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_model_factory.py +0 -0
  150. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resolver.py +0 -0
  151. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resolver_oai.py +0 -0
  152. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resource_manager.py +0 -0
  153. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_task_sequence.py +0 -0
  154. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_utils.py +0 -0
  155. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_version.py +0 -0
  156. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_workspace.py +0 -0
  157. {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_workspace_remove.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.1.2
3
+ Version: 3.3.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
ocrd-3.3.0/VERSION ADDED
@@ -0,0 +1 @@
1
+ 3.3.0
@@ -88,8 +88,8 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
88
88
  @pass_workspace
89
89
  @click.option('-a', '--download', is_flag=True, help="Download all files")
90
90
  @click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
91
- ['imagefilename', 'dimension', 'pixel_density', 'page', 'url', 'page_xsd', 'mets_fileid_page_pcgtsid',
92
- 'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'mets_xsd']))
91
+ ['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density', 'page', 'page_xsd',
92
+ 'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
93
93
  @click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
94
94
  @click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
95
95
  @click.argument('mets_url', default=None, required=False)
@@ -779,10 +779,14 @@ class Processor():
779
779
  to handle cases like multiple output fileGrps, non-PAGE input etc.)
780
780
  """
781
781
  input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files)
782
- assert isinstance(input_files[0], get_args(OcrdFileType))
783
- page_id = input_files[0].pageId
782
+ input_pos = next(i for i, input_file in enumerate(input_files) if input_file is not None)
783
+ page_id = input_files[input_pos].pageId
784
784
  self._base_logger.info("processing page %s", page_id)
785
785
  for i, input_file in enumerate(input_files):
786
+ if input_file is None:
787
+ grp = self.input_file_grp.split(',')[i]
788
+ self._base_logger.debug(f"ignoring missing file for input fileGrp {grp} for page {page_id}")
789
+ continue
786
790
  assert isinstance(input_file, get_args(OcrdFileType))
787
791
  self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
788
792
  try:
@@ -792,7 +796,10 @@ class Processor():
792
796
  except ValueError as err:
793
797
  # not PAGE and not an image to generate PAGE for
794
798
  self._base_logger.error(f"non-PAGE input for page {page_id}: {err}")
795
- output_file_id = make_file_id(input_files[0], self.output_file_grp)
799
+ output_file_id = make_file_id(input_files[input_pos], self.output_file_grp)
800
+ if input_files[input_pos].fileGrp == self.output_file_grp:
801
+ # input=output fileGrp: re-use ID exactly
802
+ output_file_id = input_files[input_pos].ID
796
803
  output_file = next(self.workspace.mets.find_files(ID=output_file_id), None)
797
804
  if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE':
798
805
  # short-cut avoiding useless computation:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.1.2
3
+ Version: 3.3.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -28,6 +28,8 @@ __all__ = [
28
28
  'TAG_PAGE_TEXTEQUIV',
29
29
  'TAG_PAGE_TEXTREGION',
30
30
  'METS_PAGE_DIV_ATTRIBUTE',
31
+ 'PAGE_REGION_TYPES',
32
+ 'PAGE_ALTIMG_FEATURES',
31
33
  ]
32
34
 
33
35
 
@@ -72,6 +74,20 @@ PAGE_REGION_TYPES = [
72
74
  'Separator', 'Table', 'Text', 'Unknown'
73
75
  ]
74
76
 
77
+ PAGE_ALTIMG_FEATURES = [
78
+ 'binarized',
79
+ 'grayscale_normalized',
80
+ 'despeckled',
81
+ 'cropped',
82
+ 'deskewed',
83
+ 'rotated-90',
84
+ 'rotated-180',
85
+ 'rotated-270',
86
+ 'dewarped',
87
+ 'clipped',
88
+ ]
89
+
90
+
75
91
  class METS_PAGE_DIV_ATTRIBUTE(Enum):
76
92
  ID = auto()
77
93
  ORDER = auto()
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
 
8
8
  from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename, DEFAULT_METS_BASENAME
9
9
  from ocrd_models import ValidationReport
10
+ from ocrd_models.constants import PAGE_ALTIMG_FEATURES
10
11
  from ocrd_modelfactory import page_from_file
11
12
 
12
13
  from .constants import FILE_GROUP_CATEGORIES, FILE_GROUP_PREFIX
@@ -98,6 +99,9 @@ class WorkspaceValidator():
98
99
  self.page_coordinate_consistency = page_coordinate_consistency
99
100
  # there will be more options to come
100
101
  self.page_checks = [check for check in ['mets_fileid_page_pcgtsid',
102
+ 'imagefilename',
103
+ 'alternativeimage_filename',
104
+ 'alternativeimage_comments',
101
105
  'dimension',
102
106
  'page',
103
107
  'page_xsd']
@@ -118,7 +122,7 @@ class WorkspaceValidator():
118
122
  mets_url (string): URL of the METS file
119
123
  src_dir (string, None): Directory containing mets file
120
124
  skip (list): Validation checks to omit. One or more of
121
- 'mets_unique_identifier', 'mets_file_group_names',
125
+ 'mets_unique_identifier',
122
126
  'mets_files', 'pixel_density', 'dimension', 'url',
123
127
  'multipage', 'page', 'page_xsd', 'mets_xsd',
124
128
  'mets_fileid_page_pcgtsid'
@@ -145,8 +149,6 @@ class WorkspaceValidator():
145
149
  try:
146
150
  if 'mets_unique_identifier' not in self.skip:
147
151
  self._validate_mets_unique_identifier()
148
- if 'mets_file_group_names' not in self.skip:
149
- self._validate_mets_file_group_names()
150
152
  if 'mets_files' not in self.skip:
151
153
  self._validate_mets_files()
152
154
  if 'pixel_density' not in self.skip:
@@ -192,7 +194,11 @@ class WorkspaceValidator():
192
194
  self.workspace.download_file(f)
193
195
  page = page_from_file(f).get_Page()
194
196
  imageFilename = page.imageFilename
195
- if not self.mets.find_files(url=imageFilename, **self.find_kwargs):
197
+ if is_local_filename(imageFilename):
198
+ kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
199
+ else:
200
+ kwargs = dict(url=imageFilename, **self.find_kwargs)
201
+ if not self.mets.find_files(**kwargs):
196
202
  self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
197
203
  if is_local_filename(imageFilename) and not Path(imageFilename).exists():
198
204
  self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
@@ -295,6 +301,9 @@ class WorkspaceValidator():
295
301
  if f.url and 'url' not in self.skip:
296
302
  if re.match(r'^file:/[^/]', f.url):
297
303
  self.report.add_error(f"File '{f.ID}' has an invalid (Java-specific) file URL '{f.url}'")
304
+ elif ':' not in f.url:
305
+ self.report.add_error(f"File '{f.ID}' has an invalid (non-URI) file URL '{f.url}'")
306
+ continue
298
307
  scheme = f.url[0:f.url.index(':')]
299
308
  if scheme not in ('http', 'https', 'file'):
300
309
  self.report.add_warning(f"File '{f.ID}' has non-HTTP, non-file URL '{f.url}'")
@@ -321,17 +330,43 @@ class WorkspaceValidator():
321
330
  pcgts = page_from_file(f)
322
331
  page = pcgts.get_Page()
323
332
  if 'dimension' in self.page_checks:
324
- _, _, exif = self.workspace.image_from_page(page, f.pageId)
325
- if page.imageHeight != exif.height:
326
- self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {exif.height})")
327
- if page.imageWidth != exif.width:
328
- self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {exif.width})")
333
+ img = self.workspace._resolve_image_as_pil(page.imageFilename)
334
+ if page.imageHeight != img.height:
335
+ self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {img.height})")
336
+ if page.imageWidth != img.width:
337
+ self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {img.width})")
329
338
  if 'imagefilename' in self.page_checks:
330
339
  imageFilename = page.imageFilename
331
- if not self.mets.find_files(url=imageFilename):
340
+ if is_local_filename(imageFilename):
341
+ kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
342
+ else:
343
+ kwargs = dict(url=imageFilename, **self.find_kwargs)
344
+ if not self.mets.find_files(**kwargs):
332
345
  self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
333
346
  if is_local_filename(imageFilename) and not Path(imageFilename).exists():
334
347
  self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
348
+ if 'alternativeimage_filename' in self.page_checks:
349
+ for altimg in page.get_AllAlternativeImages():
350
+ if is_local_filename(altimg.filename):
351
+ kwargs = dict(local_filename=altimg.filename, **self.find_kwargs)
352
+ else:
353
+ kwargs = dict(url=altimg.filename, **self.find_kwargs)
354
+ if not self.mets.find_files(**kwargs):
355
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
356
+ f"'{altimg.filename}' not found in METS")
357
+ if is_local_filename(altimg.filename) and not Path(altimg.filename).exists():
358
+ self.report.add_warning(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
359
+ f"'{altimg.filename}' points to non-existent local file")
360
+ if 'alternativeimage_comments' in self.page_checks:
361
+ for altimg in page.get_AllAlternativeImages():
362
+ if altimg.comments is None:
363
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
364
+ f"'{altimg.filename}' features not specified in PAGE")
365
+ else:
366
+ for feature in altimg.comments.split(','):
367
+ if feature not in PAGE_ALTIMG_FEATURES:
368
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
369
+ f"'{altimg.filename}' feature '{feature}' not standardized for PAGE")
335
370
  if 'mets_fileid_page_pcgtsid' in self.page_checks and pcgts.pcGtsId != f.ID:
336
371
  self.report.add_warning('pc:PcGts/@pcGtsId differs from mets:file/@ID: "%s" !== "%s"' % (pcgts.pcGtsId or '', f.ID or ''))
337
372
 
ocrd-3.1.2/VERSION DELETED
@@ -1 +0,0 @@
1
- 3.1.2
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes