ocrd 3.1.2__tar.gz → 3.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.1.2/src/ocrd.egg-info → ocrd-3.3.0}/PKG-INFO +1 -1
- ocrd-3.3.0/VERSION +1 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/workspace.py +2 -2
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/base.py +10 -3
- {ocrd-3.1.2 → ocrd-3.3.0/src/ocrd.egg-info}/PKG-INFO +1 -1
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/constants.py +16 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/workspace_validator.py +45 -10
- ocrd-3.1.2/VERSION +0 -1
- {ocrd-3.1.2 → ocrd-3.3.0}/LICENSE +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/MANIFEST.in +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_bashlib.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_models.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_network.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_utils.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/README_ocrd_validators.md +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/pyproject.toml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/requirements.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/setup.cfg +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/bashlib.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/ocrd_tool.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/validate.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/constants.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/lib.bash +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/mets_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/builtin/filter_processor.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/helpers.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/processor/ocrd_page_result.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resolver.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/resource_manager.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace_backup.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/SOURCES.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/requires.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_modelfactory/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_exif.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_file.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_mets.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_page.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_page_generateds.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/ocrd_xml_base.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/report.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_models/xpath_functions.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/client.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/client.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/client_utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/constants.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/database.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/logging_utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processing_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/deployer.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/server_utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/config.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/image.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/logging.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/os.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_utils/str.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/json_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/page_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/parameter_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/resource_list_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/src/ocrd_validators/xsd_validator.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_decorators.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_logging.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_logging_conf.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_mets_server.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_model_factory.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resolver.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_resource_manager.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_task_sequence.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_utils.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_version.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_workspace.py +0 -0
- {ocrd-3.1.2 → ocrd-3.3.0}/tests/test_workspace_remove.py +0 -0
ocrd-3.3.0/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.3.0
|
|
@@ -88,8 +88,8 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
|
|
|
88
88
|
@pass_workspace
|
|
89
89
|
@click.option('-a', '--download', is_flag=True, help="Download all files")
|
|
90
90
|
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
|
|
91
|
-
['imagefilename', '
|
|
92
|
-
'
|
|
91
|
+
['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density', 'page', 'page_xsd',
|
|
92
|
+
'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
|
|
93
93
|
@click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
|
|
94
94
|
@click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
|
|
95
95
|
@click.argument('mets_url', default=None, required=False)
|
|
@@ -779,10 +779,14 @@ class Processor():
|
|
|
779
779
|
to handle cases like multiple output fileGrps, non-PAGE input etc.)
|
|
780
780
|
"""
|
|
781
781
|
input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files)
|
|
782
|
-
|
|
783
|
-
page_id = input_files[
|
|
782
|
+
input_pos = next(i for i, input_file in enumerate(input_files) if input_file is not None)
|
|
783
|
+
page_id = input_files[input_pos].pageId
|
|
784
784
|
self._base_logger.info("processing page %s", page_id)
|
|
785
785
|
for i, input_file in enumerate(input_files):
|
|
786
|
+
if input_file is None:
|
|
787
|
+
grp = self.input_file_grp.split(',')[i]
|
|
788
|
+
self._base_logger.debug(f"ignoring missing file for input fileGrp {grp} for page {page_id}")
|
|
789
|
+
continue
|
|
786
790
|
assert isinstance(input_file, get_args(OcrdFileType))
|
|
787
791
|
self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
|
|
788
792
|
try:
|
|
@@ -792,7 +796,10 @@ class Processor():
|
|
|
792
796
|
except ValueError as err:
|
|
793
797
|
# not PAGE and not an image to generate PAGE for
|
|
794
798
|
self._base_logger.error(f"non-PAGE input for page {page_id}: {err}")
|
|
795
|
-
output_file_id = make_file_id(input_files[
|
|
799
|
+
output_file_id = make_file_id(input_files[input_pos], self.output_file_grp)
|
|
800
|
+
if input_files[input_pos].fileGrp == self.output_file_grp:
|
|
801
|
+
# input=output fileGrp: re-use ID exactly
|
|
802
|
+
output_file_id = input_files[input_pos].ID
|
|
796
803
|
output_file = next(self.workspace.mets.find_files(ID=output_file_id), None)
|
|
797
804
|
if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE':
|
|
798
805
|
# short-cut avoiding useless computation:
|
|
@@ -28,6 +28,8 @@ __all__ = [
|
|
|
28
28
|
'TAG_PAGE_TEXTEQUIV',
|
|
29
29
|
'TAG_PAGE_TEXTREGION',
|
|
30
30
|
'METS_PAGE_DIV_ATTRIBUTE',
|
|
31
|
+
'PAGE_REGION_TYPES',
|
|
32
|
+
'PAGE_ALTIMG_FEATURES',
|
|
31
33
|
]
|
|
32
34
|
|
|
33
35
|
|
|
@@ -72,6 +74,20 @@ PAGE_REGION_TYPES = [
|
|
|
72
74
|
'Separator', 'Table', 'Text', 'Unknown'
|
|
73
75
|
]
|
|
74
76
|
|
|
77
|
+
PAGE_ALTIMG_FEATURES = [
|
|
78
|
+
'binarized',
|
|
79
|
+
'grayscale_normalized',
|
|
80
|
+
'despeckled',
|
|
81
|
+
'cropped',
|
|
82
|
+
'deskewed',
|
|
83
|
+
'rotated-90',
|
|
84
|
+
'rotated-180',
|
|
85
|
+
'rotated-270',
|
|
86
|
+
'dewarped',
|
|
87
|
+
'clipped',
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
|
|
75
91
|
class METS_PAGE_DIV_ATTRIBUTE(Enum):
|
|
76
92
|
ID = auto()
|
|
77
93
|
ORDER = auto()
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
|
|
8
8
|
from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename, DEFAULT_METS_BASENAME
|
|
9
9
|
from ocrd_models import ValidationReport
|
|
10
|
+
from ocrd_models.constants import PAGE_ALTIMG_FEATURES
|
|
10
11
|
from ocrd_modelfactory import page_from_file
|
|
11
12
|
|
|
12
13
|
from .constants import FILE_GROUP_CATEGORIES, FILE_GROUP_PREFIX
|
|
@@ -98,6 +99,9 @@ class WorkspaceValidator():
|
|
|
98
99
|
self.page_coordinate_consistency = page_coordinate_consistency
|
|
99
100
|
# there will be more options to come
|
|
100
101
|
self.page_checks = [check for check in ['mets_fileid_page_pcgtsid',
|
|
102
|
+
'imagefilename',
|
|
103
|
+
'alternativeimage_filename',
|
|
104
|
+
'alternativeimage_comments',
|
|
101
105
|
'dimension',
|
|
102
106
|
'page',
|
|
103
107
|
'page_xsd']
|
|
@@ -118,7 +122,7 @@ class WorkspaceValidator():
|
|
|
118
122
|
mets_url (string): URL of the METS file
|
|
119
123
|
src_dir (string, None): Directory containing mets file
|
|
120
124
|
skip (list): Validation checks to omit. One or more of
|
|
121
|
-
'mets_unique_identifier',
|
|
125
|
+
'mets_unique_identifier',
|
|
122
126
|
'mets_files', 'pixel_density', 'dimension', 'url',
|
|
123
127
|
'multipage', 'page', 'page_xsd', 'mets_xsd',
|
|
124
128
|
'mets_fileid_page_pcgtsid'
|
|
@@ -145,8 +149,6 @@ class WorkspaceValidator():
|
|
|
145
149
|
try:
|
|
146
150
|
if 'mets_unique_identifier' not in self.skip:
|
|
147
151
|
self._validate_mets_unique_identifier()
|
|
148
|
-
if 'mets_file_group_names' not in self.skip:
|
|
149
|
-
self._validate_mets_file_group_names()
|
|
150
152
|
if 'mets_files' not in self.skip:
|
|
151
153
|
self._validate_mets_files()
|
|
152
154
|
if 'pixel_density' not in self.skip:
|
|
@@ -192,7 +194,11 @@ class WorkspaceValidator():
|
|
|
192
194
|
self.workspace.download_file(f)
|
|
193
195
|
page = page_from_file(f).get_Page()
|
|
194
196
|
imageFilename = page.imageFilename
|
|
195
|
-
if
|
|
197
|
+
if is_local_filename(imageFilename):
|
|
198
|
+
kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
|
|
199
|
+
else:
|
|
200
|
+
kwargs = dict(url=imageFilename, **self.find_kwargs)
|
|
201
|
+
if not self.mets.find_files(**kwargs):
|
|
196
202
|
self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
|
|
197
203
|
if is_local_filename(imageFilename) and not Path(imageFilename).exists():
|
|
198
204
|
self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
|
|
@@ -295,6 +301,9 @@ class WorkspaceValidator():
|
|
|
295
301
|
if f.url and 'url' not in self.skip:
|
|
296
302
|
if re.match(r'^file:/[^/]', f.url):
|
|
297
303
|
self.report.add_error(f"File '{f.ID}' has an invalid (Java-specific) file URL '{f.url}'")
|
|
304
|
+
elif ':' not in f.url:
|
|
305
|
+
self.report.add_error(f"File '{f.ID}' has an invalid (non-URI) file URL '{f.url}'")
|
|
306
|
+
continue
|
|
298
307
|
scheme = f.url[0:f.url.index(':')]
|
|
299
308
|
if scheme not in ('http', 'https', 'file'):
|
|
300
309
|
self.report.add_warning(f"File '{f.ID}' has non-HTTP, non-file URL '{f.url}'")
|
|
@@ -321,17 +330,43 @@ class WorkspaceValidator():
|
|
|
321
330
|
pcgts = page_from_file(f)
|
|
322
331
|
page = pcgts.get_Page()
|
|
323
332
|
if 'dimension' in self.page_checks:
|
|
324
|
-
|
|
325
|
-
if page.imageHeight !=
|
|
326
|
-
self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {
|
|
327
|
-
if page.imageWidth !=
|
|
328
|
-
self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {
|
|
333
|
+
img = self.workspace._resolve_image_as_pil(page.imageFilename)
|
|
334
|
+
if page.imageHeight != img.height:
|
|
335
|
+
self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {img.height})")
|
|
336
|
+
if page.imageWidth != img.width:
|
|
337
|
+
self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {img.width})")
|
|
329
338
|
if 'imagefilename' in self.page_checks:
|
|
330
339
|
imageFilename = page.imageFilename
|
|
331
|
-
if
|
|
340
|
+
if is_local_filename(imageFilename):
|
|
341
|
+
kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
|
|
342
|
+
else:
|
|
343
|
+
kwargs = dict(url=imageFilename, **self.find_kwargs)
|
|
344
|
+
if not self.mets.find_files(**kwargs):
|
|
332
345
|
self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
|
|
333
346
|
if is_local_filename(imageFilename) and not Path(imageFilename).exists():
|
|
334
347
|
self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
|
|
348
|
+
if 'alternativeimage_filename' in self.page_checks:
|
|
349
|
+
for altimg in page.get_AllAlternativeImages():
|
|
350
|
+
if is_local_filename(altimg.filename):
|
|
351
|
+
kwargs = dict(local_filename=altimg.filename, **self.find_kwargs)
|
|
352
|
+
else:
|
|
353
|
+
kwargs = dict(url=altimg.filename, **self.find_kwargs)
|
|
354
|
+
if not self.mets.find_files(**kwargs):
|
|
355
|
+
self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
|
|
356
|
+
f"'{altimg.filename}' not found in METS")
|
|
357
|
+
if is_local_filename(altimg.filename) and not Path(altimg.filename).exists():
|
|
358
|
+
self.report.add_warning(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
|
|
359
|
+
f"'{altimg.filename}' points to non-existent local file")
|
|
360
|
+
if 'alternativeimage_comments' in self.page_checks:
|
|
361
|
+
for altimg in page.get_AllAlternativeImages():
|
|
362
|
+
if altimg.comments is None:
|
|
363
|
+
self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
|
|
364
|
+
f"'{altimg.filename}' features not specified in PAGE")
|
|
365
|
+
else:
|
|
366
|
+
for feature in altimg.comments.split(','):
|
|
367
|
+
if feature not in PAGE_ALTIMG_FEATURES:
|
|
368
|
+
self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
|
|
369
|
+
f"'{altimg.filename}' feature '{feature}' not standardized for PAGE")
|
|
335
370
|
if 'mets_fileid_page_pcgtsid' in self.page_checks and pcgts.pcGtsId != f.ID:
|
|
336
371
|
self.report.add_warning('pc:PcGts/@pcGtsId differs from mets:file/@ID: "%s" !== "%s"' % (pcgts.pcGtsId or '', f.ID or ''))
|
|
337
372
|
|
ocrd-3.1.2/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
3.1.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|