ocrd 3.4.1__tar.gz → 3.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.4.1/src/ocrd.egg-info → ocrd-3.5.1}/PKG-INFO +4 -3
- ocrd-3.5.1/VERSION +1 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/pyproject.toml +2 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/requirements.txt +3 -2
- {ocrd-3.4.1 → ocrd-3.5.1/src/ocrd.egg-info}/PKG-INFO +4 -3
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd.egg-info/requires.txt +7 -2
- ocrd-3.5.1/src/ocrd_models/constants.py +205 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_mets.py +231 -97
- ocrd-3.4.1/VERSION +0 -1
- ocrd-3.4.1/src/ocrd_models/constants.py +0 -100
- {ocrd-3.4.1 → ocrd-3.5.1}/LICENSE +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/MANIFEST.in +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_bashlib.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd_models.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd_network.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd_utils.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/README_ocrd_validators.md +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/setup.cfg +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/bashlib.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/ocrd_tool.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/validate.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/workspace.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/constants.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/decorators/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/lib.bash +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/mets_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/base.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/builtin/dummy_processor.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/builtin/filter_processor.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/helpers.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/processor/ocrd_page_result.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/resolver.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/resource_manager.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/workspace.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/workspace_backup.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd.egg-info/SOURCES.txt +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_modelfactory/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_exif.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_file.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_page.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_page_generateds.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/ocrd_xml_base.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/report.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_models/xpath_functions.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/cli/client.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/client.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/client_utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/constants.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/database.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/logging_utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/processing_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/deployer.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/server_utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/config.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/image.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/logging.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/os.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_utils/str.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/json_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/page_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/parameter_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/resource_list_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/workspace_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/src/ocrd_validators/xsd_validator.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_decorators.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_logging.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_logging_conf.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_mets_server.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_model_factory.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_resolver.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_resource_manager.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_task_sequence.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_utils.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_version.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_workspace.py +0 -0
- {ocrd-3.4.1 → ocrd-3.5.1}/tests/test_workspace_remove.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ocrd
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.5.1
|
|
4
4
|
Summary: OCR-D framework
|
|
5
5
|
Author-email: Konstantin Baierer <unixprog@gmail.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -21,7 +21,7 @@ Requires-Dist: elementpath
|
|
|
21
21
|
Requires-Dist: fastapi>=0.78.0
|
|
22
22
|
Requires-Dist: filetype
|
|
23
23
|
Requires-Dist: Flask
|
|
24
|
-
Requires-Dist: frozendict>=2.
|
|
24
|
+
Requires-Dist: frozendict>=2.4.0
|
|
25
25
|
Requires-Dist: gdown
|
|
26
26
|
Requires-Dist: httpx>=0.22.0
|
|
27
27
|
Requires-Dist: importlib_metadata; python_version < "3.8"
|
|
@@ -43,7 +43,8 @@ Requires-Dist: python-multipart
|
|
|
43
43
|
Requires-Dist: pyyaml
|
|
44
44
|
Requires-Dist: requests
|
|
45
45
|
Requires-Dist: requests_unixsocket2
|
|
46
|
-
Requires-Dist: shapely
|
|
46
|
+
Requires-Dist: shapely<2.0.2; python_version < "3.9"
|
|
47
|
+
Requires-Dist: shapely>=2; python_version >= "3.9"
|
|
47
48
|
Requires-Dist: uvicorn
|
|
48
49
|
Requires-Dist: uvicorn>=0.17.6
|
|
49
50
|
|
ocrd-3.5.1/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.5.1
|
|
@@ -8,7 +8,7 @@ elementpath
|
|
|
8
8
|
fastapi>=0.78.0
|
|
9
9
|
filetype
|
|
10
10
|
Flask
|
|
11
|
-
frozendict>=2.
|
|
11
|
+
frozendict>=2.4.0
|
|
12
12
|
gdown
|
|
13
13
|
httpx>=0.22.0
|
|
14
14
|
importlib_metadata ; python_version < '3.8'
|
|
@@ -32,6 +32,7 @@ python-multipart
|
|
|
32
32
|
pyyaml
|
|
33
33
|
requests
|
|
34
34
|
requests_unixsocket2
|
|
35
|
-
shapely
|
|
35
|
+
shapely < 2.0.2 ; python_version < '3.9'
|
|
36
|
+
shapely >= 2 ; python_version >= '3.9'
|
|
36
37
|
uvicorn
|
|
37
38
|
uvicorn>=0.17.6
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ocrd
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.5.1
|
|
4
4
|
Summary: OCR-D framework
|
|
5
5
|
Author-email: Konstantin Baierer <unixprog@gmail.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -21,7 +21,7 @@ Requires-Dist: elementpath
|
|
|
21
21
|
Requires-Dist: fastapi>=0.78.0
|
|
22
22
|
Requires-Dist: filetype
|
|
23
23
|
Requires-Dist: Flask
|
|
24
|
-
Requires-Dist: frozendict>=2.
|
|
24
|
+
Requires-Dist: frozendict>=2.4.0
|
|
25
25
|
Requires-Dist: gdown
|
|
26
26
|
Requires-Dist: httpx>=0.22.0
|
|
27
27
|
Requires-Dist: importlib_metadata; python_version < "3.8"
|
|
@@ -43,7 +43,8 @@ Requires-Dist: python-multipart
|
|
|
43
43
|
Requires-Dist: pyyaml
|
|
44
44
|
Requires-Dist: requests
|
|
45
45
|
Requires-Dist: requests_unixsocket2
|
|
46
|
-
Requires-Dist: shapely
|
|
46
|
+
Requires-Dist: shapely<2.0.2; python_version < "3.9"
|
|
47
|
+
Requires-Dist: shapely>=2; python_version >= "3.9"
|
|
47
48
|
Requires-Dist: uvicorn
|
|
48
49
|
Requires-Dist: uvicorn>=0.17.6
|
|
49
50
|
|
|
@@ -8,7 +8,7 @@ elementpath
|
|
|
8
8
|
fastapi>=0.78.0
|
|
9
9
|
filetype
|
|
10
10
|
Flask
|
|
11
|
-
frozendict>=2.
|
|
11
|
+
frozendict>=2.4.0
|
|
12
12
|
gdown
|
|
13
13
|
httpx>=0.22.0
|
|
14
14
|
jsonschema>=4
|
|
@@ -28,7 +28,6 @@ python-multipart
|
|
|
28
28
|
pyyaml
|
|
29
29
|
requests
|
|
30
30
|
requests_unixsocket2
|
|
31
|
-
shapely
|
|
32
31
|
uvicorn
|
|
33
32
|
uvicorn>=0.17.6
|
|
34
33
|
|
|
@@ -37,3 +36,9 @@ importlib_resources
|
|
|
37
36
|
|
|
38
37
|
[:python_version < "3.8"]
|
|
39
38
|
importlib_metadata
|
|
39
|
+
|
|
40
|
+
[:python_version < "3.9"]
|
|
41
|
+
shapely<2.0.2
|
|
42
|
+
|
|
43
|
+
[:python_version >= "3.9"]
|
|
44
|
+
shapely>=2
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants for ocrd_models.
|
|
3
|
+
"""
|
|
4
|
+
from re import Pattern
|
|
5
|
+
from enum import Enum, auto
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Any, List, Optional, Union
|
|
9
|
+
from ocrd_utils import resource_string
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
'IDENTIFIER_PRIORITY',
|
|
13
|
+
'METS_XML_EMPTY',
|
|
14
|
+
'NAMESPACES',
|
|
15
|
+
'TAG_METS_AGENT',
|
|
16
|
+
'TAG_METS_DIV',
|
|
17
|
+
'TAG_METS_FILE',
|
|
18
|
+
'TAG_METS_FILEGRP',
|
|
19
|
+
'TAG_METS_FILESEC',
|
|
20
|
+
'TAG_METS_FPTR',
|
|
21
|
+
'TAG_METS_FLOCAT',
|
|
22
|
+
'TAG_METS_METSHDR',
|
|
23
|
+
'TAG_METS_NAME',
|
|
24
|
+
'TAG_METS_NOTE',
|
|
25
|
+
'TAG_METS_STRUCTMAP',
|
|
26
|
+
'TAG_MODS_IDENTIFIER',
|
|
27
|
+
'TAG_PAGE_ALTERNATIVEIMAGE',
|
|
28
|
+
'TAG_PAGE_COORDS',
|
|
29
|
+
'TAG_PAGE_READINGORDER',
|
|
30
|
+
'TAG_PAGE_REGIONREFINDEXED',
|
|
31
|
+
'TAG_PAGE_TEXTLINE',
|
|
32
|
+
'TAG_PAGE_TEXTEQUIV',
|
|
33
|
+
'TAG_PAGE_TEXTREGION',
|
|
34
|
+
'METS_PAGE_DIV_ATTRIBUTE',
|
|
35
|
+
'METS_STRUCT_DIV_ATTRIBUTE',
|
|
36
|
+
'METS_DIV_ATTRIBUTE_ATOM_PATTERN',
|
|
37
|
+
'METS_DIV_ATTRIBUTE_RANGE_PATTERN',
|
|
38
|
+
'METS_DIV_ATTRIBUTE_REGEX_PATTERN',
|
|
39
|
+
'PAGE_REGION_TYPES',
|
|
40
|
+
'PAGE_ALTIMG_FEATURES',
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url']
|
|
45
|
+
|
|
46
|
+
METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml')
|
|
47
|
+
|
|
48
|
+
NAMESPACES = {
|
|
49
|
+
'mets': "http://www.loc.gov/METS/",
|
|
50
|
+
'mods': "http://www.loc.gov/mods/v3",
|
|
51
|
+
'xlink': "http://www.w3.org/1999/xlink",
|
|
52
|
+
'page': "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15",
|
|
53
|
+
'xsl': 'http://www.w3.org/1999/XSL/Transform#',
|
|
54
|
+
'ocrd': 'https://ocr-d.de',
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
TAG_METS_AGENT = '{%s}agent' % NAMESPACES['mets']
|
|
58
|
+
TAG_METS_DIV = '{%s}div' % NAMESPACES['mets']
|
|
59
|
+
TAG_METS_FILE = '{%s}file' % NAMESPACES['mets']
|
|
60
|
+
TAG_METS_FILEGRP = '{%s}fileGrp' % NAMESPACES['mets']
|
|
61
|
+
TAG_METS_FILESEC = '{%s}fileSec' % NAMESPACES['mets']
|
|
62
|
+
TAG_METS_FPTR = '{%s}fptr' % NAMESPACES['mets']
|
|
63
|
+
TAG_METS_FLOCAT = '{%s}FLocat' % NAMESPACES['mets']
|
|
64
|
+
TAG_METS_METSHDR = '{%s}metsHdr' % NAMESPACES['mets']
|
|
65
|
+
TAG_METS_NAME = '{%s}name' % NAMESPACES['mets']
|
|
66
|
+
TAG_METS_NOTE = '{%s}note' % NAMESPACES['mets']
|
|
67
|
+
TAG_METS_STRUCTMAP = '{%s}structMap' % NAMESPACES['mets']
|
|
68
|
+
|
|
69
|
+
TAG_MODS_IDENTIFIER = '{%s}identifier' % NAMESPACES['mods']
|
|
70
|
+
|
|
71
|
+
TAG_PAGE_ALTERNATIVEIMAGE = '{%s}AlternativeImage' % NAMESPACES['page']
|
|
72
|
+
TAG_PAGE_COORDS = '{%s}Coords' % NAMESPACES['page']
|
|
73
|
+
TAG_PAGE_READINGORDER = '{%s}ReadingOrder' % NAMESPACES['page']
|
|
74
|
+
TAG_PAGE_REGIONREFINDEXED = '{%s}RegionRefIndexed' % NAMESPACES['page']
|
|
75
|
+
TAG_PAGE_TEXTLINE = '{%s}TextLine' % NAMESPACES['page']
|
|
76
|
+
TAG_PAGE_TEXTEQUIV = '{%s}TextEquiv' % NAMESPACES['page']
|
|
77
|
+
TAG_PAGE_TEXTREGION = '{%s}TextRegion' % NAMESPACES['page']
|
|
78
|
+
|
|
79
|
+
PAGE_REGION_TYPES = [
|
|
80
|
+
'Advert', 'Chart', 'Chem', 'Custom', 'Graphic', 'Image',
|
|
81
|
+
'LineDrawing', 'Map', 'Maths', 'Music', 'Noise',
|
|
82
|
+
'Separator', 'Table', 'Text', 'Unknown'
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
PAGE_ALTIMG_FEATURES = [
|
|
86
|
+
'binarized',
|
|
87
|
+
'grayscale_normalized',
|
|
88
|
+
'despeckled',
|
|
89
|
+
'cropped',
|
|
90
|
+
'deskewed',
|
|
91
|
+
'rotated-90',
|
|
92
|
+
'rotated-180',
|
|
93
|
+
'rotated-270',
|
|
94
|
+
'dewarped',
|
|
95
|
+
'clipped',
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class METS_PAGE_DIV_ATTRIBUTE(Enum):
|
|
100
|
+
"""page selection attributes of PHYSICAL mets:structMap//mets:div"""
|
|
101
|
+
ID = auto()
|
|
102
|
+
ORDER = auto()
|
|
103
|
+
ORDERLABEL = auto()
|
|
104
|
+
LABEL = auto()
|
|
105
|
+
CONTENTIDS = auto()
|
|
106
|
+
|
|
107
|
+
@classmethod
|
|
108
|
+
def names(cls):
|
|
109
|
+
return [x.name for x in cls]
|
|
110
|
+
@classmethod
|
|
111
|
+
def type_prefix(cls):
|
|
112
|
+
"""disambiguation prefix to use for all subtypes"""
|
|
113
|
+
return "physical:"
|
|
114
|
+
def prefix(self):
|
|
115
|
+
"""disambiguation prefix to use for this attribute type"""
|
|
116
|
+
return self.type_prefix() + self.name.lower() + ":"
|
|
117
|
+
|
|
118
|
+
class METS_STRUCT_DIV_ATTRIBUTE(Enum):
|
|
119
|
+
"""page selection attributes of LOGICAL mets:structMap//mets:div"""
|
|
120
|
+
ID = auto()
|
|
121
|
+
DMDID = auto()
|
|
122
|
+
TYPE = auto()
|
|
123
|
+
LABEL = auto()
|
|
124
|
+
|
|
125
|
+
@classmethod
|
|
126
|
+
def names(cls):
|
|
127
|
+
return [x.name for x in cls]
|
|
128
|
+
@classmethod
|
|
129
|
+
def type_prefix(cls):
|
|
130
|
+
"""disambiguation prefix to use for all subtypes"""
|
|
131
|
+
return "logical:"
|
|
132
|
+
def prefix(self):
|
|
133
|
+
"""disambiguation prefix to use for this attribute type"""
|
|
134
|
+
return self.type_prefix() + self.name.lower() + ":"
|
|
135
|
+
|
|
136
|
+
@dataclass
|
|
137
|
+
class METS_DIV_ATTRIBUTE_PATTERN(ABC):
|
|
138
|
+
"""page selection pattern (abstract supertype)"""
|
|
139
|
+
|
|
140
|
+
expr: Any
|
|
141
|
+
"""pattern value to match a mets:div against"""
|
|
142
|
+
attr: List[Union[METS_PAGE_DIV_ATTRIBUTE, METS_STRUCT_DIV_ATTRIBUTE]] = field(
|
|
143
|
+
default_factory=lambda: list(METS_PAGE_DIV_ATTRIBUTE) + list(METS_STRUCT_DIV_ATTRIBUTE))
|
|
144
|
+
"""attribute type(s) to match a mets:div for
|
|
145
|
+
(pre-disambiguated with prefix syntax, or filled upon first match)
|
|
146
|
+
"""
|
|
147
|
+
has_matched: bool = field(init=False, default=False)
|
|
148
|
+
"""whether this pattern has already been matched"""
|
|
149
|
+
|
|
150
|
+
def attr_prefix(self):
|
|
151
|
+
"""attribute type disambiguation prefix corresponding to the current state of disambiguation"""
|
|
152
|
+
if self.attr == list(METS_PAGE_DIV_ATTRIBUTE) + list(METS_STRUCT_DIV_ATTRIBUTE):
|
|
153
|
+
return ""
|
|
154
|
+
if self.attr == list(METS_PAGE_DIV_ATTRIBUTE):
|
|
155
|
+
return METS_PAGE_DIV_ATTRIBUTE.type_prefix()
|
|
156
|
+
if self.attr == list(METS_STRUCT_DIV_ATTRIBUTE):
|
|
157
|
+
return METS_STRUCT_DIV_ATTRIBUTE.type_prefix()
|
|
158
|
+
assert len(self.attr) == 1, "unexpected type ambiguity: %s" % repr(self.attr)
|
|
159
|
+
return self.attr[0].prefix()
|
|
160
|
+
|
|
161
|
+
@abstractmethod
|
|
162
|
+
def _matches(self, input) -> bool:
|
|
163
|
+
return
|
|
164
|
+
def matches(self, input) -> bool:
|
|
165
|
+
"""does the selection pattern match on the given attribute value?"""
|
|
166
|
+
if (matched := self._matches(input)):
|
|
167
|
+
self.has_matched = True
|
|
168
|
+
return matched
|
|
169
|
+
|
|
170
|
+
@dataclass
|
|
171
|
+
class METS_DIV_ATTRIBUTE_ATOM_PATTERN(METS_DIV_ATTRIBUTE_PATTERN):
|
|
172
|
+
"""page selection pattern for literal (single value) matching"""
|
|
173
|
+
|
|
174
|
+
expr: str
|
|
175
|
+
def __repr__(self):
|
|
176
|
+
return "%s%s" % (self.attr_prefix(), self.expr)
|
|
177
|
+
def _matches(self, input):
|
|
178
|
+
return input == self.expr
|
|
179
|
+
|
|
180
|
+
@dataclass
|
|
181
|
+
class METS_DIV_ATTRIBUTE_RANGE_PATTERN(METS_DIV_ATTRIBUTE_PATTERN):
|
|
182
|
+
"""page selection pattern for interval (list expansion) matching"""
|
|
183
|
+
|
|
184
|
+
expr: List[str]
|
|
185
|
+
start: str = field(init=False)
|
|
186
|
+
"""first value of the range after expansion, before matching-exhausting"""
|
|
187
|
+
stop: str = field(init=False)
|
|
188
|
+
"""last value of the range after expansion, before matching-exhausting"""
|
|
189
|
+
def __post_init__(self):
|
|
190
|
+
self.start = self.expr[0]
|
|
191
|
+
self.stop = self.expr[-1]
|
|
192
|
+
def __repr__(self):
|
|
193
|
+
return "%s%s..%s" % (self.attr_prefix(), self.start, self.stop)
|
|
194
|
+
def _matches(self, input):
|
|
195
|
+
return input in self.expr
|
|
196
|
+
|
|
197
|
+
@dataclass
|
|
198
|
+
class METS_DIV_ATTRIBUTE_REGEX_PATTERN(METS_DIV_ATTRIBUTE_PATTERN):
|
|
199
|
+
"""page selection pattern for regular expression matching"""
|
|
200
|
+
|
|
201
|
+
expr: Pattern
|
|
202
|
+
def __repr__(self):
|
|
203
|
+
return "%s//%s" % (self.attr_prefix(), self.expr.pattern)
|
|
204
|
+
def _matches(self, input):
|
|
205
|
+
return bool(self.expr.fullmatch(input))
|