ocrd 3.0.0b4__tar.gz → 3.0.0b6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.0.0b4/src/ocrd.egg-info → ocrd-3.0.0b6}/PKG-INFO +2 -1
- ocrd-3.0.0b6/VERSION +1 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/requirements.txt +1 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/bashlib.py +6 -4
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/ocrd_tool.py +1 -1
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/validate.py +6 -3
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/workspace.py +71 -56
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/decorators/__init__.py +6 -6
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/decorators/ocrd_cli_options.py +1 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/lib.bash +24 -21
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/mets_server.py +39 -8
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/base.py +307 -89
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/builtin/dummy_processor.py +0 -2
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/helpers.py +16 -7
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/ocrd_page_result.py +2 -2
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/workspace.py +3 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6/src/ocrd.egg-info}/PKG-INFO +2 -1
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd.egg-info/requires.txt +1 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_mets.py +9 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_page_generateds.py +44 -11
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/logging.py +6 -2
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/str.py +2 -1
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_decorators.py +5 -1
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_mets_server.py +15 -1
- ocrd-3.0.0b4/VERSION +0 -1
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/LICENSE +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/MANIFEST.in +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_bashlib.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd_models.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd_network.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd_utils.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/README_ocrd_validators.md +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/pyproject.toml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/setup.cfg +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/resolver.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/resource_manager.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/workspace_backup.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd.egg-info/SOURCES.txt +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_modelfactory/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_exif.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_file.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_page.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/ocrd_xml_base.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/report.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/cli/client.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/client.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/client_utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/database.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/logging_utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/processing_server.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/deployer.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/server_utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/config.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/image.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_utils/os.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/json_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/ocrd_tool_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/page_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/parameter_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/resource_list_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/workspace_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/src/ocrd_validators/xsd_validator.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_logging.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_logging_conf.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_model_factory.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_resolver.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_resource_manager.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_task_sequence.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_utils.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_version.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_workspace.py +0 -0
- {ocrd-3.0.0b4 → ocrd-3.0.0b6}/tests/test_workspace_remove.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ocrd
|
|
3
|
-
Version: 3.0.0b4
|
|
3
|
+
Version: 3.0.0b6
|
|
4
4
|
Summary: OCR-D framework
|
|
5
5
|
Author-email: Konstantin Baierer <unixprog@gmail.com>
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -26,6 +26,7 @@ Requires-Dist: httpx>=0.22.0
|
|
|
26
26
|
Requires-Dist: importlib_metadata; python_version < "3.8"
|
|
27
27
|
Requires-Dist: importlib_resources; python_version < "3.10"
|
|
28
28
|
Requires-Dist: jsonschema>=4
|
|
29
|
+
Requires-Dist: loky
|
|
29
30
|
Requires-Dist: lxml
|
|
30
31
|
Requires-Dist: memory-profiler>=0.58.0
|
|
31
32
|
Requires-Dist: numpy
|
ocrd-3.0.0b6/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.0.0b6
|
|
@@ -76,10 +76,10 @@ def bashlib_constants(name):
|
|
|
76
76
|
@click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
|
|
77
77
|
@click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
|
|
78
78
|
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
|
|
79
|
-
@click.option('-
|
|
79
|
+
@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server', default=None)
|
|
80
|
+
@click.option('-d', '--working-dir', help="Working Directory")
|
|
80
81
|
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
|
|
81
82
|
@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
|
|
82
|
-
# repeat some other processor options for convenience (will be ignored here)
|
|
83
83
|
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
|
|
84
84
|
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
|
|
85
85
|
"(with '--page-id', remove only those).\n"
|
|
@@ -126,9 +126,10 @@ def bashlib_input_files(ocrd_tool, executable, **kwargs):
|
|
|
126
126
|
def executable(self):
|
|
127
127
|
# needed for ocrd_tool lookup
|
|
128
128
|
return executable
|
|
129
|
+
processor_class = FullBashlibProcessor
|
|
129
130
|
else:
|
|
130
131
|
# we have no true metadata file, so fill in just to make it work
|
|
131
|
-
class
|
|
132
|
+
class UnknownBashlibProcessor(BashlibProcessor):
|
|
132
133
|
@property
|
|
133
134
|
def ocrd_tool(self):
|
|
134
135
|
# needed to satisfy the validator
|
|
@@ -142,5 +143,6 @@ def bashlib_input_files(ocrd_tool, executable, **kwargs):
|
|
|
142
143
|
def version(self):
|
|
143
144
|
# needed to satisfy the validator and wrapper
|
|
144
145
|
return '1.0'
|
|
146
|
+
processor_class = UnknownBashlibProcessor
|
|
145
147
|
|
|
146
|
-
ocrd_cli_wrap_processor(
|
|
148
|
+
ocrd_cli_wrap_processor(processor_class, **kwargs)
|
|
@@ -125,7 +125,7 @@ def ocrd_tool_tool_list_resources(ctx):
|
|
|
125
125
|
@click.argument('res_name')
|
|
126
126
|
@pass_ocrd_tool
|
|
127
127
|
def ocrd_tool_tool_resolve_resource(ctx, res_name):
|
|
128
|
-
ctx.processor(None).resolve_resource(res_name)
|
|
128
|
+
print(ctx.processor(None).resolve_resource(res_name))
|
|
129
129
|
|
|
130
130
|
@ocrd_tool_tool.command('show-resource', help="Dump a tool's file resource")
|
|
131
131
|
@click.argument('res_name')
|
|
@@ -102,16 +102,19 @@ def validate_page(page, **kwargs):
|
|
|
102
102
|
@validate_cli.command('tasks')
|
|
103
103
|
@click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax')
|
|
104
104
|
@click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME, help='Basename of the METS file, used in conjunction with --workspace')
|
|
105
|
+
@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server')
|
|
105
106
|
@click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.')
|
|
106
107
|
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
|
|
107
108
|
@click.argument('tasks', nargs=-1, required=True)
|
|
108
|
-
def validate_process(tasks, workspace, mets_basename, overwrite, page_id):
|
|
109
|
+
def validate_process(tasks, workspace, mets_basename, mets_server_url, overwrite, page_id):
|
|
109
110
|
'''
|
|
110
111
|
Validate a sequence of tasks passable to `ocrd process`
|
|
111
112
|
'''
|
|
112
113
|
if workspace:
|
|
113
|
-
_inform_of_result(validate_tasks(
|
|
114
|
-
|
|
114
|
+
_inform_of_result(validate_tasks(
|
|
115
|
+
[ProcessorTask.parse(t) for t in tasks],
|
|
116
|
+
Workspace(Resolver(), directory=workspace, mets_basename=mets_basename, mets_server_url=mets_server_url),
|
|
117
|
+
page_id=page_id, overwrite=overwrite))
|
|
115
118
|
else:
|
|
116
119
|
for t in [ProcessorTask.parse(t) for t in tasks]:
|
|
117
120
|
_inform_of_result(t.validate())
|
|
@@ -36,6 +36,17 @@ class WorkspaceCtx():
|
|
|
36
36
|
= self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
|
|
37
37
|
self.automatic_backup = automatic_backup
|
|
38
38
|
|
|
39
|
+
def workspace(self):
|
|
40
|
+
return Workspace(
|
|
41
|
+
self.resolver,
|
|
42
|
+
directory=self.directory,
|
|
43
|
+
mets_basename=self.mets_basename,
|
|
44
|
+
automatic_backup=self.automatic_backup,
|
|
45
|
+
mets_server_url=self.mets_server_url,
|
|
46
|
+
)
|
|
47
|
+
def backup_manager(self):
|
|
48
|
+
return WorkspaceBackupManager(self.workspace())
|
|
49
|
+
|
|
39
50
|
|
|
40
51
|
pass_workspace = click.make_pass_decorator(WorkspaceCtx)
|
|
41
52
|
|
|
@@ -138,6 +149,8 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
|
|
|
138
149
|
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
|
|
139
150
|
ctx.directory = workspace_dir
|
|
140
151
|
|
|
152
|
+
assert not ctx.mets_server_url, \
|
|
153
|
+
f"clone cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
141
154
|
workspace = ctx.resolver.workspace_from_url(
|
|
142
155
|
mets_url,
|
|
143
156
|
dst_dir=ctx.directory,
|
|
@@ -173,10 +186,12 @@ def workspace_init(ctx, clobber_mets, directory):
|
|
|
173
186
|
if directory:
|
|
174
187
|
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory))
|
|
175
188
|
ctx.directory = directory
|
|
189
|
+
assert not ctx.mets_server_url, \
|
|
190
|
+
f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
176
191
|
workspace = ctx.resolver.workspace_from_nothing(
|
|
177
192
|
directory=ctx.directory,
|
|
178
193
|
mets_basename=ctx.mets_basename,
|
|
179
|
-
clobber_mets=clobber_mets
|
|
194
|
+
clobber_mets=clobber_mets,
|
|
180
195
|
)
|
|
181
196
|
workspace.save_mets()
|
|
182
197
|
print(workspace.directory)
|
|
@@ -200,13 +215,7 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
|
|
|
200
215
|
Add a file or http(s) URL FNAME to METS in a workspace.
|
|
201
216
|
If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.
|
|
202
217
|
"""
|
|
203
|
-
workspace = Workspace(
|
|
204
|
-
ctx.resolver,
|
|
205
|
-
directory=ctx.directory,
|
|
206
|
-
mets_basename=ctx.mets_basename,
|
|
207
|
-
automatic_backup=ctx.automatic_backup,
|
|
208
|
-
mets_server_url=ctx.mets_server_url,
|
|
209
|
-
)
|
|
218
|
+
workspace = ctx.workspace()
|
|
210
219
|
|
|
211
220
|
log = getLogger('ocrd.cli.workspace.add')
|
|
212
221
|
if not mimetype:
|
|
@@ -313,13 +322,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
313
322
|
|
|
314
323
|
"""
|
|
315
324
|
log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
|
|
316
|
-
workspace = Workspace(
|
|
317
|
-
ctx.resolver,
|
|
318
|
-
directory=ctx.directory,
|
|
319
|
-
mets_basename=ctx.mets_basename,
|
|
320
|
-
automatic_backup=ctx.automatic_backup,
|
|
321
|
-
mets_server_url=ctx.mets_server_url,
|
|
322
|
-
)
|
|
325
|
+
workspace = ctx.workspace()
|
|
323
326
|
|
|
324
327
|
try:
|
|
325
328
|
pat = re.compile(regex)
|
|
@@ -455,12 +458,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
|
|
|
455
458
|
output_field = [snake_to_camel.get(x, x) for x in output_field]
|
|
456
459
|
modified_mets = False
|
|
457
460
|
ret = []
|
|
458
|
-
workspace = Workspace(
|
|
459
|
-
ctx.resolver,
|
|
460
|
-
directory=ctx.directory,
|
|
461
|
-
mets_basename=ctx.mets_basename,
|
|
462
|
-
mets_server_url=ctx.mets_server_url,
|
|
463
|
-
)
|
|
461
|
+
workspace = ctx.workspace()
|
|
464
462
|
with pushd_popd(workspace.directory):
|
|
465
463
|
for f in workspace.find_files(
|
|
466
464
|
file_id=file_id,
|
|
@@ -510,7 +508,9 @@ def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefin
|
|
|
510
508
|
(If any ``ID`` starts with ``//``, then its remainder
|
|
511
509
|
will be interpreted as a regular expression.)
|
|
512
510
|
"""
|
|
513
|
-
|
|
511
|
+
assert not ctx.mets_server_url, \
|
|
512
|
+
f"remove cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
513
|
+
workspace = ctx.workspace()
|
|
514
514
|
for i in id:
|
|
515
515
|
workspace.remove_file(i, force=force, keep_file=keep_file)
|
|
516
516
|
workspace.save_mets()
|
|
@@ -528,7 +528,9 @@ def rename_group(ctx, old, new):
|
|
|
528
528
|
"""
|
|
529
529
|
Rename fileGrp (USE attribute ``NEW`` to ``OLD``).
|
|
530
530
|
"""
|
|
531
|
-
|
|
531
|
+
assert not ctx.mets_server_url, \
|
|
532
|
+
f"rename-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
533
|
+
workspace = ctx.workspace()
|
|
532
534
|
workspace.rename_file_group(old, new)
|
|
533
535
|
workspace.save_mets()
|
|
534
536
|
|
|
@@ -549,7 +551,9 @@ def remove_group(ctx, group, recursive, force, keep_files):
|
|
|
549
551
|
(If any ``GROUP`` starts with ``//``, then its remainder
|
|
550
552
|
will be interpreted as a regular expression.)
|
|
551
553
|
"""
|
|
552
|
-
|
|
554
|
+
assert not ctx.mets_server_url, \
|
|
555
|
+
f"remove-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
556
|
+
workspace = ctx.workspace()
|
|
553
557
|
for g in group:
|
|
554
558
|
workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
|
|
555
559
|
workspace.save_mets()
|
|
@@ -571,7 +575,9 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id):
|
|
|
571
575
|
(If any ``FILTER`` starts with ``//``, then its remainder
|
|
572
576
|
will be interpreted as a regular expression.)
|
|
573
577
|
"""
|
|
574
|
-
|
|
578
|
+
assert not ctx.mets_server_url, \
|
|
579
|
+
f"prune-files cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
580
|
+
workspace = ctx.workspace()
|
|
575
581
|
with pushd_popd(workspace.directory):
|
|
576
582
|
for f in workspace.find_files(
|
|
577
583
|
file_id=file_id,
|
|
@@ -608,8 +614,7 @@ def clean(ctx, dry_run, directories, path_glob):
|
|
|
608
614
|
If no PATH_GLOB are specified, then all files and directories
|
|
609
615
|
may match.
|
|
610
616
|
"""
|
|
611
|
-
|
|
612
|
-
workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
|
|
617
|
+
workspace = ctx.workspace()
|
|
613
618
|
allowed_files = [normpath(f.local_filename) for f in workspace.find_files(local_only=True)]
|
|
614
619
|
allowed_files.append(relpath(workspace.mets_target, start=workspace.directory))
|
|
615
620
|
allowed_dirs = set(dirname(path) for path in allowed_files)
|
|
@@ -627,7 +632,7 @@ def clean(ctx, dry_run, directories, path_glob):
|
|
|
627
632
|
if normpath(path) in allowed_files:
|
|
628
633
|
continue
|
|
629
634
|
if dry_run:
|
|
630
|
-
log.info('unlink(%s)' % path)
|
|
635
|
+
ctx.log.info('unlink(%s)' % path)
|
|
631
636
|
else:
|
|
632
637
|
unlink(path)
|
|
633
638
|
if not directories:
|
|
@@ -637,7 +642,7 @@ def clean(ctx, dry_run, directories, path_glob):
|
|
|
637
642
|
if normpath(path) in allowed_dirs:
|
|
638
643
|
continue
|
|
639
644
|
if dry_run:
|
|
640
|
-
log.info('rmdir(%s)' % path)
|
|
645
|
+
ctx.log.info('rmdir(%s)' % path)
|
|
641
646
|
else:
|
|
642
647
|
rmdir(path)
|
|
643
648
|
|
|
@@ -651,7 +656,7 @@ def list_groups(ctx):
|
|
|
651
656
|
"""
|
|
652
657
|
List fileGrp USE attributes
|
|
653
658
|
"""
|
|
654
|
-
workspace =
|
|
659
|
+
workspace = ctx.workspace()
|
|
655
660
|
print("\n".join(workspace.mets.file_groups))
|
|
656
661
|
|
|
657
662
|
# ----------------------------------------------------------------------
|
|
@@ -677,20 +682,16 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page
|
|
|
677
682
|
(If any ``FILTER`` starts with ``//``, then its remainder
|
|
678
683
|
will be interpreted as a regular expression.)
|
|
679
684
|
"""
|
|
680
|
-
workspace =
|
|
681
|
-
find_kwargs = {}
|
|
682
|
-
if page_id_range and 'ID' in output_field:
|
|
683
|
-
find_kwargs['pageId'] = page_id_range
|
|
684
|
-
page_ids = sorted({x.pageId for x in workspace.mets.find_files(**find_kwargs) if x.pageId})
|
|
685
|
+
workspace = ctx.workspace()
|
|
685
686
|
ret = []
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
ret = [[x] for x in page_ids]
|
|
689
|
-
else:
|
|
690
|
-
for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=','.join(page_ids), return_divs=True)):
|
|
687
|
+
if page_id_range or list(output_field) != ['ID']:
|
|
688
|
+
for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=page_id_range, return_divs=True)):
|
|
691
689
|
ret.append([])
|
|
692
690
|
for k in output_field:
|
|
693
691
|
ret[i].append(page_div.get(k, 'None'))
|
|
692
|
+
else:
|
|
693
|
+
for page_id in workspace.mets.physical_pages:
|
|
694
|
+
ret.append([page_id])
|
|
694
695
|
|
|
695
696
|
if numeric_range:
|
|
696
697
|
start, end = map(int, numeric_range.split('..'))
|
|
@@ -724,7 +725,7 @@ def get_id(ctx):
|
|
|
724
725
|
"""
|
|
725
726
|
Get METS id if any
|
|
726
727
|
"""
|
|
727
|
-
workspace =
|
|
728
|
+
workspace = ctx.workspace()
|
|
728
729
|
ID = workspace.mets.unique_identifier
|
|
729
730
|
if ID:
|
|
730
731
|
print(ID)
|
|
@@ -744,7 +745,7 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
|
|
|
744
745
|
|
|
745
746
|
Otherwise will create a new <mods:identifier type="purl">{{ ID }}</mods:identifier>.
|
|
746
747
|
"""
|
|
747
|
-
workspace =
|
|
748
|
+
workspace = ctx.workspace()
|
|
748
749
|
workspace.mets.unique_identifier = id
|
|
749
750
|
workspace.save_mets()
|
|
750
751
|
|
|
@@ -767,7 +768,9 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
|
|
|
767
768
|
if contentids:
|
|
768
769
|
update_kwargs['CONTENTIDS'] = contentids
|
|
769
770
|
try:
|
|
770
|
-
|
|
771
|
+
assert not ctx.mets_server_url, \
|
|
772
|
+
f"update-page cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
773
|
+
workspace = ctx.workspace()
|
|
771
774
|
workspace.mets.update_physical_page_attributes(page_id, **update_kwargs)
|
|
772
775
|
workspace.save_mets()
|
|
773
776
|
except Exception as err:
|
|
@@ -805,7 +808,9 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
|
|
|
805
808
|
mets_path = Path(mets_path)
|
|
806
809
|
if filegrp_mapping:
|
|
807
810
|
filegrp_mapping = loads(filegrp_mapping)
|
|
808
|
-
|
|
811
|
+
assert not ctx.mets_server_url, \
|
|
812
|
+
f"merge cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
813
|
+
workspace = ctx.workspace()
|
|
809
814
|
other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name))
|
|
810
815
|
workspace.merge(
|
|
811
816
|
other_workspace,
|
|
@@ -829,11 +834,12 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
|
|
|
829
834
|
# ----------------------------------------------------------------------
|
|
830
835
|
|
|
831
836
|
@workspace_cli.group('backup')
|
|
832
|
-
@
|
|
837
|
+
@pass_workspace
|
|
833
838
|
def workspace_backup_cli(ctx): # pylint: disable=unused-argument
|
|
834
839
|
"""
|
|
835
840
|
Backing and restoring workspaces - dev edition
|
|
836
841
|
"""
|
|
842
|
+
assert not ctx.mets_server_url, "Workspace backups currently not interoperable with METS Server"
|
|
837
843
|
|
|
838
844
|
@workspace_backup_cli.command('add')
|
|
839
845
|
@pass_workspace
|
|
@@ -841,7 +847,7 @@ def workspace_backup_add(ctx):
|
|
|
841
847
|
"""
|
|
842
848
|
Create a new backup
|
|
843
849
|
"""
|
|
844
|
-
backup_manager =
|
|
850
|
+
backup_manager = ctx.backup_manager()
|
|
845
851
|
backup_manager.add()
|
|
846
852
|
|
|
847
853
|
@workspace_backup_cli.command('list')
|
|
@@ -850,7 +856,7 @@ def workspace_backup_list(ctx):
|
|
|
850
856
|
"""
|
|
851
857
|
List backups
|
|
852
858
|
"""
|
|
853
|
-
backup_manager =
|
|
859
|
+
backup_manager = ctx.backup_manager()
|
|
854
860
|
for b in backup_manager.list():
|
|
855
861
|
print(b)
|
|
856
862
|
|
|
@@ -862,7 +868,7 @@ def workspace_backup_restore(ctx, choose_first, bak):
|
|
|
862
868
|
"""
|
|
863
869
|
Restore backup BAK
|
|
864
870
|
"""
|
|
865
|
-
backup_manager =
|
|
871
|
+
backup_manager = ctx.backup_manager()
|
|
866
872
|
backup_manager.restore(bak, choose_first)
|
|
867
873
|
|
|
868
874
|
@workspace_backup_cli.command('undo')
|
|
@@ -871,7 +877,7 @@ def workspace_backup_undo(ctx):
|
|
|
871
877
|
"""
|
|
872
878
|
Restore the last backup
|
|
873
879
|
"""
|
|
874
|
-
backup_manager =
|
|
880
|
+
backup_manager = ctx.backup_manager()
|
|
875
881
|
backup_manager.undo()
|
|
876
882
|
|
|
877
883
|
|
|
@@ -888,15 +894,24 @@ def workspace_serve_cli(ctx): # pylint: disable=unused-argument
|
|
|
888
894
|
@workspace_serve_cli.command('stop')
|
|
889
895
|
@pass_workspace
|
|
890
896
|
def workspace_serve_stop(ctx): # pylint: disable=unused-argument
|
|
891
|
-
"""Stop the METS server"""
|
|
892
|
-
workspace = Workspace(
|
|
893
|
-
ctx.resolver,
|
|
894
|
-
directory=ctx.directory,
|
|
895
|
-
mets_basename=ctx.mets_basename,
|
|
896
|
-
mets_server_url=ctx.mets_server_url,
|
|
897
|
-
)
|
|
897
|
+
"""Stop the METS server (saving changes to disk)"""
|
|
898
|
+
workspace = ctx.workspace()
|
|
898
899
|
workspace.mets.stop()
|
|
899
900
|
|
|
901
|
+
@workspace_serve_cli.command('reload')
|
|
902
|
+
@pass_workspace
|
|
903
|
+
def workspace_serve_reload(ctx): # pylint: disable=unused-argument
|
|
904
|
+
"""Reload the METS server from disk"""
|
|
905
|
+
workspace = ctx.workspace()
|
|
906
|
+
workspace.mets.reload()
|
|
907
|
+
|
|
908
|
+
@workspace_serve_cli.command('save')
|
|
909
|
+
@pass_workspace
|
|
910
|
+
def workspace_serve_save(ctx): # pylint: disable=unused-argument
|
|
911
|
+
"""Save the METS changes to disk"""
|
|
912
|
+
workspace = ctx.workspace()
|
|
913
|
+
workspace.mets.save()
|
|
914
|
+
|
|
900
915
|
@workspace_serve_cli.command('start')
|
|
901
916
|
@pass_workspace
|
|
902
917
|
def workspace_serve_start(ctx): # pylint: disable=unused-argument
|
|
@@ -13,7 +13,6 @@ from ocrd_utils import (
|
|
|
13
13
|
redirect_stderr_and_stdout_to_file,
|
|
14
14
|
)
|
|
15
15
|
from ocrd_validators import WorkspaceValidator
|
|
16
|
-
from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
|
|
17
16
|
|
|
18
17
|
from ..resolver import Resolver
|
|
19
18
|
from ..processor.base import ResourceNotFoundError, run_processor
|
|
@@ -23,8 +22,6 @@ from .parameter_option import parameter_option, parameter_override_option
|
|
|
23
22
|
from .ocrd_cli_options import ocrd_cli_options
|
|
24
23
|
from .mets_find_options import mets_find_options
|
|
25
24
|
|
|
26
|
-
SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]
|
|
27
|
-
|
|
28
25
|
|
|
29
26
|
def ocrd_cli_wrap_processor(
|
|
30
27
|
processorClass,
|
|
@@ -88,11 +85,9 @@ def ocrd_cli_wrap_processor(
|
|
|
88
85
|
if list_resources:
|
|
89
86
|
processor.list_resources()
|
|
90
87
|
sys.exit()
|
|
91
|
-
if subcommand:
|
|
88
|
+
if subcommand or address or queue or database:
|
|
92
89
|
# Used for checking/starting network agents for the WebAPI architecture
|
|
93
90
|
check_and_run_network_agent(processorClass, subcommand, address, database, queue)
|
|
94
|
-
elif address or queue or database:
|
|
95
|
-
raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
|
|
96
91
|
|
|
97
92
|
# from here: single-run processing context
|
|
98
93
|
initLogging()
|
|
@@ -162,6 +157,11 @@ def ocrd_cli_wrap_processor(
|
|
|
162
157
|
def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
|
|
163
158
|
"""
|
|
164
159
|
"""
|
|
160
|
+
from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
|
|
161
|
+
SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]
|
|
162
|
+
|
|
163
|
+
if not subcommand:
|
|
164
|
+
raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
|
|
165
165
|
if subcommand not in SUBCOMMANDS:
|
|
166
166
|
raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}")
|
|
167
167
|
|
|
@@ -43,6 +43,7 @@ def ocrd_cli_options(f):
|
|
|
43
43
|
option('--address', type=ServerAddressParamType()),
|
|
44
44
|
option('--queue', type=QueueServerParamType()),
|
|
45
45
|
option('--database', type=DatabaseParamType()),
|
|
46
|
+
option('-R', '--resolve-resource'),
|
|
46
47
|
option('-C', '--show-resource'),
|
|
47
48
|
option('-L', '--list-resources', is_flag=True, default=False),
|
|
48
49
|
option('-J', '--dump-json', is_flag=True, default=False),
|
|
@@ -27,8 +27,8 @@ ocrd__log () {
|
|
|
27
27
|
## Ensure minimum version
|
|
28
28
|
# ht https://stackoverflow.com/posts/4025065
|
|
29
29
|
ocrd__minversion () {
|
|
30
|
-
local minversion_raw="$1"
|
|
31
30
|
set -e
|
|
31
|
+
local minversion_raw="$1"
|
|
32
32
|
local version_raw=$(ocrd --version|sed 's/ocrd, version //')
|
|
33
33
|
local version_mmp=$(echo "$version_raw" | grep -Eo '([0-9]+\.?){3}')
|
|
34
34
|
local version_prerelease_suffix="${version_raw#$version_mmp}"
|
|
@@ -123,6 +123,7 @@ ocrd__usage () {
|
|
|
123
123
|
## declare -A ocrd__argv=()
|
|
124
124
|
## ```
|
|
125
125
|
ocrd__parse_argv () {
|
|
126
|
+
set -e
|
|
126
127
|
|
|
127
128
|
# if [[ -n "$ZSH_VERSION" ]];then
|
|
128
129
|
# print -r -- ${+ocrd__argv} ${(t)ocrd__argv}
|
|
@@ -135,11 +136,16 @@ ocrd__parse_argv () {
|
|
|
135
136
|
ocrd__raise "Must set \$params (declare -A params)"
|
|
136
137
|
fi
|
|
137
138
|
|
|
139
|
+
if ! declare -p "params_json" >/dev/null 2>/dev/null ;then
|
|
140
|
+
ocrd__raise "Must set \$params_json (declare params_json)"
|
|
141
|
+
fi
|
|
142
|
+
|
|
138
143
|
if [[ $# = 0 ]];then
|
|
139
144
|
ocrd__usage
|
|
140
145
|
exit 1
|
|
141
146
|
fi
|
|
142
147
|
|
|
148
|
+
ocrd__argv[debug]=false
|
|
143
149
|
ocrd__argv[overwrite]=false
|
|
144
150
|
ocrd__argv[profile]=false
|
|
145
151
|
ocrd__argv[profile_file]=
|
|
@@ -170,6 +176,7 @@ ocrd__parse_argv () {
|
|
|
170
176
|
-w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;;
|
|
171
177
|
-m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;;
|
|
172
178
|
-U|--mets-server-url) ocrd__argv[mets_server_url]="$2" ; shift ;;
|
|
179
|
+
--debug) ocrd__argv[debug]=true ;;
|
|
173
180
|
--overwrite) ocrd__argv[overwrite]=true ;;
|
|
174
181
|
--profile) ocrd__argv[profile]=true ;;
|
|
175
182
|
--profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;;
|
|
@@ -242,17 +249,6 @@ ocrd__parse_argv () {
|
|
|
242
249
|
trap showtime DEBUG
|
|
243
250
|
fi
|
|
244
251
|
|
|
245
|
-
# check fileGrps
|
|
246
|
-
local _valopts=( --workspace "${ocrd__argv[working_dir]}" --mets-basename "$(basename ${ocrd__argv[mets_file]})" )
|
|
247
|
-
if [[ ${ocrd__argv[overwrite]} = true ]]; then
|
|
248
|
-
_valopts+=( --overwrite )
|
|
249
|
-
fi
|
|
250
|
-
if [[ -n "${ocrd__argv[page_id]:-}" ]]; then
|
|
251
|
-
_valopts+=( --page-id "${ocrd__argv[page_id]}" )
|
|
252
|
-
fi
|
|
253
|
-
_valopts+=( "${OCRD_TOOL_NAME#ocrd-} -I ${ocrd__argv[input_file_grp]} -O ${ocrd__argv[output_file_grp]} ${__parameters[*]@Q} ${__parameter_overrides[*]@Q}" )
|
|
254
|
-
ocrd validate tasks "${_valopts[@]}" || exit $?
|
|
255
|
-
|
|
256
252
|
# check parameters
|
|
257
253
|
local params_parsed retval
|
|
258
254
|
params_parsed="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params "${__parameters[@]}" "${__parameter_overrides[@]}")" || {
|
|
@@ -261,10 +257,12 @@ ocrd__parse_argv () {
|
|
|
261
257
|
$params_parsed"
|
|
262
258
|
}
|
|
263
259
|
eval "$params_parsed"
|
|
260
|
+
params_json="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params --json "${__parameters[@]}" "${__parameter_overrides[@]}")"
|
|
264
261
|
|
|
265
262
|
}
|
|
266
263
|
|
|
267
264
|
ocrd__wrap () {
|
|
265
|
+
set -e
|
|
268
266
|
|
|
269
267
|
declare -gx OCRD_TOOL_JSON="$1"
|
|
270
268
|
declare -gx OCRD_TOOL_NAME="$2"
|
|
@@ -272,6 +270,7 @@ ocrd__wrap () {
|
|
|
272
270
|
shift
|
|
273
271
|
declare -Agx params
|
|
274
272
|
params=()
|
|
273
|
+
declare -g params_json
|
|
275
274
|
declare -Agx ocrd__argv
|
|
276
275
|
ocrd__argv=()
|
|
277
276
|
|
|
@@ -293,22 +292,26 @@ ocrd__wrap () {
|
|
|
293
292
|
|
|
294
293
|
ocrd__parse_argv "$@"
|
|
295
294
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
eval declare -Ag "ocrd__file$i=( $line )"
|
|
300
|
-
eval "ocrd__files[$i]=ocrd__file$i"
|
|
301
|
-
let ++i
|
|
302
|
-
done < <(ocrd bashlib input-files \
|
|
295
|
+
declare -ag ocrd__files
|
|
296
|
+
IFS=$'\n'
|
|
297
|
+
ocrd__files=( $(ocrd bashlib input-files \
|
|
303
298
|
--ocrd-tool $OCRD_TOOL_JSON \
|
|
304
299
|
--executable $OCRD_TOOL_NAME \
|
|
300
|
+
$(if [[ ${ocrd__argv[debug]} = true ]]; then echo --debug; fi) \
|
|
301
|
+
$(if [[ ${ocrd__argv[overwrite]} = true ]]; then echo --overwrite; fi) \
|
|
305
302
|
-m "${ocrd__argv[mets_file]}" \
|
|
303
|
+
-d "${ocrd__argv[working_dir]}" \
|
|
304
|
+
${ocrd__argv[mets_server_url]:+-U} ${ocrd__argv[mets_server_url]:-} \
|
|
305
|
+
-p "$params_json" \
|
|
306
306
|
-I "${ocrd__argv[input_file_grp]}" \
|
|
307
307
|
-O "${ocrd__argv[output_file_grp]}" \
|
|
308
|
-
${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-})
|
|
308
|
+
${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) )
|
|
309
|
+
IFS=$' \t\n'
|
|
309
310
|
}
|
|
310
311
|
|
|
311
312
|
## usage: pageId=$(ocrd__input_file 3 pageId)
|
|
312
313
|
ocrd__input_file() {
|
|
313
|
-
|
|
314
|
+
declare -A input_file
|
|
315
|
+
eval input_file=( "${ocrd__files[$1]}" )
|
|
316
|
+
eval echo "${input_file[$2]}"
|
|
314
317
|
}
|
|
@@ -88,6 +88,14 @@ class OcrdFileGroupListModel(BaseModel):
|
|
|
88
88
|
return OcrdFileGroupListModel(file_groups=file_groups)
|
|
89
89
|
|
|
90
90
|
|
|
91
|
+
class OcrdPageListModel(BaseModel):
    # Response schema carrying a list of physical pages as strings.
    physical_pages: List[str] = Field()

    @staticmethod
    def create(physical_pages: List[str]):
        # Convenience factory: wrap the given list in a model instance.
        model = OcrdPageListModel(physical_pages=physical_pages)
        return model
|
|
97
|
+
|
|
98
|
+
|
|
91
99
|
class OcrdAgentListModel(BaseModel):
|
|
92
100
|
agents: List[OcrdAgentModel] = Field()
|
|
93
101
|
|
|
@@ -210,6 +218,17 @@ class ClientSideOcrdMets:
|
|
|
210
218
|
).json()["text"]
|
|
211
219
|
return self.ws_dir_path
|
|
212
220
|
|
|
221
|
+
@property
def physical_pages(self) -> List[str]:
    """Return the physical page list reported by the METS server.

    In plain mode this issues ``GET {url}/physical_pages``. In multiplexing
    mode it POSTs the request description built by
    ``MpxReq.physical_pages`` to ``self.url``.

    Returns:
        The value stored under the ``"physical_pages"`` key of the JSON
        response.

    Raises:
        RuntimeError: if the server answers with a non-success status.
    """
    if self.multiplexing_mode:
        response = self.session.request(
            "POST",
            self.url,
            json=MpxReq.physical_pages(self.ws_dir_path)
        )
    else:
        response = self.session.request("GET", f"{self.url}/physical_pages")
    # Report the server's own message instead of failing later with an
    # opaque KeyError when an error body lacks the "physical_pages" key.
    if not response.ok:
        raise RuntimeError(f"Failed to list physical pages: {response.text}")
    return response.json()["physical_pages"]
|
|
231
|
+
|
|
213
232
|
@property
|
|
214
233
|
def file_groups(self):
|
|
215
234
|
if not self.multiplexing_mode:
|
|
@@ -284,15 +303,17 @@ class ClientSideOcrdMets:
|
|
|
284
303
|
file_id=ID, page_id=pageId,
|
|
285
304
|
mimetype=mimetype, url=url, local_filename=local_filename
|
|
286
305
|
)
|
|
306
|
+
# add force+ignore
|
|
307
|
+
kwargs = {**kwargs, **data.dict()}
|
|
287
308
|
|
|
288
309
|
if not self.multiplexing_mode:
|
|
289
|
-
r = self.session.request("POST", f"{self.url}/file", data=
|
|
290
|
-
if not r:
|
|
291
|
-
raise RuntimeError("
|
|
310
|
+
r = self.session.request("POST", f"{self.url}/file", data=kwargs)
|
|
311
|
+
if not r.ok:
|
|
312
|
+
raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()}")
|
|
292
313
|
else:
|
|
293
|
-
r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path,
|
|
294
|
-
if
|
|
295
|
-
raise RuntimeError(f"
|
|
314
|
+
r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, kwargs))
|
|
315
|
+
if not r.ok:
|
|
316
|
+
# Bug fix: the original indexed the payload with the bare name `errors`,
# which is undefined and raised NameError instead of the intended
# RuntimeError. Use the "errors" entry when present, otherwise report the
# whole payload (as the non-multiplexing branch does).
err = r.json()
raise RuntimeError(f"Failed to add file ({str(data)}): {err.get('errors', err) if isinstance(err, dict) else err}")
|
|
296
317
|
|
|
297
318
|
return ClientSideOcrdFile(
|
|
298
319
|
None, fileGrp=file_grp,
|
|
@@ -347,6 +368,11 @@ class MpxReq:
|
|
|
347
368
|
return MpxReq.__args_wrapper(
|
|
348
369
|
ws_dir_path, method_type="GET", response_type="text", request_url="workspace_path", request_data={})
|
|
349
370
|
|
|
371
|
+
@staticmethod
def physical_pages(ws_dir_path: str) -> Dict:
    """Build the request description for listing physical pages.

    Args:
        ws_dir_path: workspace directory path passed to the args wrapper.

    Returns:
        The dict produced by the args wrapper for a ``GET`` on
        ``physical_pages`` with an empty request body and a ``dict``
        response type.
    """
    payload = MpxReq.__args_wrapper(
        ws_dir_path,
        method_type="GET",
        response_type="dict",
        request_url="physical_pages",
        request_data={},
    )
    return payload
|
|
375
|
+
|
|
350
376
|
@staticmethod
|
|
351
377
|
def file_groups(ws_dir_path: str) -> Dict:
|
|
352
378
|
return MpxReq.__args_wrapper(
|
|
@@ -466,6 +492,10 @@ class OcrdMetsServer:
|
|
|
466
492
|
async def workspace_path():
|
|
467
493
|
return Response(content=workspace.directory, media_type="text/plain")
|
|
468
494
|
|
|
495
|
+
@app.get(path='/physical_pages', response_model=OcrdPageListModel)
async def physical_pages():
    # Expose the workspace METS' physical page list under the key
    # expected by the response model.
    pages = workspace.mets.physical_pages
    return {'physical_pages': pages}
|
|
498
|
+
|
|
469
499
|
@app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
|
|
470
500
|
async def file_groups():
|
|
471
501
|
return {'file_groups': workspace.mets.file_groups}
|
|
@@ -505,7 +535,8 @@ class OcrdMetsServer:
|
|
|
505
535
|
page_id: Optional[str] = Form(),
|
|
506
536
|
mimetype: str = Form(),
|
|
507
537
|
url: Optional[str] = Form(None),
|
|
508
|
-
local_filename: Optional[str] = Form(None)
|
|
538
|
+
local_filename: Optional[str] = Form(None),
|
|
539
|
+
force: bool = Form(False),
|
|
509
540
|
):
|
|
510
541
|
"""
|
|
511
542
|
Add a file
|
|
@@ -517,7 +548,7 @@ class OcrdMetsServer:
|
|
|
517
548
|
)
|
|
518
549
|
# Add to workspace
|
|
519
550
|
kwargs = file_resource.dict()
|
|
520
|
-
workspace.add_file(**kwargs)
|
|
551
|
+
workspace.add_file(**kwargs, force=force)
|
|
521
552
|
return file_resource
|
|
522
553
|
|
|
523
554
|
# ------------- #
|