ocrd 3.0.0a2__tar.gz → 3.0.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocrd-3.0.0a2/src/ocrd.egg-info → ocrd-3.0.0b1}/PKG-INFO +1 -1
- ocrd-3.0.0b1/VERSION +1 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/__init__.py +34 -26
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/bashlib.py +32 -18
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/ocrd_tool.py +7 -5
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/workspace.py +10 -8
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/__init__.py +13 -7
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/lib.bash +2 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/mets_server.py +2 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/base.py +163 -63
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy_processor.py +4 -11
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/helpers.py +23 -17
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/ocrd_page_result.py +3 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resolver.py +0 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resource_manager.py +9 -5
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace.py +8 -9
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace_backup.py +1 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1/src/ocrd.egg-info}/PKG-INFO +1 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/SOURCES.txt +1 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_modelfactory/__init__.py +1 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/constants.py +0 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_exif.py +2 -2
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_file.py +2 -2
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_mets.py +22 -22
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_page.py +0 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_xml_base.py +2 -2
- ocrd-3.0.0b1/src/ocrd_network/cli/client.py +203 -0
- ocrd-3.0.0b1/src/ocrd_network/client.py +63 -0
- ocrd-3.0.0b1/src/ocrd_network/client_utils.py +101 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processing_server.py +1 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/deployer.py +12 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/server_utils.py +12 -10
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/__init__.py +2 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/config.py +16 -2
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/image.py +25 -25
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/logging.py +17 -19
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/os.py +4 -5
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/str.py +10 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/json_validator.py +1 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_tool_validator.py +2 -2
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/page_validator.py +56 -56
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/parameter_validator.py +2 -2
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/resource_list_validator.py +4 -3
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/workspace_validator.py +21 -21
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_validator.py +1 -1
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_workspace.py +1 -1
- ocrd-3.0.0a2/VERSION +0 -1
- ocrd-3.0.0a2/src/ocrd_network/cli/client.py +0 -99
- ocrd-3.0.0a2/src/ocrd_network/client.py +0 -37
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/LICENSE +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/MANIFEST.in +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_bashlib.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_modelfactory.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_models.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_network.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_utils.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/README_ocrd_validators.md +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/pyproject.toml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/requirements.txt +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/setup.cfg +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/log.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/network.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/process.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/resmgr.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/validate.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/cli/zip.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/constants.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/loglevel_option.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/mets_find_options.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/ocrd_cli_options.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/decorators/parameter_option.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/ocrd-all-tool.json +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/processor/builtin/dummy/ocrd-tool.json +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/resource_list.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/task_sequence.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd/workspace_bagger.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/dependency_links.txt +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/entry_points.txt +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/requires.txt +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd.egg-info/top_level.txt +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/mets-empty.xml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_agent.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/ocrd_page_generateds.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/report.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_models/utils.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processing_server.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processing_worker.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/cli/processor_server.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/constants.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/database.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/logging_utils.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/job.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/messages.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/ocrd_tool.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/workflow.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/models/workspace.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/param_validators.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/process_helpers.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processing_worker.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/processor_server.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/connector.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/constants.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/consumer.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/helpers.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/ocrd_messages.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/rabbitmq_utils/publisher.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/config_parser.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/connection_clients.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/hosts.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/network_agents.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/runtime_data/network_services.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/server_cache.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/tcp_to_uds_mets_proxy.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_network/utils.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/constants.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/deprecate.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/introspect.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_utils/ocrd_logging.conf +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/__init__.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/bagit-profile.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/constants.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/message_processing.schema.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/message_result.schema.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/mets.xsd +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_network_message_validator.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_tool.schema.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/ocrd_zip_validator.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/page.xsd +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/processing_server_config.schema.yml +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/processing_server_config_validator.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xlink.xsd +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_mets_validator.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/src/ocrd_validators/xsd_page_validator.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_decorators.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_logging.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_logging_conf.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_mets_server.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_model_factory.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resolver.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resolver_oai.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_resource_manager.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_task_sequence.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_utils.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_version.py +0 -0
- {ocrd-3.0.0a2 → ocrd-3.0.0b1}/tests/test_workspace_remove.py +0 -0
ocrd-3.0.0b1/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.0.0b1
|
|
@@ -10,6 +10,36 @@ import click
|
|
|
10
10
|
|
|
11
11
|
from ocrd_utils import config
|
|
12
12
|
|
|
13
|
+
# pylint: disable=wrong-import-position
|
|
14
|
+
|
|
15
|
+
def command_with_replaced_help(*replacements):
|
|
16
|
+
|
|
17
|
+
class CommandWithReplacedHelp(click.Command):
|
|
18
|
+
def get_help(self, ctx):
|
|
19
|
+
newhelp = super().get_help(ctx)
|
|
20
|
+
for replacement in replacements:
|
|
21
|
+
newhelp = re.sub(*replacement, newhelp)
|
|
22
|
+
# print(newhelp)
|
|
23
|
+
return newhelp
|
|
24
|
+
|
|
25
|
+
return CommandWithReplacedHelp
|
|
26
|
+
|
|
27
|
+
# pylint: enable=wrong-import-position
|
|
28
|
+
|
|
29
|
+
from ..decorators import ocrd_loglevel
|
|
30
|
+
from .ocrd_tool import ocrd_tool_cli
|
|
31
|
+
from .workspace import workspace_cli
|
|
32
|
+
from .process import process_cli
|
|
33
|
+
from .bashlib import bashlib_cli
|
|
34
|
+
from .validate import validate_cli
|
|
35
|
+
from .resmgr import resmgr_cli
|
|
36
|
+
from .zip import zip_cli
|
|
37
|
+
from .log import log_cli
|
|
38
|
+
from .network import network_cli
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
__all__ = ['cli']
|
|
42
|
+
|
|
13
43
|
_epilog = f"""
|
|
14
44
|
|
|
15
45
|
\b
|
|
@@ -41,6 +71,10 @@ Variables:
|
|
|
41
71
|
\b
|
|
42
72
|
{config.describe('OCRD_MAX_PROCESSOR_CACHE')}
|
|
43
73
|
\b
|
|
74
|
+
{config.describe('OCRD_NETWORK_CLIENT_POLLING_SLEEP')}
|
|
75
|
+
\b
|
|
76
|
+
{config.describe('OCRD_NETWORK_CLIENT_POLLING_TIMEOUT')}
|
|
77
|
+
\b
|
|
44
78
|
{config.describe('OCRD_NETWORK_SERVER_ADDR_PROCESSING')}
|
|
45
79
|
\b
|
|
46
80
|
{config.describe('OCRD_NETWORK_SERVER_ADDR_WORKFLOW')}
|
|
@@ -60,30 +94,6 @@ Variables:
|
|
|
60
94
|
{config.describe('OCRD_LOGGING_DEBUG')}
|
|
61
95
|
"""
|
|
62
96
|
|
|
63
|
-
def command_with_replaced_help(*replacements):
|
|
64
|
-
|
|
65
|
-
class CommandWithReplacedHelp(click.Command):
|
|
66
|
-
def get_help(self, ctx):
|
|
67
|
-
help = super().get_help(ctx)
|
|
68
|
-
for replacement in replacements:
|
|
69
|
-
help = re.sub(*replacement, help)
|
|
70
|
-
# print(help)
|
|
71
|
-
return help
|
|
72
|
-
|
|
73
|
-
return CommandWithReplacedHelp
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
from ..decorators import ocrd_loglevel
|
|
77
|
-
from .ocrd_tool import ocrd_tool_cli
|
|
78
|
-
from .workspace import workspace_cli
|
|
79
|
-
from .process import process_cli
|
|
80
|
-
from .bashlib import bashlib_cli
|
|
81
|
-
from .validate import validate_cli
|
|
82
|
-
from .resmgr import resmgr_cli
|
|
83
|
-
from .zip import zip_cli
|
|
84
|
-
from .log import log_cli
|
|
85
|
-
from .network import network_cli
|
|
86
|
-
|
|
87
97
|
@click.group(epilog=_epilog)
|
|
88
98
|
@click.version_option(package_name='ocrd')
|
|
89
99
|
@ocrd_loglevel
|
|
@@ -101,5 +111,3 @@ cli.add_command(validate_cli)
|
|
|
101
111
|
cli.add_command(log_cli)
|
|
102
112
|
cli.add_command(resmgr_cli)
|
|
103
113
|
cli.add_command(network_cli)
|
|
104
|
-
|
|
105
|
-
__all__ = ['cli']
|
|
@@ -8,7 +8,6 @@ OCR-D CLI: bash library
|
|
|
8
8
|
"""
|
|
9
9
|
from __future__ import print_function
|
|
10
10
|
import sys
|
|
11
|
-
from os.path import isfile
|
|
12
11
|
import click
|
|
13
12
|
|
|
14
13
|
from ocrd.constants import BASHLIB_FILENAME
|
|
@@ -23,15 +22,7 @@ from ocrd.decorators import (
|
|
|
23
22
|
ocrd_loglevel,
|
|
24
23
|
ocrd_cli_wrap_processor
|
|
25
24
|
)
|
|
26
|
-
from ocrd_utils import
|
|
27
|
-
is_local_filename,
|
|
28
|
-
get_local_filename,
|
|
29
|
-
initLogging,
|
|
30
|
-
getLogger,
|
|
31
|
-
make_file_id,
|
|
32
|
-
config
|
|
33
|
-
)
|
|
34
|
-
from ocrd.resolver import Resolver
|
|
25
|
+
from ocrd_utils import make_file_id
|
|
35
26
|
from ocrd.processor import Processor
|
|
36
27
|
|
|
37
28
|
# ----------------------------------------------------------------------
|
|
@@ -82,6 +73,8 @@ def bashlib_constants(name):
|
|
|
82
73
|
print(val)
|
|
83
74
|
|
|
84
75
|
@bashlib_cli.command('input-files')
|
|
76
|
+
@click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
|
|
77
|
+
@click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
|
|
85
78
|
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
|
|
86
79
|
@click.option('-w', '--working-dir', help="Working Directory")
|
|
87
80
|
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
|
|
@@ -96,7 +89,7 @@ def bashlib_constants(name):
|
|
|
96
89
|
@parameter_option
|
|
97
90
|
@parameter_override_option
|
|
98
91
|
@ocrd_loglevel
|
|
99
|
-
def bashlib_input_files(**kwargs):
|
|
92
|
+
def bashlib_input_files(ocrd_tool, executable, **kwargs):
|
|
100
93
|
"""
|
|
101
94
|
List input files for processing
|
|
102
95
|
|
|
@@ -108,12 +101,6 @@ def bashlib_input_files(**kwargs):
|
|
|
108
101
|
(The printing format is one associative array initializer per line.)
|
|
109
102
|
"""
|
|
110
103
|
class BashlibProcessor(Processor):
|
|
111
|
-
@property
|
|
112
|
-
def ocrd_tool(self):
|
|
113
|
-
return {'executable': '', 'steps': ['']}
|
|
114
|
-
@property
|
|
115
|
-
def version(self):
|
|
116
|
-
return '1.0'
|
|
117
104
|
# go half way of the normal run_processor / process_workspace call tree
|
|
118
105
|
# by just delegating to process_workspace, overriding process_page_file
|
|
119
106
|
# to ensure all input files exist locally (without persisting them in the METS)
|
|
@@ -129,4 +116,31 @@ def bashlib_input_files(**kwargs):
|
|
|
129
116
|
print(f"[{field}]='{value}'", end=' ')
|
|
130
117
|
output_file_id = make_file_id(input_files[0], kwargs['output_file_grp'])
|
|
131
118
|
print(f"[outputFileId]='{output_file_id}'")
|
|
132
|
-
|
|
119
|
+
if ocrd_tool and executable:
|
|
120
|
+
class FullBashlibProcessor(BashlibProcessor):
|
|
121
|
+
@property
|
|
122
|
+
def metadata_location(self):
|
|
123
|
+
# needed for metadata loading and validation mechanism
|
|
124
|
+
return ocrd_tool
|
|
125
|
+
@property
|
|
126
|
+
def executable(self):
|
|
127
|
+
# needed for ocrd_tool lookup
|
|
128
|
+
return executable
|
|
129
|
+
else:
|
|
130
|
+
# we have no true metadata file, so fill in just to make it work
|
|
131
|
+
class FullBashlibProcessor(BashlibProcessor):
|
|
132
|
+
@property
|
|
133
|
+
def ocrd_tool(self):
|
|
134
|
+
# needed to satisfy the validator
|
|
135
|
+
return {'executable': '',
|
|
136
|
+
# required now
|
|
137
|
+
'input_file_grp_cardinality': 1,
|
|
138
|
+
'output_file_grp_cardinality': 1,
|
|
139
|
+
'steps': ['']
|
|
140
|
+
}
|
|
141
|
+
@property
|
|
142
|
+
def version(self):
|
|
143
|
+
# needed to satisfy the validator and wrapper
|
|
144
|
+
return '1.0'
|
|
145
|
+
|
|
146
|
+
ocrd_cli_wrap_processor(FullBashlibProcessor, **kwargs)
|
|
@@ -17,7 +17,6 @@ from ocrd.decorators import parameter_option, parameter_override_option
|
|
|
17
17
|
from ocrd.processor import Processor
|
|
18
18
|
from ocrd_utils import (
|
|
19
19
|
set_json_key_value_overrides,
|
|
20
|
-
VERSION as OCRD_VERSION,
|
|
21
20
|
parse_json_string_or_file,
|
|
22
21
|
parse_json_string_with_comments as loads
|
|
23
22
|
)
|
|
@@ -29,23 +28,26 @@ class OcrdToolCtx():
|
|
|
29
28
|
self.filename = filename
|
|
30
29
|
with codecs.open(filename, encoding='utf-8') as f:
|
|
31
30
|
self.content = f.read()
|
|
31
|
+
# perhaps the validator should _always_ run (for default expansion)
|
|
32
|
+
# so validate command only for the report?
|
|
32
33
|
self.json = loads(self.content)
|
|
34
|
+
self.tool_name = ''
|
|
33
35
|
|
|
34
36
|
class BashProcessor(Processor):
|
|
35
37
|
@property
|
|
36
|
-
def metadata(inner_self):
|
|
38
|
+
def metadata(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
37
39
|
return self.json
|
|
38
40
|
@property
|
|
39
|
-
def executable(inner_self):
|
|
41
|
+
def executable(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
40
42
|
return self.tool_name
|
|
41
43
|
@property
|
|
42
|
-
def moduledir(inner_self):
|
|
44
|
+
def moduledir(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
43
45
|
return os.path.dirname(self.filename)
|
|
44
46
|
# set docstrings to empty
|
|
45
47
|
__doc__ = None
|
|
46
48
|
# HACK: override the module-level docstring, too
|
|
47
49
|
getmodule(OcrdToolCtx).__doc__ = None
|
|
48
|
-
def process(inner_self):
|
|
50
|
+
def process(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
49
51
|
return super()
|
|
50
52
|
|
|
51
53
|
self.processor = BashProcessor
|
|
@@ -6,7 +6,7 @@ OCR-D CLI: workspace management
|
|
|
6
6
|
:nested: full
|
|
7
7
|
"""
|
|
8
8
|
import os
|
|
9
|
-
from os import
|
|
9
|
+
from os import rmdir, unlink
|
|
10
10
|
from os.path import dirname, relpath, normpath, exists, join, isabs, isdir
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from json import loads, dumps
|
|
@@ -14,7 +14,6 @@ import sys
|
|
|
14
14
|
from glob import glob # XXX pathlib.Path.glob does not support absolute globs
|
|
15
15
|
import re
|
|
16
16
|
import time
|
|
17
|
-
import numpy as np
|
|
18
17
|
|
|
19
18
|
import click
|
|
20
19
|
|
|
@@ -118,7 +117,7 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
|
|
|
118
117
|
@workspace_cli.command('clone', cls=command_with_replaced_help(
|
|
119
118
|
(r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
|
|
120
119
|
@click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
|
|
121
|
-
@click.option('-a', '--download', is_flag=True, help="Download all files and
|
|
120
|
+
@click.option('-a', '--download', is_flag=True, help="Download all selected files and add local path references in METS file afterwards")
|
|
122
121
|
@click.argument('mets_url')
|
|
123
122
|
@mets_find_options
|
|
124
123
|
# XXX deprecated
|
|
@@ -129,8 +128,10 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
|
|
|
129
128
|
Create a workspace from METS_URL and return the directory
|
|
130
129
|
|
|
131
130
|
METS_URL can be a URL, an absolute path or a path relative to $PWD.
|
|
132
|
-
If METS_URL is not provided, use --mets accordingly.
|
|
133
131
|
METS_URL can also be an OAI-PMH GetRecord URL wrapping a METS file.
|
|
132
|
+
|
|
133
|
+
Additional options pertain to the selection of files / fileGrps / pages
|
|
134
|
+
to be downloaded, if --download is used.
|
|
134
135
|
"""
|
|
135
136
|
LOG = getLogger('ocrd.cli.workspace.clone')
|
|
136
137
|
if workspace_dir:
|
|
@@ -143,6 +144,7 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
|
|
|
143
144
|
mets_basename=ctx.mets_basename,
|
|
144
145
|
clobber_mets=clobber_mets,
|
|
145
146
|
download=download,
|
|
147
|
+
fileGrp=file_grp,
|
|
146
148
|
ID=file_id,
|
|
147
149
|
pageId=page_id,
|
|
148
150
|
mimetype=mimetype,
|
|
@@ -408,7 +410,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
408
410
|
if dry_run:
|
|
409
411
|
log.info('workspace.add_file(%s)' % file_dict)
|
|
410
412
|
else:
|
|
411
|
-
workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)
|
|
413
|
+
workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
|
|
412
414
|
|
|
413
415
|
# save changes to disk
|
|
414
416
|
workspace.save_mets()
|
|
@@ -452,7 +454,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
|
|
|
452
454
|
snake_to_camel = {"file_id": "ID", "page_id": "pageId", "file_grp": "fileGrp"}
|
|
453
455
|
output_field = [snake_to_camel.get(x, x) for x in output_field]
|
|
454
456
|
modified_mets = False
|
|
455
|
-
ret =
|
|
457
|
+
ret = []
|
|
456
458
|
workspace = Workspace(
|
|
457
459
|
ctx.resolver,
|
|
458
460
|
directory=ctx.directory,
|
|
@@ -748,7 +750,7 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
|
|
|
748
750
|
|
|
749
751
|
@workspace_cli.command('update-page')
|
|
750
752
|
@click.option('--set', 'attr_value_pairs', help=f"set mets:div ATTR to VALUE. possible keys: {METS_PAGE_DIV_ATTRIBUTE.names()}", metavar="ATTR VALUE", nargs=2, multiple=True)
|
|
751
|
-
@click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
|
|
753
|
+
@click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
|
|
752
754
|
@click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
|
|
753
755
|
@click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
|
|
754
756
|
@click.argument('PAGE_ID')
|
|
@@ -757,7 +759,7 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
|
|
|
757
759
|
"""
|
|
758
760
|
Update the @ID, @ORDER, @ORDERLABEL, @LABEL or @CONTENTIDS attributes of the mets:div with @ID=PAGE_ID
|
|
759
761
|
"""
|
|
760
|
-
update_kwargs =
|
|
762
|
+
update_kwargs = dict(attr_value_pairs)
|
|
761
763
|
if order:
|
|
762
764
|
update_kwargs['ORDER'] = order
|
|
763
765
|
if orderlabel:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import sys
|
|
2
|
+
from contextlib import nullcontext
|
|
2
3
|
|
|
3
4
|
from ocrd_utils import (
|
|
4
5
|
config,
|
|
@@ -9,6 +10,7 @@ from ocrd_utils import (
|
|
|
9
10
|
parse_json_string_with_comments,
|
|
10
11
|
set_json_key_value_overrides,
|
|
11
12
|
parse_json_string_or_file,
|
|
13
|
+
redirect_stderr_and_stdout_to_file,
|
|
12
14
|
)
|
|
13
15
|
from ocrd_validators import WorkspaceValidator
|
|
14
16
|
from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
|
|
@@ -104,10 +106,10 @@ def ocrd_cli_wrap_processor(
|
|
|
104
106
|
kwargs['parameter'] = parse_json_string_or_file(*kwargs['parameter'],
|
|
105
107
|
resolve_preset_file=resolve)
|
|
106
108
|
else:
|
|
107
|
-
kwargs['parameter'] =
|
|
109
|
+
kwargs['parameter'] = {}
|
|
108
110
|
# Merge parameter overrides and parameters
|
|
109
111
|
if 'parameter_override' in kwargs:
|
|
110
|
-
set_json_key_value_overrides(kwargs['parameter'], *kwargs
|
|
112
|
+
set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override'))
|
|
111
113
|
# Assert -I / -O
|
|
112
114
|
if not kwargs['input_file_grp']:
|
|
113
115
|
raise ValueError('-I/--input-file-grp is required')
|
|
@@ -140,17 +142,21 @@ def ocrd_cli_wrap_processor(
|
|
|
140
142
|
print("Profiling...")
|
|
141
143
|
pr = cProfile.Profile()
|
|
142
144
|
pr.enable()
|
|
143
|
-
def
|
|
145
|
+
def goexit():
|
|
144
146
|
pr.disable()
|
|
145
147
|
print("Profiling completed")
|
|
146
148
|
if profile_file:
|
|
147
|
-
|
|
148
|
-
pr.dump_stats(profile_file)
|
|
149
|
+
pr.dump_stats(profile_file)
|
|
149
150
|
s = io.StringIO()
|
|
150
151
|
pstats.Stats(pr, stream=s).sort_stats("cumulative").print_stats()
|
|
151
152
|
print(s.getvalue())
|
|
152
|
-
atexit.register(
|
|
153
|
-
|
|
153
|
+
atexit.register(goexit)
|
|
154
|
+
if log_filename:
|
|
155
|
+
log_ctx = redirect_stderr_and_stdout_to_file(log_filename)
|
|
156
|
+
else:
|
|
157
|
+
log_ctx = nullcontext()
|
|
158
|
+
with log_ctx:
|
|
159
|
+
run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
|
|
154
160
|
|
|
155
161
|
|
|
156
162
|
def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
|
|
@@ -299,6 +299,8 @@ ocrd__wrap () {
|
|
|
299
299
|
eval "ocrd__files[$i]=ocrd__file$i"
|
|
300
300
|
let ++i
|
|
301
301
|
done < <(ocrd bashlib input-files \
|
|
302
|
+
--ocrd-tool $OCRD_TOOL_JSON \
|
|
303
|
+
--executable $OCRD_TOOL_NAME \
|
|
302
304
|
-m "${ocrd__argv[mets_file]}" \
|
|
303
305
|
-I "${ocrd__argv[input_file_grp]}" \
|
|
304
306
|
-O "${ocrd__argv[output_file_grp]}" \
|
|
@@ -21,7 +21,7 @@ from pydantic import BaseModel, Field, ValidationError
|
|
|
21
21
|
import uvicorn
|
|
22
22
|
|
|
23
23
|
from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
|
|
24
|
-
from ocrd_utils import getLogger
|
|
24
|
+
from ocrd_utils import getLogger
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
#
|
|
@@ -236,7 +236,7 @@ class ClientSideOcrdMets:
|
|
|
236
236
|
agent_dict["_type"] = agent_dict.pop("type")
|
|
237
237
|
return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in agent_dicts]
|
|
238
238
|
|
|
239
|
-
def add_agent(self,
|
|
239
|
+
def add_agent(self, **kwargs):
|
|
240
240
|
if not self.multiplexing_mode:
|
|
241
241
|
return self.session.request("POST", f"{self.url}/agent", json=OcrdAgentModel.create(**kwargs).dict())
|
|
242
242
|
else:
|
|
@@ -403,7 +403,6 @@ class OcrdMetsServer:
|
|
|
403
403
|
@staticmethod
|
|
404
404
|
def kill_process(mets_server_pid: int):
|
|
405
405
|
subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
|
|
406
|
-
return
|
|
407
406
|
|
|
408
407
|
def shutdown(self):
|
|
409
408
|
if self.is_uds:
|