ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +8 -6
- ocrd/cli/bashlib.py +8 -114
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/mets_server.py +11 -15
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
- ocrd-3.7.0.dist-info/RECORD +123 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +31 -33
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +5 -5
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +15 -7
- ocrd_utils/os.py +17 -4
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd/cli/log.py +0 -51
- ocrd/lib.bash +0 -317
- ocrd-3.5.1.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd/cli/__init__.py
CHANGED
|
@@ -10,13 +10,14 @@ import click
|
|
|
10
10
|
|
|
11
11
|
from ocrd_utils import config
|
|
12
12
|
|
|
13
|
+
|
|
13
14
|
# pylint: disable=wrong-import-position
|
|
14
15
|
|
|
15
16
|
def command_with_replaced_help(*replacements):
|
|
16
17
|
|
|
17
18
|
class CommandWithReplacedHelp(click.Command):
|
|
18
19
|
def get_help(self, ctx):
|
|
19
|
-
newhelp = super().get_help(ctx)
|
|
20
|
+
newhelp: str = super().get_help(ctx)
|
|
20
21
|
for replacement in replacements:
|
|
21
22
|
newhelp = re.sub(*replacement, newhelp)
|
|
22
23
|
# print(newhelp)
|
|
@@ -24,18 +25,18 @@ def command_with_replaced_help(*replacements):
|
|
|
24
25
|
|
|
25
26
|
return CommandWithReplacedHelp
|
|
26
27
|
|
|
28
|
+
|
|
27
29
|
# pylint: enable=wrong-import-position
|
|
28
30
|
|
|
29
31
|
from ..decorators import ocrd_loglevel
|
|
30
32
|
from .ocrd_tool import ocrd_tool_cli
|
|
31
33
|
from .workspace import workspace_cli
|
|
32
34
|
from .process import process_cli
|
|
33
|
-
from .bashlib import bashlib_cli
|
|
34
35
|
from .validate import validate_cli
|
|
35
36
|
from .resmgr import resmgr_cli
|
|
36
37
|
from .zip import zip_cli
|
|
37
|
-
from .log import log_cli
|
|
38
38
|
from .network import network_cli
|
|
39
|
+
from .bashlib import bashlib_cli
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
__all__ = ['cli']
|
|
@@ -102,20 +103,21 @@ Variables:
|
|
|
102
103
|
{config.describe('OCRD_LOGGING_DEBUG')}
|
|
103
104
|
"""
|
|
104
105
|
|
|
106
|
+
|
|
105
107
|
@click.group(epilog=_epilog)
|
|
106
108
|
@click.version_option(package_name='ocrd')
|
|
107
109
|
@ocrd_loglevel
|
|
108
|
-
def cli(**kwargs):
|
|
110
|
+
def cli(**kwargs): # pylint: disable=unused-argument
|
|
109
111
|
"""
|
|
110
112
|
Entry-point of multi-purpose CLI for OCR-D
|
|
111
113
|
"""
|
|
112
114
|
|
|
115
|
+
|
|
113
116
|
cli.add_command(ocrd_tool_cli)
|
|
114
117
|
cli.add_command(workspace_cli)
|
|
115
118
|
cli.add_command(process_cli)
|
|
116
|
-
cli.add_command(bashlib_cli)
|
|
117
119
|
cli.add_command(zip_cli)
|
|
118
120
|
cli.add_command(validate_cli)
|
|
119
|
-
cli.add_command(log_cli)
|
|
120
121
|
cli.add_command(resmgr_cli)
|
|
121
122
|
cli.add_command(network_cli)
|
|
123
|
+
cli.add_command(bashlib_cli)
|
ocrd/cli/bashlib.py
CHANGED
|
@@ -6,29 +6,20 @@ OCR-D CLI: bash library
|
|
|
6
6
|
:nested: full
|
|
7
7
|
|
|
8
8
|
"""
|
|
9
|
-
from __future__ import print_function
|
|
10
|
-
import sys
|
|
11
|
-
import click
|
|
12
9
|
|
|
10
|
+
# WARNING: bashlib processors have been deprecated as of v3 of the OCR-D/core API
|
|
11
|
+
# and will be removed in v3.7.0. We retain the `ocrd bashlib` CLI only
|
|
12
|
+
# to not break the `ocrd bashlib filename` command, which is used in CD
|
|
13
|
+
# scripts to get the `share` directory of the core installation.
|
|
14
|
+
|
|
15
|
+
import click
|
|
13
16
|
from ocrd.constants import BASHLIB_FILENAME
|
|
14
|
-
import ocrd.constants
|
|
15
|
-
import ocrd_utils.constants
|
|
16
|
-
from ocrd_utils.constants import DEFAULT_METS_BASENAME
|
|
17
|
-
import ocrd_models.constants
|
|
18
|
-
import ocrd_validators.constants
|
|
19
|
-
from ocrd.decorators import (
|
|
20
|
-
parameter_option,
|
|
21
|
-
parameter_override_option,
|
|
22
|
-
ocrd_loglevel,
|
|
23
|
-
ocrd_cli_wrap_processor
|
|
24
|
-
)
|
|
25
|
-
from ocrd_utils import make_file_id
|
|
26
|
-
from ocrd.processor import Processor
|
|
27
17
|
|
|
28
18
|
# ----------------------------------------------------------------------
|
|
29
19
|
# ocrd bashlib
|
|
30
20
|
# ----------------------------------------------------------------------
|
|
31
21
|
|
|
22
|
+
|
|
32
23
|
@click.group('bashlib')
|
|
33
24
|
def bashlib_cli():
|
|
34
25
|
"""
|
|
@@ -39,6 +30,7 @@ def bashlib_cli():
|
|
|
39
30
|
# ocrd bashlib filename
|
|
40
31
|
# ----------------------------------------------------------------------
|
|
41
32
|
|
|
33
|
+
|
|
42
34
|
@bashlib_cli.command('filename')
|
|
43
35
|
def bashlib_filename():
|
|
44
36
|
"""
|
|
@@ -48,101 +40,3 @@ def bashlib_filename():
|
|
|
48
40
|
"""
|
|
49
41
|
print(BASHLIB_FILENAME)
|
|
50
42
|
|
|
51
|
-
@bashlib_cli.command('constants')
|
|
52
|
-
@click.argument('name')
|
|
53
|
-
def bashlib_constants(name):
|
|
54
|
-
"""
|
|
55
|
-
Query constants from ocrd_utils and ocrd_models
|
|
56
|
-
"""
|
|
57
|
-
all_constants = {}
|
|
58
|
-
for src in [ocrd.constants, ocrd_utils.constants, ocrd_models.constants, ocrd_validators.constants]:
|
|
59
|
-
for k in src.__all__:
|
|
60
|
-
all_constants[k] = src.__dict__[k]
|
|
61
|
-
if name in ['*', 'KEYS', '__all__']:
|
|
62
|
-
print(sorted(all_constants.keys()))
|
|
63
|
-
sys.exit(0)
|
|
64
|
-
if name not in all_constants:
|
|
65
|
-
print("ERROR: name '%s' is not a known constant" % name, file=sys.stderr)
|
|
66
|
-
sys.exit(1)
|
|
67
|
-
val = all_constants[name]
|
|
68
|
-
if isinstance(val, dict):
|
|
69
|
-
# make this bash-friendly (show initialization for associative array)
|
|
70
|
-
for key in val:
|
|
71
|
-
print("[%s]=%s" % (key, val[key]), end=' ')
|
|
72
|
-
else:
|
|
73
|
-
print(val)
|
|
74
|
-
|
|
75
|
-
@bashlib_cli.command('input-files')
|
|
76
|
-
@click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
|
|
77
|
-
@click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
|
|
78
|
-
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
|
|
79
|
-
@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server', default=None)
|
|
80
|
-
@click.option('-d', '--working-dir', help="Working Directory")
|
|
81
|
-
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
|
|
82
|
-
@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
|
|
83
|
-
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
|
|
84
|
-
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
|
|
85
|
-
"(with '--page-id', remove only those).\n"
|
|
86
|
-
"Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE")
|
|
87
|
-
@click.option('--debug', is_flag=True, default=False, help="Abort on any errors with full stack trace.\n"
|
|
88
|
-
"Short-hand for OCRD_MISSING_OUTPUT=ABORT")
|
|
89
|
-
@parameter_option
|
|
90
|
-
@parameter_override_option
|
|
91
|
-
@ocrd_loglevel
|
|
92
|
-
def bashlib_input_files(ocrd_tool, executable, **kwargs):
|
|
93
|
-
"""
|
|
94
|
-
List input files for processing
|
|
95
|
-
|
|
96
|
-
Instantiate a processor and workspace from the given processing options.
|
|
97
|
-
Then loop through the input files of the input fileGrp, and for each one,
|
|
98
|
-
print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended
|
|
99
|
-
`outputFileId` (from ``make_file_id``).
|
|
100
|
-
|
|
101
|
-
(The printing format is one associative array initializer per line.)
|
|
102
|
-
"""
|
|
103
|
-
class BashlibProcessor(Processor):
|
|
104
|
-
# go half way of the normal run_processor / process_workspace call tree
|
|
105
|
-
# by just delegating to process_workspace, overriding process_page_file
|
|
106
|
-
# to ensure all input files exist locally (without persisting them in the METS)
|
|
107
|
-
# and print what needs to be acted on in bash-friendly way
|
|
108
|
-
def process_page_file(self, *input_files):
|
|
109
|
-
for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
|
|
110
|
-
# make this bash-friendly (show initialization for associative array)
|
|
111
|
-
if len(input_files) > 1:
|
|
112
|
-
# single quotes allow us to preserve the list value inside the alist
|
|
113
|
-
value = ' '.join(str(getattr(res, field)) for res in input_files)
|
|
114
|
-
else:
|
|
115
|
-
value = str(getattr(input_files[0], field))
|
|
116
|
-
print(f"[{field}]='{value}'", end=' ')
|
|
117
|
-
output_file_id = make_file_id(input_files[0], kwargs['output_file_grp'])
|
|
118
|
-
print(f"[outputFileId]='{output_file_id}'")
|
|
119
|
-
if ocrd_tool and executable:
|
|
120
|
-
class FullBashlibProcessor(BashlibProcessor):
|
|
121
|
-
@property
|
|
122
|
-
def metadata_location(self):
|
|
123
|
-
# needed for metadata loading and validation mechanism
|
|
124
|
-
return ocrd_tool
|
|
125
|
-
@property
|
|
126
|
-
def executable(self):
|
|
127
|
-
# needed for ocrd_tool lookup
|
|
128
|
-
return executable
|
|
129
|
-
processor_class = FullBashlibProcessor
|
|
130
|
-
else:
|
|
131
|
-
# we have no true metadata file, so fill in just to make it work
|
|
132
|
-
class UnknownBashlibProcessor(BashlibProcessor):
|
|
133
|
-
@property
|
|
134
|
-
def ocrd_tool(self):
|
|
135
|
-
# needed to satisfy the validator
|
|
136
|
-
return {'executable': '',
|
|
137
|
-
# required now
|
|
138
|
-
'input_file_grp_cardinality': 1,
|
|
139
|
-
'output_file_grp_cardinality': 1,
|
|
140
|
-
'steps': ['']
|
|
141
|
-
}
|
|
142
|
-
@property
|
|
143
|
-
def version(self):
|
|
144
|
-
# needed to satisfy the validator and wrapper
|
|
145
|
-
return '1.0'
|
|
146
|
-
processor_class = UnknownBashlibProcessor
|
|
147
|
-
|
|
148
|
-
ocrd_cli_wrap_processor(processor_class, **kwargs)
|
ocrd/cli/network.py
CHANGED
|
@@ -12,7 +12,6 @@ from ocrd_network.cli import (
|
|
|
12
12
|
client_cli,
|
|
13
13
|
processing_server_cli,
|
|
14
14
|
processing_worker_cli,
|
|
15
|
-
processor_server_cli,
|
|
16
15
|
)
|
|
17
16
|
|
|
18
17
|
|
|
@@ -27,4 +26,3 @@ def network_cli():
|
|
|
27
26
|
network_cli.add_command(client_cli)
|
|
28
27
|
network_cli.add_command(processing_server_cli)
|
|
29
28
|
network_cli.add_command(processing_worker_cli)
|
|
30
|
-
network_cli.add_command(processor_server_cli)
|
ocrd/cli/ocrd_tool.py
CHANGED
|
@@ -23,6 +23,7 @@ from ocrd_utils import (
|
|
|
23
23
|
)
|
|
24
24
|
from ocrd_validators import ParameterValidator, OcrdToolValidator
|
|
25
25
|
|
|
26
|
+
|
|
26
27
|
class OcrdToolCtx():
|
|
27
28
|
|
|
28
29
|
def __init__(self, filename):
|
|
@@ -36,25 +37,31 @@ class OcrdToolCtx():
|
|
|
36
37
|
|
|
37
38
|
class BashProcessor(Processor):
|
|
38
39
|
@property
|
|
39
|
-
def metadata(inner_self):
|
|
40
|
+
def metadata(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
40
41
|
return self.json
|
|
42
|
+
|
|
41
43
|
@property
|
|
42
|
-
def executable(inner_self):
|
|
44
|
+
def executable(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
43
45
|
return self.tool_name
|
|
46
|
+
|
|
44
47
|
@property
|
|
45
|
-
def moduledir(inner_self):
|
|
48
|
+
def moduledir(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
46
49
|
return os.path.dirname(self.filename)
|
|
50
|
+
|
|
47
51
|
# set docstrings to empty
|
|
48
52
|
__doc__ = None
|
|
49
53
|
# HACK: override the module-level docstring, too
|
|
50
54
|
getmodule(OcrdToolCtx).__doc__ = None
|
|
51
|
-
|
|
55
|
+
|
|
56
|
+
def process(inner_self): # pylint: disable=no-self-argument,arguments-renamed
|
|
52
57
|
return super()
|
|
53
58
|
|
|
54
59
|
self.processor = BashProcessor
|
|
55
60
|
|
|
61
|
+
|
|
56
62
|
pass_ocrd_tool = click.make_pass_decorator(OcrdToolCtx)
|
|
57
63
|
|
|
64
|
+
|
|
58
65
|
# ----------------------------------------------------------------------
|
|
59
66
|
# ocrd ocrd-tool
|
|
60
67
|
# ----------------------------------------------------------------------
|
|
@@ -65,6 +72,7 @@ pass_ocrd_tool = click.make_pass_decorator(OcrdToolCtx)
|
|
|
65
72
|
def ocrd_tool_cli(ctx, json_file):
|
|
66
73
|
ctx.obj = OcrdToolCtx(json_file)
|
|
67
74
|
|
|
75
|
+
|
|
68
76
|
# ----------------------------------------------------------------------
|
|
69
77
|
# ocrd ocrd-tool version
|
|
70
78
|
# ----------------------------------------------------------------------
|
|
@@ -74,6 +82,7 @@ def ocrd_tool_cli(ctx, json_file):
|
|
|
74
82
|
def ocrd_tool_version(ctx):
|
|
75
83
|
print(ctx.json['version'])
|
|
76
84
|
|
|
85
|
+
|
|
77
86
|
# ----------------------------------------------------------------------
|
|
78
87
|
# ocrd ocrd-tool validate
|
|
79
88
|
# ----------------------------------------------------------------------
|
|
@@ -86,6 +95,7 @@ def ocrd_tool_validate(ctx):
|
|
|
86
95
|
if not report.is_valid:
|
|
87
96
|
return 128
|
|
88
97
|
|
|
98
|
+
|
|
89
99
|
# ----------------------------------------------------------------------
|
|
90
100
|
# ocrd ocrd-tool list-tools
|
|
91
101
|
# ----------------------------------------------------------------------
|
|
@@ -96,6 +106,7 @@ def ocrd_tool_list(ctx):
|
|
|
96
106
|
for tool in ctx.json['tools']:
|
|
97
107
|
print(tool)
|
|
98
108
|
|
|
109
|
+
|
|
99
110
|
# ----------------------------------------------------------------------
|
|
100
111
|
# ocrd ocrd-tool dump-tools
|
|
101
112
|
# ----------------------------------------------------------------------
|
|
@@ -105,6 +116,7 @@ def ocrd_tool_list(ctx):
|
|
|
105
116
|
def ocrd_tool_dump(ctx):
|
|
106
117
|
print(dumps(ctx.json['tools'], indent=True))
|
|
107
118
|
|
|
119
|
+
|
|
108
120
|
@ocrd_tool_cli.command('dump-module-dirs', help="Dump module directory of each tool")
|
|
109
121
|
@pass_ocrd_tool
|
|
110
122
|
def ocrd_tool_dump_module_dirs(ctx):
|
|
@@ -112,6 +124,7 @@ def ocrd_tool_dump_module_dirs(ctx):
|
|
|
112
124
|
for tool_name in ctx.json['tools']},
|
|
113
125
|
indent=True))
|
|
114
126
|
|
|
127
|
+
|
|
115
128
|
# ----------------------------------------------------------------------
|
|
116
129
|
# ocrd ocrd-tool tool
|
|
117
130
|
# ----------------------------------------------------------------------
|
|
@@ -124,6 +137,7 @@ def ocrd_tool_tool(ctx, tool_name):
|
|
|
124
137
|
raise Exception("No such tool: %s" % tool_name)
|
|
125
138
|
ctx.tool_name = tool_name
|
|
126
139
|
|
|
140
|
+
|
|
127
141
|
# ----------------------------------------------------------------------
|
|
128
142
|
# ocrd ocrd-tool tool description
|
|
129
143
|
# ----------------------------------------------------------------------
|
|
@@ -133,29 +147,34 @@ def ocrd_tool_tool(ctx, tool_name):
|
|
|
133
147
|
def ocrd_tool_tool_description(ctx):
|
|
134
148
|
print(ctx.json['tools'][ctx.tool_name]['description'])
|
|
135
149
|
|
|
150
|
+
|
|
136
151
|
@ocrd_tool_tool.command('list-resources', help="List tool's file resources")
|
|
137
152
|
@pass_ocrd_tool
|
|
138
153
|
def ocrd_tool_tool_list_resources(ctx):
|
|
139
154
|
ctx.processor(None).list_resources()
|
|
140
155
|
|
|
156
|
+
|
|
141
157
|
@ocrd_tool_tool.command('resolve-resource', help="Get a tool's file resource full path name")
|
|
142
158
|
@click.argument('res_name')
|
|
143
159
|
@pass_ocrd_tool
|
|
144
160
|
def ocrd_tool_tool_resolve_resource(ctx, res_name):
|
|
145
161
|
print(ctx.processor(None).resolve_resource(res_name))
|
|
146
162
|
|
|
163
|
+
|
|
147
164
|
@ocrd_tool_tool.command('show-resource', help="Dump a tool's file resource")
|
|
148
165
|
@click.argument('res_name')
|
|
149
166
|
@pass_ocrd_tool
|
|
150
167
|
def ocrd_tool_tool_show_resource(ctx, res_name):
|
|
151
168
|
ctx.processor(None).show_resource(res_name)
|
|
152
169
|
|
|
170
|
+
|
|
153
171
|
@ocrd_tool_tool.command('help', help="Generate help for processors")
|
|
154
172
|
@click.argument('subcommand', required=False)
|
|
155
173
|
@pass_ocrd_tool
|
|
156
174
|
def ocrd_tool_tool_params_help(ctx, subcommand):
|
|
157
175
|
ctx.processor(None).show_help(subcommand=subcommand)
|
|
158
176
|
|
|
177
|
+
|
|
159
178
|
# ----------------------------------------------------------------------
|
|
160
179
|
# ocrd ocrd-tool tool categories
|
|
161
180
|
# ----------------------------------------------------------------------
|
|
@@ -165,6 +184,7 @@ def ocrd_tool_tool_params_help(ctx, subcommand):
|
|
|
165
184
|
def ocrd_tool_tool_categories(ctx):
|
|
166
185
|
print('\n'.join(ctx.json['tools'][ctx.tool_name]['categories']))
|
|
167
186
|
|
|
187
|
+
|
|
168
188
|
# ----------------------------------------------------------------------
|
|
169
189
|
# ocrd ocrd-tool tool steps
|
|
170
190
|
# ----------------------------------------------------------------------
|
|
@@ -174,6 +194,7 @@ def ocrd_tool_tool_categories(ctx):
|
|
|
174
194
|
def ocrd_tool_tool_steps(ctx):
|
|
175
195
|
print('\n'.join(ctx.json['tools'][ctx.tool_name]['steps']))
|
|
176
196
|
|
|
197
|
+
|
|
177
198
|
# ----------------------------------------------------------------------
|
|
178
199
|
# ocrd ocrd-tool tool dump
|
|
179
200
|
# ----------------------------------------------------------------------
|
|
@@ -183,6 +204,7 @@ def ocrd_tool_tool_steps(ctx):
|
|
|
183
204
|
def ocrd_tool_tool_dump(ctx):
|
|
184
205
|
print(dumps(ctx.json['tools'][ctx.tool_name], indent=True))
|
|
185
206
|
|
|
207
|
+
|
|
186
208
|
# ----------------------------------------------------------------------
|
|
187
209
|
# ocrd ocrd-tool tool parse-params
|
|
188
210
|
# ----------------------------------------------------------------------
|
ocrd/cli/process.py
CHANGED
ocrd/cli/resmgr.py
CHANGED
ocrd/cli/validate.py
CHANGED
|
@@ -17,12 +17,13 @@ from ocrd.task_sequence import ProcessorTask, validate_tasks
|
|
|
17
17
|
from ocrd_utils import initLogging, parse_json_string_or_file, DEFAULT_METS_BASENAME
|
|
18
18
|
from ocrd_validators import (
|
|
19
19
|
OcrdToolValidator,
|
|
20
|
-
OcrdZipValidator,
|
|
20
|
+
# OcrdZipValidator,
|
|
21
21
|
PageValidator,
|
|
22
22
|
ParameterValidator,
|
|
23
|
-
WorkspaceValidator,
|
|
23
|
+
# WorkspaceValidator,
|
|
24
24
|
)
|
|
25
25
|
|
|
26
|
+
|
|
26
27
|
def _inform_of_result(report):
|
|
27
28
|
if not report.is_valid:
|
|
28
29
|
print(report.to_xml())
|
|
@@ -36,6 +37,7 @@ def validate_cli():
|
|
|
36
37
|
"""
|
|
37
38
|
initLogging()
|
|
38
39
|
|
|
40
|
+
|
|
39
41
|
@validate_cli.command('tool-json')
|
|
40
42
|
@click.argument('ocrd_tool', required=False, nargs=1)
|
|
41
43
|
def validate_ocrd_tool(ocrd_tool):
|
|
@@ -48,6 +50,7 @@ def validate_ocrd_tool(ocrd_tool):
|
|
|
48
50
|
ocrd_tool = loads(f.read())
|
|
49
51
|
_inform_of_result(OcrdToolValidator.validate(ocrd_tool))
|
|
50
52
|
|
|
53
|
+
|
|
51
54
|
@validate_cli.command('parameters')
|
|
52
55
|
@click.argument('ocrd_tool')
|
|
53
56
|
@click.argument('executable')
|
|
@@ -60,24 +63,31 @@ def validate_parameters(ocrd_tool, executable, param_json):
|
|
|
60
63
|
ocrd_tool = loads(f.read())
|
|
61
64
|
_inform_of_result(ParameterValidator(ocrd_tool['tools'][executable]).validate(parse_json_string_or_file(param_json)))
|
|
62
65
|
|
|
66
|
+
|
|
63
67
|
@validate_cli.command('page')
|
|
64
68
|
@click.argument('page', required=True, nargs=1)
|
|
65
|
-
@click.option('--page-textequiv-consistency', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
|
|
66
|
-
|
|
69
|
+
@click.option('--page-textequiv-consistency', help="How strict to check PAGE multi-level textequiv consistency",
|
|
70
|
+
type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
|
|
71
|
+
@click.option('--page-textequiv-strategy', help="Strategy to determine the correct textequiv",
|
|
72
|
+
type=click.Choice(['first']), default='first')
|
|
67
73
|
@click.option('--check-baseline', help="Whether Baseline must be fully within TextLine/Coords", is_flag=True, default=False)
|
|
68
|
-
@click.option('--check-coords', help="Whether *Region/TextLine/Word/Glyph must each be fully contained within Border/*Region/TextLine/Word, resp.", is_flag=True, default=False)
|
|
74
|
+
@click.option('--check-coords', help="Whether *Region/TextLine/Word/Glyph must each be fully contained "
|
|
75
|
+
"within Border/*Region/TextLine/Word, resp.", is_flag=True, default=False)
|
|
69
76
|
def validate_page(page, **kwargs):
|
|
70
77
|
'''
|
|
71
78
|
Validate PAGE against OCR-D conventions
|
|
72
79
|
'''
|
|
73
80
|
_inform_of_result(PageValidator.validate(filename=page, **kwargs))
|
|
74
81
|
|
|
82
|
+
|
|
75
83
|
# @validate_cli.command('zip')
|
|
76
84
|
# @click.argument('src', type=click.Path(dir_okay=True, readable=True, resolve_path=True), required=True)
|
|
77
85
|
# @click.option('-Z', '--skip-unzip', help="Treat SRC as a directory not a ZIP", is_flag=True, default=False)
|
|
78
86
|
# @click.option('-B', '--skip-bag', help="Whether to skip all checks of manifests and files", is_flag=True, default=False)
|
|
79
|
-
# @click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance",
|
|
80
|
-
#
|
|
87
|
+
# @click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance",
|
|
88
|
+
# is_flag=True, default=False)
|
|
89
|
+
# @click.option('-D', '--skip-delete', help="Whether to skip deleting the unpacked OCRD-ZIP dir after valdiation",
|
|
90
|
+
# is_flag=True, default=False)
|
|
81
91
|
# @click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
|
|
82
92
|
# def validate(src, **kwargs):
|
|
83
93
|
# """
|
|
@@ -87,11 +97,16 @@ def validate_page(page, **kwargs):
|
|
|
87
97
|
# """
|
|
88
98
|
# _inform_of_result(OcrdZipValidator(Resolver(), src).validate(**kwargs))
|
|
89
99
|
|
|
100
|
+
|
|
90
101
|
# @validate_cli.command('workspace')
|
|
91
102
|
# @click.option('-a', '--download', is_flag=True, help="Download all files")
|
|
92
|
-
# @click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
|
|
93
|
-
#
|
|
94
|
-
#
|
|
103
|
+
# @click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
|
|
104
|
+
# ['imagefilename', 'dimension', 'mets_unique_identifier', 'mets_file_group_names', 'mets_files',
|
|
105
|
+
# 'pixel_density', 'page', 'url']))
|
|
106
|
+
# @click.option('--page-textequiv-consistency', '--page-strictness', type=click.Choice(['strict', 'lax', 'fix', 'off']),
|
|
107
|
+
# help="How strict to check PAGE multi-level textequiv consistency", default='strict')
|
|
108
|
+
# @click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency",
|
|
109
|
+
# type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
|
|
95
110
|
# @click.argument('mets_url')
|
|
96
111
|
# def validate_workspace(mets_url, **kwargs):
|
|
97
112
|
# '''
|
|
@@ -99,11 +114,15 @@ def validate_page(page, **kwargs):
|
|
|
99
114
|
# '''
|
|
100
115
|
# _inform_of_result(WorkspaceValidator.validate(Resolver(), mets_url, **kwargs))
|
|
101
116
|
|
|
117
|
+
|
|
102
118
|
@validate_cli.command('tasks')
|
|
103
|
-
@click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax')
|
|
104
|
-
|
|
119
|
+
@click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. '
|
|
120
|
+
'If omitted, only validate syntax')
|
|
121
|
+
@click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME,
|
|
122
|
+
help='Basename of the METS file, used in conjunction with --workspace')
|
|
105
123
|
@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server')
|
|
106
|
-
@click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.')
|
|
124
|
+
@click.option('--overwrite', is_flag=True, default=False,
|
|
125
|
+
help='When checking against a concrete workspace, simulate overwriting output or page range.')
|
|
107
126
|
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
|
|
108
127
|
@click.argument('tasks', nargs=-1, required=True)
|
|
109
128
|
def validate_process(tasks, workspace, mets_basename, mets_server_url, overwrite, page_id):
|