ocrd 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +6 -2
- ocrd/cli/bashlib.py +7 -2
- ocrd/cli/log.py +7 -2
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/lib.bash +6 -13
- ocrd/mets_server.py +6 -10
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/METADATA +3 -2
- ocrd-3.6.0.dist-info/RECORD +125 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +2 -5
- ocrd_network/models/workspace.py +1 -1
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +12 -7
- ocrd_utils/os.py +16 -3
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd-3.5.0.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/top_level.txt +0 -0
ocrd/cli/workspace.py
CHANGED
|
@@ -5,7 +5,6 @@ OCR-D CLI: workspace management
|
|
|
5
5
|
:prog: ocrd workspace
|
|
6
6
|
:nested: full
|
|
7
7
|
"""
|
|
8
|
-
import os
|
|
9
8
|
from os import rmdir, unlink
|
|
10
9
|
from os.path import dirname, relpath, normpath, exists, join, isabs, isdir
|
|
11
10
|
from pathlib import Path
|
|
@@ -19,7 +18,16 @@ import click
|
|
|
19
18
|
|
|
20
19
|
from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager
|
|
21
20
|
from ocrd.mets_server import OcrdMetsServer
|
|
22
|
-
from ocrd_utils import
|
|
21
|
+
from ocrd_utils import (
|
|
22
|
+
getLogger,
|
|
23
|
+
initLogging,
|
|
24
|
+
pushd_popd,
|
|
25
|
+
EXT_TO_MIME,
|
|
26
|
+
safe_filename,
|
|
27
|
+
parse_json_string_or_file,
|
|
28
|
+
partition_list,
|
|
29
|
+
DEFAULT_METS_BASENAME,
|
|
30
|
+
)
|
|
23
31
|
from ocrd.decorators import mets_find_options
|
|
24
32
|
from . import command_with_replaced_help
|
|
25
33
|
from ocrd_models.constants import METS_PAGE_DIV_ATTRIBUTE
|
|
@@ -32,8 +40,8 @@ class WorkspaceCtx():
|
|
|
32
40
|
if mets_basename:
|
|
33
41
|
self.log.warning(DeprecationWarning('--mets-basename is deprecated. Use --mets/--directory instead.'))
|
|
34
42
|
self.resolver = Resolver()
|
|
35
|
-
self.directory, self.mets_url, self.mets_basename, self.mets_server_url \
|
|
36
|
-
|
|
43
|
+
self.directory, self.mets_url, self.mets_basename, self.mets_server_url = \
|
|
44
|
+
self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
|
|
37
45
|
self.automatic_backup = automatic_backup
|
|
38
46
|
|
|
39
47
|
def workspace(self):
|
|
@@ -44,20 +52,24 @@ class WorkspaceCtx():
|
|
|
44
52
|
automatic_backup=self.automatic_backup,
|
|
45
53
|
mets_server_url=self.mets_server_url,
|
|
46
54
|
)
|
|
55
|
+
|
|
47
56
|
def backup_manager(self):
|
|
48
57
|
return WorkspaceBackupManager(self.workspace())
|
|
49
58
|
|
|
50
59
|
|
|
51
60
|
pass_workspace = click.make_pass_decorator(WorkspaceCtx)
|
|
52
61
|
|
|
62
|
+
|
|
53
63
|
# ----------------------------------------------------------------------
|
|
54
64
|
# ocrd workspace
|
|
55
65
|
# ----------------------------------------------------------------------
|
|
56
66
|
|
|
57
67
|
@click.group("workspace")
|
|
58
|
-
@click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR',
|
|
68
|
+
@click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR',
|
|
69
|
+
help='Changes the workspace folder location [default: METS_URL directory or .]"')
|
|
59
70
|
@click.option('-M', '--mets-basename', default=None, help='METS file basename. Deprecated, use --mets/--directory')
|
|
60
|
-
@click.option('-m', '--mets', default=None,
|
|
71
|
+
@click.option('-m', '--mets', default=None, metavar="METS_URL",
|
|
72
|
+
help='The path/URL of the METS file [default: WORKSPACE_DIR/mets.xml]')
|
|
61
73
|
@click.option('-U', '--mets-server-url', 'mets_server_url', help="TCP host URI or UDS path of METS server")
|
|
62
74
|
@click.option('--backup', default=False, help="Backup mets.xml whenever it is saved.", is_flag=True)
|
|
63
75
|
@click.pass_context
|
|
@@ -67,7 +79,7 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
|
|
|
67
79
|
|
|
68
80
|
A workspace comprises a METS file and a directory as point of reference.
|
|
69
81
|
|
|
70
|
-
Operates on the file system directly or via a METS server
|
|
82
|
+
Operates on the file system directly or via a METS server
|
|
71
83
|
(already running via some prior `server start` subcommand).
|
|
72
84
|
"""
|
|
73
85
|
initLogging()
|
|
@@ -79,6 +91,7 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
|
|
|
79
91
|
automatic_backup=backup
|
|
80
92
|
)
|
|
81
93
|
|
|
94
|
+
|
|
82
95
|
# ----------------------------------------------------------------------
|
|
83
96
|
# ocrd workspace validate
|
|
84
97
|
# ----------------------------------------------------------------------
|
|
@@ -88,10 +101,12 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
|
|
|
88
101
|
@pass_workspace
|
|
89
102
|
@click.option('-a', '--download', is_flag=True, help="Download all files")
|
|
90
103
|
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
|
|
91
|
-
['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density',
|
|
92
|
-
'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
|
|
93
|
-
@click.option('--page-textequiv-consistency', '--page-strictness',
|
|
94
|
-
|
|
104
|
+
['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density',
|
|
105
|
+
'page', 'page_xsd', 'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
|
|
106
|
+
@click.option('--page-textequiv-consistency', '--page-strictness', type=click.Choice(['strict', 'lax', 'fix', 'off']),
|
|
107
|
+
default='strict', help="How strict to check PAGE multi-level textequiv consistency")
|
|
108
|
+
@click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency",
|
|
109
|
+
type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
|
|
95
110
|
@click.argument('mets_url', default=None, required=False)
|
|
96
111
|
def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
|
|
97
112
|
"""
|
|
@@ -105,7 +120,8 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
|
|
|
105
120
|
"""
|
|
106
121
|
LOG = getLogger('ocrd.cli.workspace.validate')
|
|
107
122
|
if mets_url:
|
|
108
|
-
LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS init' instead of
|
|
123
|
+
LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS init' instead of "
|
|
124
|
+
"argument 'METS_URL' ('%s')" % mets_url))
|
|
109
125
|
else:
|
|
110
126
|
mets_url = ctx.mets_url
|
|
111
127
|
report = WorkspaceValidator.validate(
|
|
@@ -121,6 +137,7 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
|
|
|
121
137
|
if not report.is_valid:
|
|
122
138
|
sys.exit(128)
|
|
123
139
|
|
|
140
|
+
|
|
124
141
|
# ----------------------------------------------------------------------
|
|
125
142
|
# ocrd workspace clone
|
|
126
143
|
# ----------------------------------------------------------------------
|
|
@@ -128,13 +145,15 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
|
|
|
128
145
|
@workspace_cli.command('clone', cls=command_with_replaced_help(
|
|
129
146
|
(r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
|
|
130
147
|
@click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
|
|
131
|
-
@click.option('-a', '--download', is_flag=True, help="Download all selected files and add local
|
|
148
|
+
@click.option('-a', '--download', is_flag=True, help="Download all selected files and add local FLocat "
|
|
149
|
+
"path references in METS file afterwards")
|
|
132
150
|
@click.argument('mets_url')
|
|
133
151
|
@mets_find_options
|
|
134
152
|
# XXX deprecated
|
|
135
153
|
@click.argument('workspace_dir', default=None, required=False)
|
|
136
154
|
@pass_workspace
|
|
137
|
-
def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mimetype,
|
|
155
|
+
def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mimetype,
|
|
156
|
+
include_fileGrp, exclude_fileGrp, mets_url, workspace_dir):
|
|
138
157
|
"""
|
|
139
158
|
Create a workspace from METS_URL and return the directory
|
|
140
159
|
|
|
@@ -146,7 +165,8 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
|
|
|
146
165
|
"""
|
|
147
166
|
LOG = getLogger('ocrd.cli.workspace.clone')
|
|
148
167
|
if workspace_dir:
|
|
149
|
-
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of
|
|
168
|
+
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of "
|
|
169
|
+
"argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
|
|
150
170
|
ctx.directory = workspace_dir
|
|
151
171
|
|
|
152
172
|
assert not ctx.mets_server_url, \
|
|
@@ -167,6 +187,7 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
|
|
|
167
187
|
workspace.save_mets()
|
|
168
188
|
print(workspace.directory)
|
|
169
189
|
|
|
190
|
+
|
|
170
191
|
# ----------------------------------------------------------------------
|
|
171
192
|
# ocrd workspace init
|
|
172
193
|
# ----------------------------------------------------------------------
|
|
@@ -184,7 +205,8 @@ def workspace_init(ctx, clobber_mets, directory):
|
|
|
184
205
|
"""
|
|
185
206
|
LOG = getLogger('ocrd.cli.workspace.init')
|
|
186
207
|
if directory:
|
|
187
|
-
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of
|
|
208
|
+
LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of "
|
|
209
|
+
"argument 'DIRECTORY' ('%s')" % directory))
|
|
188
210
|
ctx.directory = directory
|
|
189
211
|
assert not ctx.mets_server_url, \
|
|
190
212
|
f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
|
|
@@ -196,6 +218,7 @@ def workspace_init(ctx, clobber_mets, directory):
|
|
|
196
218
|
workspace.save_mets()
|
|
197
219
|
print(workspace.directory)
|
|
198
220
|
|
|
221
|
+
|
|
199
222
|
# ----------------------------------------------------------------------
|
|
200
223
|
# ocrd workspace add
|
|
201
224
|
# ----------------------------------------------------------------------
|
|
@@ -203,11 +226,13 @@ def workspace_init(ctx, clobber_mets, directory):
|
|
|
203
226
|
@workspace_cli.command('add')
|
|
204
227
|
@click.option('-G', '--file-grp', help="fileGrp USE", required=True, metavar='FILE_GRP')
|
|
205
228
|
@click.option('-i', '--file-id', help="ID for the file", required=True, metavar='FILE_ID')
|
|
206
|
-
@click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided",
|
|
229
|
+
@click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided",
|
|
230
|
+
required=False, metavar='TYPE')
|
|
207
231
|
@click.option('-g', '--page-id', help="ID of the physical page", metavar='PAGE_ID')
|
|
208
232
|
@click.option('-C', '--check-file-exists', help="Whether to ensure FNAME exists", is_flag=True, default=False)
|
|
209
233
|
@click.option('--ignore', help="Do not check whether file exists.", default=False, is_flag=True)
|
|
210
|
-
@click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.",
|
|
234
|
+
@click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.",
|
|
235
|
+
default=False, is_flag=True)
|
|
211
236
|
@click.argument('fname', required=True)
|
|
212
237
|
@pass_workspace
|
|
213
238
|
def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_file_exists, force, fname):
|
|
@@ -223,7 +248,8 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
|
|
|
223
248
|
mimetype = EXT_TO_MIME[Path(fname).suffix]
|
|
224
249
|
log.info("Guessed mimetype to be %s" % mimetype)
|
|
225
250
|
except KeyError:
|
|
226
|
-
log.error("Cannot guess mimetype from extension '%s' for '%s'.
|
|
251
|
+
log.error("Cannot guess mimetype from extension '%s' for '%s'. "
|
|
252
|
+
"Set --mimetype explicitly" % (Path(fname).suffix, fname))
|
|
227
253
|
|
|
228
254
|
log.debug("Adding '%s'", fname)
|
|
229
255
|
local_filename = None
|
|
@@ -260,27 +286,34 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
|
|
|
260
286
|
workspace.add_file(file_grp, **kwargs)
|
|
261
287
|
workspace.save_mets()
|
|
262
288
|
|
|
289
|
+
|
|
263
290
|
# ----------------------------------------------------------------------
|
|
264
291
|
# ocrd workspace bulk-add
|
|
265
292
|
# ----------------------------------------------------------------------
|
|
266
293
|
|
|
267
294
|
# pylint: disable=broad-except
|
|
268
295
|
@workspace_cli.command('bulk-add')
|
|
269
|
-
@click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths
|
|
296
|
+
@click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths "
|
|
297
|
+
"to define named captures usable in the other parameters", required=True)
|
|
270
298
|
@click.option('-m', '--mimetype', help="Media type of the file. If not provided, guess from filename", required=False)
|
|
271
299
|
@click.option('-g', '--page-id', help="physical page ID of the file", required=False)
|
|
272
300
|
@click.option('-i', '--file-id', help="ID of the file. If not provided, derive from fileGrp and filename", required=False)
|
|
273
301
|
@click.option('-u', '--url', help="Remote URL of the file", required=False)
|
|
274
|
-
@click.option('-l', '--local-filename', help="Local filesystem path in the workspace directory
|
|
302
|
+
@click.option('-l', '--local-filename', help="Local filesystem path in the workspace directory "
|
|
303
|
+
"(copied from source file if different)", required=False)
|
|
275
304
|
@click.option('-G', '--file-grp', help="File group USE of the file", required=True)
|
|
276
|
-
@click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview",
|
|
277
|
-
|
|
305
|
+
@click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview",
|
|
306
|
+
default=False, is_flag=True)
|
|
307
|
+
@click.option('-S', '--source-path', 'src_path_option', help="File path to copy from (if different from FILE_GLOB values)",
|
|
308
|
+
required=False)
|
|
278
309
|
@click.option('-I', '--ignore', help="Disable checking for existing file entries (faster)", default=False, is_flag=True)
|
|
279
|
-
@click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)",
|
|
310
|
+
@click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)",
|
|
311
|
+
default=False, is_flag=True)
|
|
280
312
|
@click.option('-s', '--skip', help="Skip files not matching --regex (instead of failing)", default=False, is_flag=True)
|
|
281
313
|
@click.argument('file_glob', nargs=-1, required=True)
|
|
282
314
|
@pass_workspace
|
|
283
|
-
def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_filename, file_grp, dry_run,
|
|
315
|
+
def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_filename, file_grp, dry_run,
|
|
316
|
+
file_glob, src_path_option, ignore, force, skip):
|
|
284
317
|
"""
|
|
285
318
|
Add files in bulk to an OCR-D workspace.
|
|
286
319
|
|
|
@@ -321,7 +354,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
321
354
|
-G '{{ filegrp }}' -g '{{ pageid }}' -i '{{ fileid }}' -S '{{ local_filename }}' -
|
|
322
355
|
|
|
323
356
|
"""
|
|
324
|
-
log = getLogger('ocrd.cli.workspace.bulk-add')
|
|
357
|
+
log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
|
|
325
358
|
workspace = ctx.workspace()
|
|
326
359
|
|
|
327
360
|
try:
|
|
@@ -355,7 +388,12 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
355
388
|
group_dict = m.groupdict()
|
|
356
389
|
|
|
357
390
|
# set up file info
|
|
358
|
-
file_dict = {'local_filename': local_filename,
|
|
391
|
+
file_dict = {'local_filename': local_filename,
|
|
392
|
+
'url': url,
|
|
393
|
+
'mimetype': mimetype,
|
|
394
|
+
'file_id': file_id,
|
|
395
|
+
'page_id': page_id,
|
|
396
|
+
'file_grp': file_grp}
|
|
359
397
|
|
|
360
398
|
# Flag to track whether 'local_filename' should be 'src'
|
|
361
399
|
local_filename_is_src = False
|
|
@@ -394,7 +432,8 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
394
432
|
try:
|
|
395
433
|
file_dict['mimetype'] = EXT_TO_MIME[srcpath.suffix]
|
|
396
434
|
except KeyError:
|
|
397
|
-
log.error("Cannot guess MIME type from extension '%s' for '%s'.
|
|
435
|
+
log.error("Cannot guess MIME type from extension '%s' for '%s'. "
|
|
436
|
+
"Set --mimetype explicitly" % (srcpath.suffix, srcpath))
|
|
398
437
|
|
|
399
438
|
# copy files if src != url
|
|
400
439
|
if local_filename_is_src:
|
|
@@ -413,7 +452,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
413
452
|
if dry_run:
|
|
414
453
|
log.info('workspace.add_file(%s)' % file_dict)
|
|
415
454
|
else:
|
|
416
|
-
workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)
|
|
455
|
+
workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
|
|
417
456
|
|
|
418
457
|
# save changes to disk
|
|
419
458
|
workspace.save_mets()
|
|
@@ -447,7 +486,8 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
|
|
|
447
486
|
@click.option('--keep-files', is_flag=True, help="Do not remove downloaded files from the workspace with --undo-download")
|
|
448
487
|
@click.option('--wait', type=int, default=0, help="Wait this many seconds between download requests")
|
|
449
488
|
@pass_workspace
|
|
450
|
-
def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field,
|
|
489
|
+
def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field,
|
|
490
|
+
include_fileGrp, exclude_fileGrp, download, undo_download, keep_files, wait):
|
|
451
491
|
"""
|
|
452
492
|
Find files.
|
|
453
493
|
|
|
@@ -467,7 +507,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
|
|
|
467
507
|
page_id=page_id,
|
|
468
508
|
include_fileGrp=include_fileGrp,
|
|
469
509
|
exclude_fileGrp=exclude_fileGrp,
|
|
470
|
-
|
|
510
|
+
):
|
|
471
511
|
if download and not f.local_filename:
|
|
472
512
|
workspace.download_file(f)
|
|
473
513
|
modified_mets = True
|
|
@@ -492,13 +532,15 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
|
|
|
492
532
|
for fields in ret:
|
|
493
533
|
print('\t'.join(fields))
|
|
494
534
|
|
|
535
|
+
|
|
495
536
|
# ----------------------------------------------------------------------
|
|
496
537
|
# ocrd workspace remove
|
|
497
538
|
# ----------------------------------------------------------------------
|
|
498
539
|
|
|
499
540
|
@workspace_cli.command('remove')
|
|
500
541
|
@click.option('-k', '--keep-file', help="Do not delete file from file system", default=False, is_flag=True)
|
|
501
|
-
@click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist",
|
|
542
|
+
@click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist",
|
|
543
|
+
default=False, is_flag=True)
|
|
502
544
|
@click.argument('ID', nargs=-1)
|
|
503
545
|
@pass_workspace
|
|
504
546
|
def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefined-builtin
|
|
@@ -534,13 +576,16 @@ def rename_group(ctx, old, new):
|
|
|
534
576
|
workspace.rename_file_group(old, new)
|
|
535
577
|
workspace.save_mets()
|
|
536
578
|
|
|
579
|
+
|
|
537
580
|
# ----------------------------------------------------------------------
|
|
538
581
|
# ocrd workspace remove-group
|
|
539
582
|
# ----------------------------------------------------------------------
|
|
540
583
|
|
|
541
584
|
@workspace_cli.command('remove-group')
|
|
542
|
-
@click.option('-r', '--recursive', help="Delete any files in the group before the group itself",
|
|
543
|
-
|
|
585
|
+
@click.option('-r', '--recursive', help="Delete any files in the group before the group itself",
|
|
586
|
+
default=False, is_flag=True)
|
|
587
|
+
@click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS",
|
|
588
|
+
default=False, is_flag=True)
|
|
544
589
|
@click.option('-k', '--keep-files', help="Do not delete files from file system", default=False, is_flag=True)
|
|
545
590
|
@click.argument('GROUP', nargs=-1)
|
|
546
591
|
@pass_workspace
|
|
@@ -558,6 +603,7 @@ def remove_group(ctx, group, recursive, force, keep_files):
|
|
|
558
603
|
workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
|
|
559
604
|
workspace.save_mets()
|
|
560
605
|
|
|
606
|
+
|
|
561
607
|
# ----------------------------------------------------------------------
|
|
562
608
|
# ocrd workspace prune-files
|
|
563
609
|
# ----------------------------------------------------------------------
|
|
@@ -590,16 +636,19 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id):
|
|
|
590
636
|
workspace.mets.remove_file(f.ID)
|
|
591
637
|
except Exception as e:
|
|
592
638
|
ctx.log.exception("Error removing %f: %s", f, e)
|
|
593
|
-
raise
|
|
639
|
+
raise e
|
|
594
640
|
workspace.save_mets()
|
|
595
641
|
|
|
642
|
+
|
|
596
643
|
# ----------------------------------------------------------------------
|
|
597
644
|
# ocrd workspace clean
|
|
598
645
|
# ----------------------------------------------------------------------
|
|
599
646
|
|
|
600
647
|
@workspace_cli.command('clean')
|
|
601
|
-
@click.option('-n', '--dry-run', help="Don't actually do anything to the filesystem, just preview",
|
|
602
|
-
|
|
648
|
+
@click.option('-n', '--dry-run', help="Don't actually do anything to the filesystem, just preview",
|
|
649
|
+
default=False, is_flag=True)
|
|
650
|
+
@click.option('-d', '--directories', help="Remove untracked directories in addition to untracked files",
|
|
651
|
+
default=False, is_flag=True)
|
|
603
652
|
@click.argument('path_glob', nargs=-1, required=False)
|
|
604
653
|
@pass_workspace
|
|
605
654
|
def clean(ctx, dry_run, directories, path_glob):
|
|
@@ -646,6 +695,7 @@ def clean(ctx, dry_run, directories, path_glob):
|
|
|
646
695
|
else:
|
|
647
696
|
rmdir(path)
|
|
648
697
|
|
|
698
|
+
|
|
649
699
|
# ----------------------------------------------------------------------
|
|
650
700
|
# ocrd workspace list-group
|
|
651
701
|
# ----------------------------------------------------------------------
|
|
@@ -659,6 +709,7 @@ def list_groups(ctx):
|
|
|
659
709
|
workspace = ctx.workspace()
|
|
660
710
|
print("\n".join(workspace.mets.file_groups))
|
|
661
711
|
|
|
712
|
+
|
|
662
713
|
# ----------------------------------------------------------------------
|
|
663
714
|
# ocrd workspace list-page
|
|
664
715
|
# ----------------------------------------------------------------------
|
|
@@ -669,11 +720,15 @@ def list_groups(ctx):
|
|
|
669
720
|
show_default=True,
|
|
670
721
|
multiple=True,
|
|
671
722
|
type=click.Choice(METS_PAGE_DIV_ATTRIBUTE.names()))
|
|
672
|
-
@click.option('-f', '--output-format', help="Output format", type=click.Choice(['one-per-line', 'comma-separated', 'json']),
|
|
673
|
-
|
|
723
|
+
@click.option('-f', '--output-format', help="Output format", type=click.Choice(['one-per-line', 'comma-separated', 'json']),
|
|
724
|
+
default='one-per-line')
|
|
725
|
+
@click.option('-D', '--chunk-number', help="Partition the return value into n roughly equally sized chunks",
|
|
726
|
+
default=1, type=int)
|
|
674
727
|
@click.option('-C', '--chunk-index', help="Output the nth chunk of results, -1 for all of them.", default=None, type=int)
|
|
675
|
-
@click.option('-r', '--page-id-range', help="Restrict the pages to those matching the provided range,
|
|
676
|
-
|
|
728
|
+
@click.option('-r', '--page-id-range', help="Restrict the pages to those matching the provided range, "
|
|
729
|
+
"based on the @ID attribute. Separate start/end with ..")
|
|
730
|
+
@click.option('-R', '--numeric-range', help="Restrict the pages to those in the range, in numerical document order. "
|
|
731
|
+
"Separate start/end with ..")
|
|
677
732
|
@pass_workspace
|
|
678
733
|
def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page_id_range, numeric_range):
|
|
679
734
|
"""
|
|
@@ -715,6 +770,7 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page
|
|
|
715
770
|
lines.append(dumps(chunks))
|
|
716
771
|
print('\n'.join(lines))
|
|
717
772
|
|
|
773
|
+
|
|
718
774
|
# ----------------------------------------------------------------------
|
|
719
775
|
# ocrd workspace get-id
|
|
720
776
|
# ----------------------------------------------------------------------
|
|
@@ -730,6 +786,7 @@ def get_id(ctx):
|
|
|
730
786
|
if ID:
|
|
731
787
|
print(ID)
|
|
732
788
|
|
|
789
|
+
|
|
733
790
|
# ----------------------------------------------------------------------
|
|
734
791
|
# ocrd workspace set-id
|
|
735
792
|
# ----------------------------------------------------------------------
|
|
@@ -749,8 +806,10 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
|
|
|
749
806
|
workspace.mets.unique_identifier = id
|
|
750
807
|
workspace.save_mets()
|
|
751
808
|
|
|
809
|
+
|
|
752
810
|
@workspace_cli.command('update-page')
|
|
753
|
-
@click.option('--set', 'attr_value_pairs', help=
|
|
811
|
+
@click.option('--set', 'attr_value_pairs', help="set mets:div ATTR to VALUE", metavar="ATTR VALUE",
|
|
812
|
+
type=(click.Choice(METS_PAGE_DIV_ATTRIBUTE.names()), str), nargs=2, multiple=True)
|
|
754
813
|
@click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
|
|
755
814
|
@click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
|
|
756
815
|
@click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
|
|
@@ -777,6 +836,7 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
|
|
|
777
836
|
print(f"Error: {err}")
|
|
778
837
|
sys.exit(1)
|
|
779
838
|
|
|
839
|
+
|
|
780
840
|
# ----------------------------------------------------------------------
|
|
781
841
|
# ocrd workspace merge
|
|
782
842
|
# ----------------------------------------------------------------------
|
|
@@ -784,17 +844,21 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
|
|
|
784
844
|
def _handle_json_option(ctx, param, value):
|
|
785
845
|
return parse_json_string_or_file(value) if value else None
|
|
786
846
|
|
|
847
|
+
|
|
787
848
|
@workspace_cli.command('merge')
|
|
788
849
|
@click.argument('METS_PATH')
|
|
789
|
-
@click.option('--overwrite/--no-overwrite', is_flag=True, default=False,
|
|
790
|
-
|
|
850
|
+
@click.option('--overwrite/--no-overwrite', is_flag=True, default=False,
|
|
851
|
+
help="Overwrite on-disk file in case of file name conflicts with data from METS_PATH")
|
|
852
|
+
@click.option('--force/--no-force', is_flag=True, default=False,
|
|
853
|
+
help="Overwrite mets:file from --mets with mets:file from METS_PATH if IDs clash")
|
|
791
854
|
@click.option('--copy-files/--no-copy-files', is_flag=True, help="Copy files as well", default=True, show_default=True)
|
|
792
855
|
@click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp", callback=_handle_json_option)
|
|
793
856
|
@click.option('--fileId-mapping', help="JSON object mapping src to dest file ID", callback=_handle_json_option)
|
|
794
857
|
@click.option('--pageId-mapping', help="JSON object mapping src to dest page ID", callback=_handle_json_option)
|
|
795
858
|
@mets_find_options
|
|
796
859
|
@pass_workspace
|
|
797
|
-
def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping,
|
|
860
|
+
def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping,
|
|
861
|
+
file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_path): # pylint: disable=redefined-builtin
|
|
798
862
|
"""
|
|
799
863
|
Merges this workspace with the workspace that contains ``METS_PATH``
|
|
800
864
|
|
|
@@ -829,18 +893,20 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
|
|
|
829
893
|
)
|
|
830
894
|
workspace.save_mets()
|
|
831
895
|
|
|
896
|
+
|
|
832
897
|
# ----------------------------------------------------------------------
|
|
833
898
|
# ocrd workspace backup
|
|
834
899
|
# ----------------------------------------------------------------------
|
|
835
900
|
|
|
836
901
|
@workspace_cli.group('backup')
|
|
837
902
|
@pass_workspace
|
|
838
|
-
def workspace_backup_cli(ctx):
|
|
903
|
+
def workspace_backup_cli(ctx): # pylint: disable=unused-argument
|
|
839
904
|
"""
|
|
840
905
|
Backing and restoring workspaces - dev edition
|
|
841
906
|
"""
|
|
842
907
|
assert not ctx.mets_server_url, "Workspace backups currently not interoperable with METS Server"
|
|
843
908
|
|
|
909
|
+
|
|
844
910
|
@workspace_backup_cli.command('add')
|
|
845
911
|
@pass_workspace
|
|
846
912
|
def workspace_backup_add(ctx):
|
|
@@ -850,6 +916,7 @@ def workspace_backup_add(ctx):
|
|
|
850
916
|
backup_manager = ctx.backup_manager()
|
|
851
917
|
backup_manager.add()
|
|
852
918
|
|
|
919
|
+
|
|
853
920
|
@workspace_backup_cli.command('list')
|
|
854
921
|
@pass_workspace
|
|
855
922
|
def workspace_backup_list(ctx):
|
|
@@ -860,9 +927,10 @@ def workspace_backup_list(ctx):
|
|
|
860
927
|
for b in backup_manager.list():
|
|
861
928
|
print(b)
|
|
862
929
|
|
|
930
|
+
|
|
863
931
|
@workspace_backup_cli.command('restore')
|
|
864
932
|
@click.option('-f', '--choose-first', help="Restore first matching version if more than one", is_flag=True)
|
|
865
|
-
@click.argument('bak')
|
|
933
|
+
@click.argument('bak') # type=click.Path(dir_okay=False, readable=True, resolve_path=True))
|
|
866
934
|
@pass_workspace
|
|
867
935
|
def workspace_backup_restore(ctx, choose_first, bak):
|
|
868
936
|
"""
|
|
@@ -871,6 +939,7 @@ def workspace_backup_restore(ctx, choose_first, bak):
|
|
|
871
939
|
backup_manager = ctx.backup_manager()
|
|
872
940
|
backup_manager.restore(bak, choose_first)
|
|
873
941
|
|
|
942
|
+
|
|
874
943
|
@workspace_backup_cli.command('undo')
|
|
875
944
|
@pass_workspace
|
|
876
945
|
def workspace_backup_undo(ctx):
|
|
@@ -887,34 +956,38 @@ def workspace_backup_undo(ctx):
|
|
|
887
956
|
|
|
888
957
|
@workspace_cli.group('server')
|
|
889
958
|
@pass_workspace
|
|
890
|
-
def workspace_serve_cli(ctx):
|
|
959
|
+
def workspace_serve_cli(ctx): # pylint: disable=unused-argument
|
|
891
960
|
"""Control a METS server for this workspace"""
|
|
892
961
|
assert ctx.mets_server_url, "For METS server commands, you must provide '-U/--mets-server-url'"
|
|
893
962
|
|
|
963
|
+
|
|
894
964
|
@workspace_serve_cli.command('stop')
|
|
895
965
|
@pass_workspace
|
|
896
|
-
def workspace_serve_stop(ctx):
|
|
966
|
+
def workspace_serve_stop(ctx): # pylint: disable=unused-argument
|
|
897
967
|
"""Stop the METS server (saving changes to disk)"""
|
|
898
968
|
workspace = ctx.workspace()
|
|
899
969
|
workspace.mets.stop()
|
|
900
970
|
|
|
971
|
+
|
|
901
972
|
@workspace_serve_cli.command('reload')
|
|
902
973
|
@pass_workspace
|
|
903
|
-
def workspace_serve_reload(ctx):
|
|
974
|
+
def workspace_serve_reload(ctx): # pylint: disable=unused-argument
|
|
904
975
|
"""Reload the METS server from disk"""
|
|
905
976
|
workspace = ctx.workspace()
|
|
906
977
|
workspace.mets.reload()
|
|
907
978
|
|
|
979
|
+
|
|
908
980
|
@workspace_serve_cli.command('save')
|
|
909
981
|
@pass_workspace
|
|
910
|
-
def workspace_serve_save(ctx):
|
|
982
|
+
def workspace_serve_save(ctx): # pylint: disable=unused-argument
|
|
911
983
|
"""Save the METS changes to disk"""
|
|
912
984
|
workspace = ctx.workspace()
|
|
913
985
|
workspace.mets.save()
|
|
914
986
|
|
|
987
|
+
|
|
915
988
|
@workspace_serve_cli.command('start')
|
|
916
989
|
@pass_workspace
|
|
917
|
-
def workspace_serve_start(ctx):
|
|
990
|
+
def workspace_serve_start(ctx): # pylint: disable=unused-argument
|
|
918
991
|
"""
|
|
919
992
|
Start a METS server
|
|
920
993
|
|
ocrd/cli/zip.py
CHANGED
|
@@ -16,6 +16,7 @@ from ..resolver import Resolver
|
|
|
16
16
|
from ..workspace import Workspace
|
|
17
17
|
from ..workspace_bagger import WorkspaceBagger
|
|
18
18
|
|
|
19
|
+
|
|
19
20
|
@click.group("zip")
|
|
20
21
|
def zip_cli():
|
|
21
22
|
"""
|
|
@@ -23,6 +24,7 @@ def zip_cli():
|
|
|
23
24
|
"""
|
|
24
25
|
initLogging()
|
|
25
26
|
|
|
27
|
+
|
|
26
28
|
# ----------------------------------------------------------------------
|
|
27
29
|
# ocrd zip bag
|
|
28
30
|
# ----------------------------------------------------------------------
|
|
@@ -43,10 +45,12 @@ def zip_cli():
|
|
|
43
45
|
@click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True)
|
|
44
46
|
@click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default=DEFAULT_METS_BASENAME)
|
|
45
47
|
@click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum")
|
|
46
|
-
@click.option('-t', '--tag-file', help="Add a non-payload file to bag",
|
|
48
|
+
@click.option('-t', '--tag-file', help="Add a non-payload file to bag", multiple=True,
|
|
49
|
+
type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True))
|
|
47
50
|
@click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False)
|
|
48
51
|
@click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
|
|
49
|
-
def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identifier, mets,
|
|
52
|
+
def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identifier, mets,
|
|
53
|
+
base_version_checksum, tag_file, skip_zip, processes):
|
|
50
54
|
"""
|
|
51
55
|
Bag workspace as OCRD-ZIP at DEST
|
|
52
56
|
"""
|
|
@@ -66,6 +70,7 @@ def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identi
|
|
|
66
70
|
exclude_fileGrp=exclude_fileGrp,
|
|
67
71
|
)
|
|
68
72
|
|
|
73
|
+
|
|
69
74
|
# ----------------------------------------------------------------------
|
|
70
75
|
# ocrd zip spill
|
|
71
76
|
# ----------------------------------------------------------------------
|
|
@@ -89,6 +94,7 @@ def spill(dest, src):
|
|
|
89
94
|
workspace = workspace_bagger.spill(src, dest)
|
|
90
95
|
print(workspace)
|
|
91
96
|
|
|
97
|
+
|
|
92
98
|
# ----------------------------------------------------------------------
|
|
93
99
|
# ocrd zip validate
|
|
94
100
|
# ----------------------------------------------------------------------
|
|
@@ -97,8 +103,10 @@ def spill(dest, src):
|
|
|
97
103
|
@click.argument('src', type=click.Path(dir_okay=True, readable=True, resolve_path=True), required=True)
|
|
98
104
|
@click.option('-Z', '--skip-unzip', help="Treat SRC as a directory not a ZIP", is_flag=True, default=False)
|
|
99
105
|
@click.option('-B', '--skip-bag', help="Whether to skip all checks of manifests and files", is_flag=True, default=False)
|
|
100
|
-
@click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance",
|
|
101
|
-
|
|
106
|
+
@click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance",
|
|
107
|
+
is_flag=True, default=False)
|
|
108
|
+
@click.option('-D', '--skip-delete', help="Whether to skip deleting the unpacked OCRD-ZIP dir after valdiation",
|
|
109
|
+
is_flag=True, default=False)
|
|
102
110
|
@click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
|
|
103
111
|
def validate(src, **kwargs):
|
|
104
112
|
"""
|
|
@@ -113,6 +121,7 @@ def validate(src, **kwargs):
|
|
|
113
121
|
if not report.is_valid:
|
|
114
122
|
sys.exit(1)
|
|
115
123
|
|
|
124
|
+
|
|
116
125
|
# ----------------------------------------------------------------------
|
|
117
126
|
# ocrd zip update
|
|
118
127
|
# ----------------------------------------------------------------------
|