ocrd-3.5.1-py3-none-any.whl → ocrd-3.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ocrd/cli/__init__.py +8 -6
  2. ocrd/cli/bashlib.py +8 -114
  3. ocrd/cli/network.py +0 -2
  4. ocrd/cli/ocrd_tool.py +26 -4
  5. ocrd/cli/process.py +1 -0
  6. ocrd/cli/resmgr.py +0 -1
  7. ocrd/cli/validate.py +32 -13
  8. ocrd/cli/workspace.py +125 -52
  9. ocrd/cli/zip.py +13 -4
  10. ocrd/decorators/__init__.py +28 -52
  11. ocrd/decorators/loglevel_option.py +4 -0
  12. ocrd/decorators/mets_find_options.py +2 -1
  13. ocrd/decorators/ocrd_cli_options.py +3 -7
  14. ocrd/decorators/parameter_option.py +12 -11
  15. ocrd/mets_server.py +11 -15
  16. ocrd/processor/base.py +88 -71
  17. ocrd/processor/builtin/dummy_processor.py +7 -4
  18. ocrd/processor/builtin/filter_processor.py +3 -2
  19. ocrd/processor/helpers.py +5 -6
  20. ocrd/processor/ocrd_page_result.py +7 -5
  21. ocrd/resolver.py +42 -32
  22. ocrd/task_sequence.py +11 -4
  23. ocrd/workspace.py +64 -54
  24. ocrd/workspace_backup.py +3 -0
  25. ocrd/workspace_bagger.py +15 -8
  26. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
  27. ocrd-3.7.0.dist-info/RECORD +123 -0
  28. ocrd_modelfactory/__init__.py +4 -2
  29. ocrd_models/constants.py +18 -1
  30. ocrd_models/ocrd_agent.py +1 -1
  31. ocrd_models/ocrd_exif.py +7 -3
  32. ocrd_models/ocrd_file.py +24 -19
  33. ocrd_models/ocrd_mets.py +90 -67
  34. ocrd_models/ocrd_page.py +17 -13
  35. ocrd_models/ocrd_xml_base.py +1 -0
  36. ocrd_models/report.py +2 -1
  37. ocrd_models/utils.py +4 -3
  38. ocrd_models/xpath_functions.py +3 -1
  39. ocrd_network/__init__.py +1 -2
  40. ocrd_network/cli/__init__.py +0 -2
  41. ocrd_network/cli/client.py +122 -50
  42. ocrd_network/cli/processing_server.py +1 -2
  43. ocrd_network/client.py +2 -2
  44. ocrd_network/client_utils.py +30 -13
  45. ocrd_network/constants.py +1 -6
  46. ocrd_network/database.py +3 -3
  47. ocrd_network/logging_utils.py +2 -7
  48. ocrd_network/models/__init__.py +0 -2
  49. ocrd_network/models/job.py +31 -33
  50. ocrd_network/models/messages.py +3 -2
  51. ocrd_network/models/workspace.py +5 -5
  52. ocrd_network/process_helpers.py +54 -17
  53. ocrd_network/processing_server.py +63 -114
  54. ocrd_network/processing_worker.py +6 -5
  55. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  56. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  57. ocrd_network/runtime_data/__init__.py +1 -2
  58. ocrd_network/runtime_data/deployer.py +12 -85
  59. ocrd_network/runtime_data/hosts.py +61 -130
  60. ocrd_network/runtime_data/network_agents.py +7 -31
  61. ocrd_network/runtime_data/network_services.py +1 -1
  62. ocrd_network/server_cache.py +1 -1
  63. ocrd_network/server_utils.py +13 -52
  64. ocrd_network/utils.py +1 -0
  65. ocrd_utils/__init__.py +4 -4
  66. ocrd_utils/config.py +86 -76
  67. ocrd_utils/deprecate.py +3 -0
  68. ocrd_utils/image.py +51 -23
  69. ocrd_utils/introspect.py +8 -3
  70. ocrd_utils/logging.py +15 -7
  71. ocrd_utils/os.py +17 -4
  72. ocrd_utils/str.py +32 -16
  73. ocrd_validators/json_validator.py +4 -1
  74. ocrd_validators/ocrd_tool_validator.py +2 -1
  75. ocrd_validators/ocrd_zip_validator.py +5 -4
  76. ocrd_validators/page_validator.py +21 -9
  77. ocrd_validators/parameter_validator.py +3 -2
  78. ocrd_validators/processing_server_config.schema.yml +1 -33
  79. ocrd_validators/resource_list_validator.py +3 -1
  80. ocrd_validators/workspace_validator.py +30 -20
  81. ocrd_validators/xsd_mets_validator.py +2 -1
  82. ocrd_validators/xsd_page_validator.py +2 -1
  83. ocrd_validators/xsd_validator.py +4 -2
  84. ocrd/cli/log.py +0 -51
  85. ocrd/lib.bash +0 -317
  86. ocrd-3.5.1.dist-info/RECORD +0 -128
  87. ocrd_network/cli/processor_server.py +0 -31
  88. ocrd_network/models/ocrd_tool.py +0 -12
  89. ocrd_network/processor_server.py +0 -255
  90. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
  91. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
  92. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
  93. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd/cli/workspace.py CHANGED
@@ -5,7 +5,6 @@ OCR-D CLI: workspace management
5
5
  :prog: ocrd workspace
6
6
  :nested: full
7
7
  """
8
- import os
9
8
  from os import rmdir, unlink
10
9
  from os.path import dirname, relpath, normpath, exists, join, isabs, isdir
11
10
  from pathlib import Path
@@ -19,7 +18,16 @@ import click
19
18
 
20
19
  from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager
21
20
  from ocrd.mets_server import OcrdMetsServer
22
- from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list, DEFAULT_METS_BASENAME
21
+ from ocrd_utils import (
22
+ getLogger,
23
+ initLogging,
24
+ pushd_popd,
25
+ EXT_TO_MIME,
26
+ safe_filename,
27
+ parse_json_string_or_file,
28
+ partition_list,
29
+ DEFAULT_METS_BASENAME,
30
+ )
23
31
  from ocrd.decorators import mets_find_options
24
32
  from . import command_with_replaced_help
25
33
  from ocrd_models.constants import METS_PAGE_DIV_ATTRIBUTE
@@ -32,8 +40,8 @@ class WorkspaceCtx():
32
40
  if mets_basename:
33
41
  self.log.warning(DeprecationWarning('--mets-basename is deprecated. Use --mets/--directory instead.'))
34
42
  self.resolver = Resolver()
35
- self.directory, self.mets_url, self.mets_basename, self.mets_server_url \
36
- = self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
43
+ self.directory, self.mets_url, self.mets_basename, self.mets_server_url = \
44
+ self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
37
45
  self.automatic_backup = automatic_backup
38
46
 
39
47
  def workspace(self):
@@ -44,20 +52,24 @@ class WorkspaceCtx():
44
52
  automatic_backup=self.automatic_backup,
45
53
  mets_server_url=self.mets_server_url,
46
54
  )
55
+
47
56
  def backup_manager(self):
48
57
  return WorkspaceBackupManager(self.workspace())
49
58
 
50
59
 
51
60
  pass_workspace = click.make_pass_decorator(WorkspaceCtx)
52
61
 
62
+
53
63
  # ----------------------------------------------------------------------
54
64
  # ocrd workspace
55
65
  # ----------------------------------------------------------------------
56
66
 
57
67
  @click.group("workspace")
58
- @click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR', help='Changes the workspace folder location [default: METS_URL directory or .]"')
68
+ @click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR',
69
+ help='Changes the workspace folder location [default: METS_URL directory or .]"')
59
70
  @click.option('-M', '--mets-basename', default=None, help='METS file basename. Deprecated, use --mets/--directory')
60
- @click.option('-m', '--mets', default=None, help='The path/URL of the METS file [default: WORKSPACE_DIR/mets.xml]', metavar="METS_URL")
71
+ @click.option('-m', '--mets', default=None, metavar="METS_URL",
72
+ help='The path/URL of the METS file [default: WORKSPACE_DIR/mets.xml]')
61
73
  @click.option('-U', '--mets-server-url', 'mets_server_url', help="TCP host URI or UDS path of METS server")
62
74
  @click.option('--backup', default=False, help="Backup mets.xml whenever it is saved.", is_flag=True)
63
75
  @click.pass_context
@@ -67,7 +79,7 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
67
79
 
68
80
  A workspace comprises a METS file and a directory as point of reference.
69
81
 
70
- Operates on the file system directly or via a METS server
82
+ Operates on the file system directly or via a METS server
71
83
  (already running via some prior `server start` subcommand).
72
84
  """
73
85
  initLogging()
@@ -79,6 +91,7 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
79
91
  automatic_backup=backup
80
92
  )
81
93
 
94
+
82
95
  # ----------------------------------------------------------------------
83
96
  # ocrd workspace validate
84
97
  # ----------------------------------------------------------------------
@@ -88,10 +101,12 @@ def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
88
101
  @pass_workspace
89
102
  @click.option('-a', '--download', is_flag=True, help="Download all files")
90
103
  @click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
91
- ['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density', 'page', 'page_xsd',
92
- 'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
93
- @click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
94
- @click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
104
+ ['imagefilename', 'alternativeimage_filename', 'alternativeimage_comments', 'dimension', 'pixel_density',
105
+ 'page', 'page_xsd', 'url', 'mets_fileid_page_pcgtsid', 'mets_unique_identifier', 'mets_files', 'mets_xsd']))
106
+ @click.option('--page-textequiv-consistency', '--page-strictness', type=click.Choice(['strict', 'lax', 'fix', 'off']),
107
+ default='strict', help="How strict to check PAGE multi-level textequiv consistency")
108
+ @click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency",
109
+ type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
95
110
  @click.argument('mets_url', default=None, required=False)
96
111
  def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
97
112
  """
@@ -105,7 +120,8 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
105
120
  """
106
121
  LOG = getLogger('ocrd.cli.workspace.validate')
107
122
  if mets_url:
108
- LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS init' instead of argument 'METS_URL' ('%s')" % mets_url))
123
+ LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS init' instead of "
124
+ "argument 'METS_URL' ('%s')" % mets_url))
109
125
  else:
110
126
  mets_url = ctx.mets_url
111
127
  report = WorkspaceValidator.validate(
@@ -121,6 +137,7 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
121
137
  if not report.is_valid:
122
138
  sys.exit(128)
123
139
 
140
+
124
141
  # ----------------------------------------------------------------------
125
142
  # ocrd workspace clone
126
143
  # ----------------------------------------------------------------------
@@ -128,13 +145,15 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
128
145
  @workspace_cli.command('clone', cls=command_with_replaced_help(
129
146
  (r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
130
147
  @click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
131
- @click.option('-a', '--download', is_flag=True, help="Download all selected files and add local path references in METS file afterwards")
148
+ @click.option('-a', '--download', is_flag=True, help="Download all selected files and add local FLocat "
149
+ "path references in METS file afterwards")
132
150
  @click.argument('mets_url')
133
151
  @mets_find_options
134
152
  # XXX deprecated
135
153
  @click.argument('workspace_dir', default=None, required=False)
136
154
  @pass_workspace
137
- def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_url, workspace_dir):
155
+ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mimetype,
156
+ include_fileGrp, exclude_fileGrp, mets_url, workspace_dir):
138
157
  """
139
158
  Create a workspace from METS_URL and return the directory
140
159
 
@@ -146,7 +165,8 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
146
165
  """
147
166
  LOG = getLogger('ocrd.cli.workspace.clone')
148
167
  if workspace_dir:
149
- LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
168
+ LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of "
169
+ "argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
150
170
  ctx.directory = workspace_dir
151
171
 
152
172
  assert not ctx.mets_server_url, \
@@ -167,6 +187,7 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
167
187
  workspace.save_mets()
168
188
  print(workspace.directory)
169
189
 
190
+
170
191
  # ----------------------------------------------------------------------
171
192
  # ocrd workspace init
172
193
  # ----------------------------------------------------------------------
@@ -184,7 +205,8 @@ def workspace_init(ctx, clobber_mets, directory):
184
205
  """
185
206
  LOG = getLogger('ocrd.cli.workspace.init')
186
207
  if directory:
187
- LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory))
208
+ LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of "
209
+ "argument 'DIRECTORY' ('%s')" % directory))
188
210
  ctx.directory = directory
189
211
  assert not ctx.mets_server_url, \
190
212
  f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
@@ -196,6 +218,7 @@ def workspace_init(ctx, clobber_mets, directory):
196
218
  workspace.save_mets()
197
219
  print(workspace.directory)
198
220
 
221
+
199
222
  # ----------------------------------------------------------------------
200
223
  # ocrd workspace add
201
224
  # ----------------------------------------------------------------------
@@ -203,11 +226,13 @@ def workspace_init(ctx, clobber_mets, directory):
203
226
  @workspace_cli.command('add')
204
227
  @click.option('-G', '--file-grp', help="fileGrp USE", required=True, metavar='FILE_GRP')
205
228
  @click.option('-i', '--file-id', help="ID for the file", required=True, metavar='FILE_ID')
206
- @click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided", required=False, metavar='TYPE')
229
+ @click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided",
230
+ required=False, metavar='TYPE')
207
231
  @click.option('-g', '--page-id', help="ID of the physical page", metavar='PAGE_ID')
208
232
  @click.option('-C', '--check-file-exists', help="Whether to ensure FNAME exists", is_flag=True, default=False)
209
233
  @click.option('--ignore', help="Do not check whether file exists.", default=False, is_flag=True)
210
- @click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.", default=False, is_flag=True)
234
+ @click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.",
235
+ default=False, is_flag=True)
211
236
  @click.argument('fname', required=True)
212
237
  @pass_workspace
213
238
  def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_file_exists, force, fname):
@@ -223,7 +248,8 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
223
248
  mimetype = EXT_TO_MIME[Path(fname).suffix]
224
249
  log.info("Guessed mimetype to be %s" % mimetype)
225
250
  except KeyError:
226
- log.error("Cannot guess mimetype from extension '%s' for '%s'. Set --mimetype explicitly" % (Path(fname).suffix, fname))
251
+ log.error("Cannot guess mimetype from extension '%s' for '%s'. "
252
+ "Set --mimetype explicitly" % (Path(fname).suffix, fname))
227
253
 
228
254
  log.debug("Adding '%s'", fname)
229
255
  local_filename = None
@@ -260,27 +286,34 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
260
286
  workspace.add_file(file_grp, **kwargs)
261
287
  workspace.save_mets()
262
288
 
289
+
263
290
  # ----------------------------------------------------------------------
264
291
  # ocrd workspace bulk-add
265
292
  # ----------------------------------------------------------------------
266
293
 
267
294
  # pylint: disable=broad-except
268
295
  @workspace_cli.command('bulk-add')
269
- @click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths to define named captures usable in the other parameters", required=True)
296
+ @click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths "
297
+ "to define named captures usable in the other parameters", required=True)
270
298
  @click.option('-m', '--mimetype', help="Media type of the file. If not provided, guess from filename", required=False)
271
299
  @click.option('-g', '--page-id', help="physical page ID of the file", required=False)
272
300
  @click.option('-i', '--file-id', help="ID of the file. If not provided, derive from fileGrp and filename", required=False)
273
301
  @click.option('-u', '--url', help="Remote URL of the file", required=False)
274
- @click.option('-l', '--local-filename', help="Local filesystem path in the workspace directory (copied from source file if different)", required=False)
302
+ @click.option('-l', '--local-filename', help="Local filesystem path in the workspace directory "
303
+ "(copied from source file if different)", required=False)
275
304
  @click.option('-G', '--file-grp', help="File group USE of the file", required=True)
276
- @click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview", default=False, is_flag=True)
277
- @click.option('-S', '--source-path', 'src_path_option', help="File path to copy from (if different from FILE_GLOB values)", required=False)
305
+ @click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview",
306
+ default=False, is_flag=True)
307
+ @click.option('-S', '--source-path', 'src_path_option', help="File path to copy from (if different from FILE_GLOB values)",
308
+ required=False)
278
309
  @click.option('-I', '--ignore', help="Disable checking for existing file entries (faster)", default=False, is_flag=True)
279
- @click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)", default=False, is_flag=True)
310
+ @click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)",
311
+ default=False, is_flag=True)
280
312
  @click.option('-s', '--skip', help="Skip files not matching --regex (instead of failing)", default=False, is_flag=True)
281
313
  @click.argument('file_glob', nargs=-1, required=True)
282
314
  @pass_workspace
283
- def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_filename, file_grp, dry_run, file_glob, src_path_option, ignore, force, skip):
315
+ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_filename, file_grp, dry_run,
316
+ file_glob, src_path_option, ignore, force, skip):
284
317
  """
285
318
  Add files in bulk to an OCR-D workspace.
286
319
 
@@ -321,7 +354,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
321
354
  -G '{{ filegrp }}' -g '{{ pageid }}' -i '{{ fileid }}' -S '{{ local_filename }}' -
322
355
 
323
356
  """
324
- log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
357
+ log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
325
358
  workspace = ctx.workspace()
326
359
 
327
360
  try:
@@ -355,7 +388,12 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
355
388
  group_dict = m.groupdict()
356
389
 
357
390
  # set up file info
358
- file_dict = {'local_filename': local_filename, 'url': url, 'mimetype': mimetype, 'file_id': file_id, 'page_id': page_id, 'file_grp': file_grp}
391
+ file_dict = {'local_filename': local_filename,
392
+ 'url': url,
393
+ 'mimetype': mimetype,
394
+ 'file_id': file_id,
395
+ 'page_id': page_id,
396
+ 'file_grp': file_grp}
359
397
 
360
398
  # Flag to track whether 'local_filename' should be 'src'
361
399
  local_filename_is_src = False
@@ -394,7 +432,8 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
394
432
  try:
395
433
  file_dict['mimetype'] = EXT_TO_MIME[srcpath.suffix]
396
434
  except KeyError:
397
- log.error("Cannot guess MIME type from extension '%s' for '%s'. Set --mimetype explicitly" % (srcpath.suffix, srcpath))
435
+ log.error("Cannot guess MIME type from extension '%s' for '%s'. "
436
+ "Set --mimetype explicitly" % (srcpath.suffix, srcpath))
398
437
 
399
438
  # copy files if src != url
400
439
  if local_filename_is_src:
@@ -413,7 +452,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
413
452
  if dry_run:
414
453
  log.info('workspace.add_file(%s)' % file_dict)
415
454
  else:
416
- workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
455
+ workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
417
456
 
418
457
  # save changes to disk
419
458
  workspace.save_mets()
@@ -447,7 +486,8 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
447
486
  @click.option('--keep-files', is_flag=True, help="Do not remove downloaded files from the workspace with --undo-download")
448
487
  @click.option('--wait', type=int, default=0, help="Wait this many seconds between download requests")
449
488
  @pass_workspace
450
- def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, include_fileGrp, exclude_fileGrp, download, undo_download, keep_files, wait):
489
+ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field,
490
+ include_fileGrp, exclude_fileGrp, download, undo_download, keep_files, wait):
451
491
  """
452
492
  Find files.
453
493
 
@@ -467,7 +507,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
467
507
  page_id=page_id,
468
508
  include_fileGrp=include_fileGrp,
469
509
  exclude_fileGrp=exclude_fileGrp,
470
- ):
510
+ ):
471
511
  if download and not f.local_filename:
472
512
  workspace.download_file(f)
473
513
  modified_mets = True
@@ -492,13 +532,15 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
492
532
  for fields in ret:
493
533
  print('\t'.join(fields))
494
534
 
535
+
495
536
  # ----------------------------------------------------------------------
496
537
  # ocrd workspace remove
497
538
  # ----------------------------------------------------------------------
498
539
 
499
540
  @workspace_cli.command('remove')
500
541
  @click.option('-k', '--keep-file', help="Do not delete file from file system", default=False, is_flag=True)
501
- @click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist", default=False, is_flag=True)
542
+ @click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist",
543
+ default=False, is_flag=True)
502
544
  @click.argument('ID', nargs=-1)
503
545
  @pass_workspace
504
546
  def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefined-builtin
@@ -534,13 +576,16 @@ def rename_group(ctx, old, new):
534
576
  workspace.rename_file_group(old, new)
535
577
  workspace.save_mets()
536
578
 
579
+
537
580
  # ----------------------------------------------------------------------
538
581
  # ocrd workspace remove-group
539
582
  # ----------------------------------------------------------------------
540
583
 
541
584
  @workspace_cli.command('remove-group')
542
- @click.option('-r', '--recursive', help="Delete any files in the group before the group itself", default=False, is_flag=True)
543
- @click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS", default=False, is_flag=True)
585
+ @click.option('-r', '--recursive', help="Delete any files in the group before the group itself",
586
+ default=False, is_flag=True)
587
+ @click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS",
588
+ default=False, is_flag=True)
544
589
  @click.option('-k', '--keep-files', help="Do not delete files from file system", default=False, is_flag=True)
545
590
  @click.argument('GROUP', nargs=-1)
546
591
  @pass_workspace
@@ -558,6 +603,7 @@ def remove_group(ctx, group, recursive, force, keep_files):
558
603
  workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
559
604
  workspace.save_mets()
560
605
 
606
+
561
607
  # ----------------------------------------------------------------------
562
608
  # ocrd workspace prune-files
563
609
  # ----------------------------------------------------------------------
@@ -590,16 +636,19 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id):
590
636
  workspace.mets.remove_file(f.ID)
591
637
  except Exception as e:
592
638
  ctx.log.exception("Error removing %f: %s", f, e)
593
- raise(e)
639
+ raise e
594
640
  workspace.save_mets()
595
641
 
642
+
596
643
  # ----------------------------------------------------------------------
597
644
  # ocrd workspace clean
598
645
  # ----------------------------------------------------------------------
599
646
 
600
647
  @workspace_cli.command('clean')
601
- @click.option('-n', '--dry-run', help="Don't actually do anything to the filesystem, just preview", default=False, is_flag=True)
602
- @click.option('-d', '--directories', help="Remove untracked directories in addition to untracked files", default=False, is_flag=True)
648
+ @click.option('-n', '--dry-run', help="Don't actually do anything to the filesystem, just preview",
649
+ default=False, is_flag=True)
650
+ @click.option('-d', '--directories', help="Remove untracked directories in addition to untracked files",
651
+ default=False, is_flag=True)
603
652
  @click.argument('path_glob', nargs=-1, required=False)
604
653
  @pass_workspace
605
654
  def clean(ctx, dry_run, directories, path_glob):
@@ -646,6 +695,7 @@ def clean(ctx, dry_run, directories, path_glob):
646
695
  else:
647
696
  rmdir(path)
648
697
 
698
+
649
699
  # ----------------------------------------------------------------------
650
700
  # ocrd workspace list-group
651
701
  # ----------------------------------------------------------------------
@@ -659,6 +709,7 @@ def list_groups(ctx):
659
709
  workspace = ctx.workspace()
660
710
  print("\n".join(workspace.mets.file_groups))
661
711
 
712
+
662
713
  # ----------------------------------------------------------------------
663
714
  # ocrd workspace list-page
664
715
  # ----------------------------------------------------------------------
@@ -669,11 +720,15 @@ def list_groups(ctx):
669
720
  show_default=True,
670
721
  multiple=True,
671
722
  type=click.Choice(METS_PAGE_DIV_ATTRIBUTE.names()))
672
- @click.option('-f', '--output-format', help="Output format", type=click.Choice(['one-per-line', 'comma-separated', 'json']), default='one-per-line')
673
- @click.option('-D', '--chunk-number', help="Partition the return value into n roughly equally sized chunks", default=1, type=int)
723
+ @click.option('-f', '--output-format', help="Output format", type=click.Choice(['one-per-line', 'comma-separated', 'json']),
724
+ default='one-per-line')
725
+ @click.option('-D', '--chunk-number', help="Partition the return value into n roughly equally sized chunks",
726
+ default=1, type=int)
674
727
  @click.option('-C', '--chunk-index', help="Output the nth chunk of results, -1 for all of them.", default=None, type=int)
675
- @click.option('-r', '--page-id-range', help="Restrict the pages to those matching the provided range, based on the @ID attribute. Separate start/end with ..")
676
- @click.option('-R', '--numeric-range', help="Restrict the pages to those in the range, in numerical document order. Separate start/end with ..")
728
+ @click.option('-r', '--page-id-range', help="Restrict the pages to those matching the provided range, "
729
+ "based on the @ID attribute. Separate start/end with ..")
730
+ @click.option('-R', '--numeric-range', help="Restrict the pages to those in the range, in numerical document order. "
731
+ "Separate start/end with ..")
677
732
  @pass_workspace
678
733
  def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page_id_range, numeric_range):
679
734
  """
@@ -715,6 +770,7 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page
715
770
  lines.append(dumps(chunks))
716
771
  print('\n'.join(lines))
717
772
 
773
+
718
774
  # ----------------------------------------------------------------------
719
775
  # ocrd workspace get-id
720
776
  # ----------------------------------------------------------------------
@@ -730,6 +786,7 @@ def get_id(ctx):
730
786
  if ID:
731
787
  print(ID)
732
788
 
789
+
733
790
  # ----------------------------------------------------------------------
734
791
  # ocrd workspace set-id
735
792
  # ----------------------------------------------------------------------
@@ -749,8 +806,10 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
749
806
  workspace.mets.unique_identifier = id
750
807
  workspace.save_mets()
751
808
 
809
+
752
810
  @workspace_cli.command('update-page')
753
- @click.option('--set', 'attr_value_pairs', help=f"set mets:div ATTR to VALUE. possible keys: {METS_PAGE_DIV_ATTRIBUTE.names()}", metavar="ATTR VALUE", nargs=2, multiple=True)
811
+ @click.option('--set', 'attr_value_pairs', help="set mets:div ATTR to VALUE", metavar="ATTR VALUE",
812
+ type=(click.Choice(METS_PAGE_DIV_ATTRIBUTE.names()), str), nargs=2, multiple=True)
754
813
  @click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
755
814
  @click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
756
815
  @click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
@@ -777,6 +836,7 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
777
836
  print(f"Error: {err}")
778
837
  sys.exit(1)
779
838
 
839
+
780
840
  # ----------------------------------------------------------------------
781
841
  # ocrd workspace merge
782
842
  # ----------------------------------------------------------------------
@@ -784,17 +844,21 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
784
844
  def _handle_json_option(ctx, param, value):
785
845
  return parse_json_string_or_file(value) if value else None
786
846
 
847
+
787
848
  @workspace_cli.command('merge')
788
849
  @click.argument('METS_PATH')
789
- @click.option('--overwrite/--no-overwrite', is_flag=True, default=False, help="Overwrite on-disk file in case of file name conflicts with data from METS_PATH")
790
- @click.option('--force/--no-force', is_flag=True, default=False, help="Overwrite mets:file from --mets with mets:file from METS_PATH if IDs clash")
850
+ @click.option('--overwrite/--no-overwrite', is_flag=True, default=False,
851
+ help="Overwrite on-disk file in case of file name conflicts with data from METS_PATH")
852
+ @click.option('--force/--no-force', is_flag=True, default=False,
853
+ help="Overwrite mets:file from --mets with mets:file from METS_PATH if IDs clash")
791
854
  @click.option('--copy-files/--no-copy-files', is_flag=True, help="Copy files as well", default=True, show_default=True)
792
855
  @click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp", callback=_handle_json_option)
793
856
  @click.option('--fileId-mapping', help="JSON object mapping src to dest file ID", callback=_handle_json_option)
794
857
  @click.option('--pageId-mapping', help="JSON object mapping src to dest page ID", callback=_handle_json_option)
795
858
  @mets_find_options
796
859
  @pass_workspace
797
- def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping, file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_path): # pylint: disable=redefined-builtin
860
+ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping,
861
+ file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_path): # pylint: disable=redefined-builtin
798
862
  """
799
863
  Merges this workspace with the workspace that contains ``METS_PATH``
800
864
 
@@ -829,18 +893,20 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
829
893
  )
830
894
  workspace.save_mets()
831
895
 
896
+
832
897
  # ----------------------------------------------------------------------
833
898
  # ocrd workspace backup
834
899
  # ----------------------------------------------------------------------
835
900
 
836
901
  @workspace_cli.group('backup')
837
902
  @pass_workspace
838
- def workspace_backup_cli(ctx): # pylint: disable=unused-argument
903
+ def workspace_backup_cli(ctx): # pylint: disable=unused-argument
839
904
  """
840
905
  Backing and restoring workspaces - dev edition
841
906
  """
842
907
  assert not ctx.mets_server_url, "Workspace backups currently not interoperable with METS Server"
843
908
 
909
+
844
910
  @workspace_backup_cli.command('add')
845
911
  @pass_workspace
846
912
  def workspace_backup_add(ctx):
@@ -850,6 +916,7 @@ def workspace_backup_add(ctx):
850
916
  backup_manager = ctx.backup_manager()
851
917
  backup_manager.add()
852
918
 
919
+
853
920
  @workspace_backup_cli.command('list')
854
921
  @pass_workspace
855
922
  def workspace_backup_list(ctx):
@@ -860,9 +927,10 @@ def workspace_backup_list(ctx):
860
927
  for b in backup_manager.list():
861
928
  print(b)
862
929
 
930
+
863
931
  @workspace_backup_cli.command('restore')
864
932
  @click.option('-f', '--choose-first', help="Restore first matching version if more than one", is_flag=True)
865
- @click.argument('bak') #, type=click.Path(dir_okay=False, readable=True, resolve_path=True))
933
+ @click.argument('bak') # type=click.Path(dir_okay=False, readable=True, resolve_path=True))
866
934
  @pass_workspace
867
935
  def workspace_backup_restore(ctx, choose_first, bak):
868
936
  """
@@ -871,6 +939,7 @@ def workspace_backup_restore(ctx, choose_first, bak):
871
939
  backup_manager = ctx.backup_manager()
872
940
  backup_manager.restore(bak, choose_first)
873
941
 
942
+
874
943
  @workspace_backup_cli.command('undo')
875
944
  @pass_workspace
876
945
  def workspace_backup_undo(ctx):
@@ -887,34 +956,38 @@ def workspace_backup_undo(ctx):
887
956
 
888
957
  @workspace_cli.group('server')
889
958
  @pass_workspace
890
- def workspace_serve_cli(ctx): # pylint: disable=unused-argument
959
+ def workspace_serve_cli(ctx): # pylint: disable=unused-argument
891
960
  """Control a METS server for this workspace"""
892
961
  assert ctx.mets_server_url, "For METS server commands, you must provide '-U/--mets-server-url'"
893
962
 
963
+
894
964
  @workspace_serve_cli.command('stop')
895
965
  @pass_workspace
896
- def workspace_serve_stop(ctx): # pylint: disable=unused-argument
966
+ def workspace_serve_stop(ctx): # pylint: disable=unused-argument
897
967
  """Stop the METS server (saving changes to disk)"""
898
968
  workspace = ctx.workspace()
899
969
  workspace.mets.stop()
900
970
 
971
+
901
972
  @workspace_serve_cli.command('reload')
902
973
  @pass_workspace
903
- def workspace_serve_reload(ctx): # pylint: disable=unused-argument
974
+ def workspace_serve_reload(ctx): # pylint: disable=unused-argument
904
975
  """Reload the METS server from disk"""
905
976
  workspace = ctx.workspace()
906
977
  workspace.mets.reload()
907
978
 
979
+
908
980
  @workspace_serve_cli.command('save')
909
981
  @pass_workspace
910
- def workspace_serve_save(ctx): # pylint: disable=unused-argument
982
+ def workspace_serve_save(ctx): # pylint: disable=unused-argument
911
983
  """Save the METS changes to disk"""
912
984
  workspace = ctx.workspace()
913
985
  workspace.mets.save()
914
986
 
987
+
915
988
  @workspace_serve_cli.command('start')
916
989
  @pass_workspace
917
- def workspace_serve_start(ctx): # pylint: disable=unused-argument
990
+ def workspace_serve_start(ctx): # pylint: disable=unused-argument
918
991
  """
919
992
  Start a METS server
920
993
 
ocrd/cli/zip.py CHANGED
@@ -16,6 +16,7 @@ from ..resolver import Resolver
16
16
  from ..workspace import Workspace
17
17
  from ..workspace_bagger import WorkspaceBagger
18
18
 
19
+
19
20
  @click.group("zip")
20
21
  def zip_cli():
21
22
  """
@@ -23,6 +24,7 @@ def zip_cli():
23
24
  """
24
25
  initLogging()
25
26
 
27
+
26
28
  # ----------------------------------------------------------------------
27
29
  # ocrd zip bag
28
30
  # ----------------------------------------------------------------------
@@ -43,10 +45,12 @@ def zip_cli():
43
45
  @click.option('-i', '--identifier', '--id', help="Ocrd-Identifier", required=True)
44
46
  @click.option('-m', '--mets', help="location of mets.xml in the bag's data dir", default=DEFAULT_METS_BASENAME)
45
47
  @click.option('-b', '--base-version-checksum', help="Ocrd-Base-Version-Checksum")
46
- @click.option('-t', '--tag-file', help="Add a non-payload file to bag", type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True), multiple=True)
48
+ @click.option('-t', '--tag-file', help="Add a non-payload file to bag", multiple=True,
49
+ type=click.Path(file_okay=True, dir_okay=False, readable=True, resolve_path=True))
47
50
  @click.option('-Z', '--skip-zip', help="Create a directory but do not ZIP it", is_flag=True, default=False)
48
51
  @click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
49
- def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identifier, mets, base_version_checksum, tag_file, skip_zip, processes):
52
+ def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identifier, mets,
53
+ base_version_checksum, tag_file, skip_zip, processes):
50
54
  """
51
55
  Bag workspace as OCRD-ZIP at DEST
52
56
  """
@@ -66,6 +70,7 @@ def bag(directory, mets_basename, dest, include_fileGrp, exclude_fileGrp, identi
66
70
  exclude_fileGrp=exclude_fileGrp,
67
71
  )
68
72
 
73
+
69
74
  # ----------------------------------------------------------------------
70
75
  # ocrd zip spill
71
76
  # ----------------------------------------------------------------------
@@ -89,6 +94,7 @@ def spill(dest, src):
89
94
  workspace = workspace_bagger.spill(src, dest)
90
95
  print(workspace)
91
96
 
97
+
92
98
  # ----------------------------------------------------------------------
93
99
  # ocrd zip validate
94
100
  # ----------------------------------------------------------------------
@@ -97,8 +103,10 @@ def spill(dest, src):
97
103
  @click.argument('src', type=click.Path(dir_okay=True, readable=True, resolve_path=True), required=True)
98
104
  @click.option('-Z', '--skip-unzip', help="Treat SRC as a directory not a ZIP", is_flag=True, default=False)
99
105
  @click.option('-B', '--skip-bag', help="Whether to skip all checks of manifests and files", is_flag=True, default=False)
100
- @click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance", is_flag=True, default=False)
101
- @click.option('-D', '--skip-delete', help="Whether to skip deleting the unpacked OCRD-ZIP dir after valdiation", is_flag=True, default=False)
106
+ @click.option('-C', '--skip-checksums', help="Whether to omit checksum checks but still check basic BagIt conformance",
107
+ is_flag=True, default=False)
108
+ @click.option('-D', '--skip-delete', help="Whether to skip deleting the unpacked OCRD-ZIP dir after valdiation",
109
+ is_flag=True, default=False)
102
110
  @click.option('-j', '--processes', help="Number of parallel processes", type=int, default=1)
103
111
  def validate(src, **kwargs):
104
112
  """
@@ -113,6 +121,7 @@ def validate(src, **kwargs):
113
121
  if not report.is_valid:
114
122
  sys.exit(1)
115
123
 
124
+
116
125
  # ----------------------------------------------------------------------
117
126
  # ocrd zip update
118
127
  # ----------------------------------------------------------------------