ocrd 3.0.0b4__py3-none-any.whl → 3.0.0b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/bashlib.py CHANGED
@@ -76,10 +76,10 @@ def bashlib_constants(name):
76
76
  @click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
77
77
  @click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
78
78
  @click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
79
- @click.option('-w', '--working-dir', help="Working Directory")
79
+ @click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server', default=None)
80
+ @click.option('-d', '--working-dir', help="Working Directory")
80
81
  @click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
81
82
  @click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
82
- # repeat some other processor options for convenience (will be ignored here)
83
83
  @click.option('-g', '--page-id', help="ID(s) of the pages to process")
84
84
  @click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
85
85
  "(with '--page-id', remove only those).\n"
@@ -126,9 +126,10 @@ def bashlib_input_files(ocrd_tool, executable, **kwargs):
126
126
  def executable(self):
127
127
  # needed for ocrd_tool lookup
128
128
  return executable
129
+ processor_class = FullBashlibProcessor
129
130
  else:
130
131
  # we have no true metadata file, so fill in just to make it work
131
- class FullBashlibProcessor(BashlibProcessor):
132
+ class UnknownBashlibProcessor(BashlibProcessor):
132
133
  @property
133
134
  def ocrd_tool(self):
134
135
  # needed to satisfy the validator
@@ -142,5 +143,6 @@ def bashlib_input_files(ocrd_tool, executable, **kwargs):
142
143
  def version(self):
143
144
  # needed to satisfy the validator and wrapper
144
145
  return '1.0'
146
+ processor_class = UnknownBashlibProcessor
145
147
 
146
- ocrd_cli_wrap_processor(FullBashlibProcessor, **kwargs)
148
+ ocrd_cli_wrap_processor(processor_class, **kwargs)
ocrd/cli/ocrd_tool.py CHANGED
@@ -125,7 +125,7 @@ def ocrd_tool_tool_list_resources(ctx):
125
125
  @click.argument('res_name')
126
126
  @pass_ocrd_tool
127
127
  def ocrd_tool_tool_resolve_resource(ctx, res_name):
128
- ctx.processor(None).resolve_resource(res_name)
128
+ print(ctx.processor(None).resolve_resource(res_name))
129
129
 
130
130
  @ocrd_tool_tool.command('show-resource', help="Dump a tool's file resource")
131
131
  @click.argument('res_name')
ocrd/cli/validate.py CHANGED
@@ -102,16 +102,19 @@ def validate_page(page, **kwargs):
102
102
  @validate_cli.command('tasks')
103
103
  @click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax')
104
104
  @click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME, help='Basename of the METS file, used in conjunction with --workspace')
105
+ @click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server')
105
106
  @click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.')
106
107
  @click.option('-g', '--page-id', help="ID(s) of the pages to process")
107
108
  @click.argument('tasks', nargs=-1, required=True)
108
- def validate_process(tasks, workspace, mets_basename, overwrite, page_id):
109
+ def validate_process(tasks, workspace, mets_basename, mets_server_url, overwrite, page_id):
109
110
  '''
110
111
  Validate a sequence of tasks passable to `ocrd process`
111
112
  '''
112
113
  if workspace:
113
- _inform_of_result(validate_tasks([ProcessorTask.parse(t) for t in tasks],
114
- Workspace(Resolver(), directory=workspace, mets_basename=mets_basename), page_id=page_id, overwrite=overwrite))
114
+ _inform_of_result(validate_tasks(
115
+ [ProcessorTask.parse(t) for t in tasks],
116
+ Workspace(Resolver(), directory=workspace, mets_basename=mets_basename, mets_server_url=mets_server_url),
117
+ page_id=page_id, overwrite=overwrite))
115
118
  else:
116
119
  for t in [ProcessorTask.parse(t) for t in tasks]:
117
120
  _inform_of_result(t.validate())
ocrd/cli/workspace.py CHANGED
@@ -36,6 +36,17 @@ class WorkspaceCtx():
36
36
  = self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
37
37
  self.automatic_backup = automatic_backup
38
38
 
39
+ def workspace(self):
40
+ return Workspace(
41
+ self.resolver,
42
+ directory=self.directory,
43
+ mets_basename=self.mets_basename,
44
+ automatic_backup=self.automatic_backup,
45
+ mets_server_url=self.mets_server_url,
46
+ )
47
+ def backup_manager(self):
48
+ return WorkspaceBackupManager(self.workspace())
49
+
39
50
 
40
51
  pass_workspace = click.make_pass_decorator(WorkspaceCtx)
41
52
 
@@ -138,6 +149,8 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
138
149
  LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
139
150
  ctx.directory = workspace_dir
140
151
 
152
+ assert not ctx.mets_server_url, \
153
+ f"clone cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
141
154
  workspace = ctx.resolver.workspace_from_url(
142
155
  mets_url,
143
156
  dst_dir=ctx.directory,
@@ -173,10 +186,12 @@ def workspace_init(ctx, clobber_mets, directory):
173
186
  if directory:
174
187
  LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory))
175
188
  ctx.directory = directory
189
+ assert not ctx.mets_server_url, \
190
+ f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
176
191
  workspace = ctx.resolver.workspace_from_nothing(
177
192
  directory=ctx.directory,
178
193
  mets_basename=ctx.mets_basename,
179
- clobber_mets=clobber_mets
194
+ clobber_mets=clobber_mets,
180
195
  )
181
196
  workspace.save_mets()
182
197
  print(workspace.directory)
@@ -200,13 +215,7 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
200
215
  Add a file or http(s) URL FNAME to METS in a workspace.
201
216
  If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.
202
217
  """
203
- workspace = Workspace(
204
- ctx.resolver,
205
- directory=ctx.directory,
206
- mets_basename=ctx.mets_basename,
207
- automatic_backup=ctx.automatic_backup,
208
- mets_server_url=ctx.mets_server_url,
209
- )
218
+ workspace = ctx.workspace()
210
219
 
211
220
  log = getLogger('ocrd.cli.workspace.add')
212
221
  if not mimetype:
@@ -313,13 +322,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
313
322
 
314
323
  """
315
324
  log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
316
- workspace = Workspace(
317
- ctx.resolver,
318
- directory=ctx.directory,
319
- mets_basename=ctx.mets_basename,
320
- automatic_backup=ctx.automatic_backup,
321
- mets_server_url=ctx.mets_server_url,
322
- )
325
+ workspace = ctx.workspace()
323
326
 
324
327
  try:
325
328
  pat = re.compile(regex)
@@ -455,12 +458,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
455
458
  output_field = [snake_to_camel.get(x, x) for x in output_field]
456
459
  modified_mets = False
457
460
  ret = []
458
- workspace = Workspace(
459
- ctx.resolver,
460
- directory=ctx.directory,
461
- mets_basename=ctx.mets_basename,
462
- mets_server_url=ctx.mets_server_url,
463
- )
461
+ workspace = ctx.workspace()
464
462
  with pushd_popd(workspace.directory):
465
463
  for f in workspace.find_files(
466
464
  file_id=file_id,
@@ -510,7 +508,9 @@ def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefin
510
508
  (If any ``ID`` starts with ``//``, then its remainder
511
509
  will be interpreted as a regular expression.)
512
510
  """
513
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
511
+ assert not ctx.mets_server_url, \
512
+ f"remove cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
513
+ workspace = ctx.workspace()
514
514
  for i in id:
515
515
  workspace.remove_file(i, force=force, keep_file=keep_file)
516
516
  workspace.save_mets()
@@ -528,7 +528,9 @@ def rename_group(ctx, old, new):
528
528
  """
529
529
  Rename fileGrp (USE attribute ``NEW`` to ``OLD``).
530
530
  """
531
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
531
+ assert not ctx.mets_server_url, \
532
+ f"rename-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
533
+ workspace = ctx.workspace()
532
534
  workspace.rename_file_group(old, new)
533
535
  workspace.save_mets()
534
536
 
@@ -549,7 +551,9 @@ def remove_group(ctx, group, recursive, force, keep_files):
549
551
  (If any ``GROUP`` starts with ``//``, then its remainder
550
552
  will be interpreted as a regular expression.)
551
553
  """
552
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
554
+ assert not ctx.mets_server_url, \
555
+ f"remove-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
556
+ workspace = ctx.workspace()
553
557
  for g in group:
554
558
  workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
555
559
  workspace.save_mets()
@@ -571,7 +575,9 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id):
571
575
  (If any ``FILTER`` starts with ``//``, then its remainder
572
576
  will be interpreted as a regular expression.)
573
577
  """
574
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
578
+ assert not ctx.mets_server_url, \
579
+ f"prune-files cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
580
+ workspace = ctx.workspace()
575
581
  with pushd_popd(workspace.directory):
576
582
  for f in workspace.find_files(
577
583
  file_id=file_id,
@@ -608,8 +614,7 @@ def clean(ctx, dry_run, directories, path_glob):
608
614
  If no PATH_GLOB are specified, then all files and directories
609
615
  may match.
610
616
  """
611
- log = getLogger('ocrd.cli.workspace.clean')
612
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
617
+ workspace = ctx.workspace()
613
618
  allowed_files = [normpath(f.local_filename) for f in workspace.find_files(local_only=True)]
614
619
  allowed_files.append(relpath(workspace.mets_target, start=workspace.directory))
615
620
  allowed_dirs = set(dirname(path) for path in allowed_files)
@@ -627,7 +632,7 @@ def clean(ctx, dry_run, directories, path_glob):
627
632
  if normpath(path) in allowed_files:
628
633
  continue
629
634
  if dry_run:
630
- log.info('unlink(%s)' % path)
635
+ ctx.log.info('unlink(%s)' % path)
631
636
  else:
632
637
  unlink(path)
633
638
  if not directories:
@@ -637,7 +642,7 @@ def clean(ctx, dry_run, directories, path_glob):
637
642
  if normpath(path) in allowed_dirs:
638
643
  continue
639
644
  if dry_run:
640
- log.info('rmdir(%s)' % path)
645
+ ctx.log.info('rmdir(%s)' % path)
641
646
  else:
642
647
  rmdir(path)
643
648
 
@@ -651,7 +656,7 @@ def list_groups(ctx):
651
656
  """
652
657
  List fileGrp USE attributes
653
658
  """
654
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
659
+ workspace = ctx.workspace()
655
660
  print("\n".join(workspace.mets.file_groups))
656
661
 
657
662
  # ----------------------------------------------------------------------
@@ -677,20 +682,16 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page
677
682
  (If any ``FILTER`` starts with ``//``, then its remainder
678
683
  will be interpreted as a regular expression.)
679
684
  """
680
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
681
- find_kwargs = {}
682
- if page_id_range and 'ID' in output_field:
683
- find_kwargs['pageId'] = page_id_range
684
- page_ids = sorted({x.pageId for x in workspace.mets.find_files(**find_kwargs) if x.pageId})
685
+ workspace = ctx.workspace()
685
686
  ret = []
686
-
687
- if output_field == ['ID']:
688
- ret = [[x] for x in page_ids]
689
- else:
690
- for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=','.join(page_ids), return_divs=True)):
687
+ if page_id_range or list(output_field) != ['ID']:
688
+ for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=page_id_range, return_divs=True)):
691
689
  ret.append([])
692
690
  for k in output_field:
693
691
  ret[i].append(page_div.get(k, 'None'))
692
+ else:
693
+ for page_id in workspace.mets.physical_pages:
694
+ ret.append([page_id])
694
695
 
695
696
  if numeric_range:
696
697
  start, end = map(int, numeric_range.split('..'))
@@ -724,7 +725,7 @@ def get_id(ctx):
724
725
  """
725
726
  Get METS id if any
726
727
  """
727
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
728
+ workspace = ctx.workspace()
728
729
  ID = workspace.mets.unique_identifier
729
730
  if ID:
730
731
  print(ID)
@@ -744,7 +745,7 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin
744
745
 
745
746
  Otherwise will create a new <mods:identifier type="purl">{{ ID }}</mods:identifier>.
746
747
  """
747
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
748
+ workspace = ctx.workspace()
748
749
  workspace.mets.unique_identifier = id
749
750
  workspace.save_mets()
750
751
 
@@ -767,7 +768,9 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
767
768
  if contentids:
768
769
  update_kwargs['CONTENTIDS'] = contentids
769
770
  try:
770
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
771
+ assert not ctx.mets_server_url, \
772
+ f"update-page cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
773
+ workspace = ctx.workspace()
771
774
  workspace.mets.update_physical_page_attributes(page_id, **update_kwargs)
772
775
  workspace.save_mets()
773
776
  except Exception as err:
@@ -805,7 +808,9 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
805
808
  mets_path = Path(mets_path)
806
809
  if filegrp_mapping:
807
810
  filegrp_mapping = loads(filegrp_mapping)
808
- workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
811
+ assert not ctx.mets_server_url, \
812
+ f"merge cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
813
+ workspace = ctx.workspace()
809
814
  other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name))
810
815
  workspace.merge(
811
816
  other_workspace,
@@ -829,11 +834,12 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
829
834
  # ----------------------------------------------------------------------
830
835
 
831
836
  @workspace_cli.group('backup')
832
- @click.pass_context
837
+ @pass_workspace
833
838
  def workspace_backup_cli(ctx): # pylint: disable=unused-argument
834
839
  """
835
840
  Backing and restoring workspaces - dev edition
836
841
  """
842
+ assert not ctx.mets_server_url, "Workspace backups currently not interoperable with METS Server"
837
843
 
838
844
  @workspace_backup_cli.command('add')
839
845
  @pass_workspace
@@ -841,7 +847,7 @@ def workspace_backup_add(ctx):
841
847
  """
842
848
  Create a new backup
843
849
  """
844
- backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
850
+ backup_manager = ctx.backup_manager()
845
851
  backup_manager.add()
846
852
 
847
853
  @workspace_backup_cli.command('list')
@@ -850,7 +856,7 @@ def workspace_backup_list(ctx):
850
856
  """
851
857
  List backups
852
858
  """
853
- backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
859
+ backup_manager = ctx.backup_manager()
854
860
  for b in backup_manager.list():
855
861
  print(b)
856
862
 
@@ -862,7 +868,7 @@ def workspace_backup_restore(ctx, choose_first, bak):
862
868
  """
863
869
  Restore backup BAK
864
870
  """
865
- backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
871
+ backup_manager = ctx.backup_manager()
866
872
  backup_manager.restore(bak, choose_first)
867
873
 
868
874
  @workspace_backup_cli.command('undo')
@@ -871,7 +877,7 @@ def workspace_backup_undo(ctx):
871
877
  """
872
878
  Restore the last backup
873
879
  """
874
- backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
880
+ backup_manager = ctx.backup_manager()
875
881
  backup_manager.undo()
876
882
 
877
883
 
@@ -888,15 +894,24 @@ def workspace_serve_cli(ctx): # pylint: disable=unused-argument
888
894
  @workspace_serve_cli.command('stop')
889
895
  @pass_workspace
890
896
  def workspace_serve_stop(ctx): # pylint: disable=unused-argument
891
- """Stop the METS server"""
892
- workspace = Workspace(
893
- ctx.resolver,
894
- directory=ctx.directory,
895
- mets_basename=ctx.mets_basename,
896
- mets_server_url=ctx.mets_server_url,
897
- )
897
+ """Stop the METS server (saving changes to disk)"""
898
+ workspace = ctx.workspace()
898
899
  workspace.mets.stop()
899
900
 
901
+ @workspace_serve_cli.command('reload')
902
+ @pass_workspace
903
+ def workspace_serve_reload(ctx): # pylint: disable=unused-argument
904
+ """Reload the METS server from disk"""
905
+ workspace = ctx.workspace()
906
+ workspace.mets.reload()
907
+
908
+ @workspace_serve_cli.command('save')
909
+ @pass_workspace
910
+ def workspace_serve_save(ctx): # pylint: disable=unused-argument
911
+ """Save the METS changes to disk"""
912
+ workspace = ctx.workspace()
913
+ workspace.mets.save()
914
+
900
915
  @workspace_serve_cli.command('start')
901
916
  @pass_workspace
902
917
  def workspace_serve_start(ctx): # pylint: disable=unused-argument
@@ -13,7 +13,6 @@ from ocrd_utils import (
13
13
  redirect_stderr_and_stdout_to_file,
14
14
  )
15
15
  from ocrd_validators import WorkspaceValidator
16
- from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
17
16
 
18
17
  from ..resolver import Resolver
19
18
  from ..processor.base import ResourceNotFoundError, run_processor
@@ -23,8 +22,6 @@ from .parameter_option import parameter_option, parameter_override_option
23
22
  from .ocrd_cli_options import ocrd_cli_options
24
23
  from .mets_find_options import mets_find_options
25
24
 
26
- SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]
27
-
28
25
 
29
26
  def ocrd_cli_wrap_processor(
30
27
  processorClass,
@@ -88,11 +85,9 @@ def ocrd_cli_wrap_processor(
88
85
  if list_resources:
89
86
  processor.list_resources()
90
87
  sys.exit()
91
- if subcommand:
88
+ if subcommand or address or queue or database:
92
89
  # Used for checking/starting network agents for the WebAPI architecture
93
90
  check_and_run_network_agent(processorClass, subcommand, address, database, queue)
94
- elif address or queue or database:
95
- raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
96
91
 
97
92
  # from here: single-run processing context
98
93
  initLogging()
@@ -162,6 +157,11 @@ def ocrd_cli_wrap_processor(
162
157
  def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
163
158
  """
164
159
  """
160
+ from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
161
+ SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]
162
+
163
+ if not subcommand:
164
+ raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
165
165
  if subcommand not in SUBCOMMANDS:
166
166
  raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}")
167
167
 
@@ -43,6 +43,7 @@ def ocrd_cli_options(f):
43
43
  option('--address', type=ServerAddressParamType()),
44
44
  option('--queue', type=QueueServerParamType()),
45
45
  option('--database', type=DatabaseParamType()),
46
+ option('-R', '--resolve-resource'),
46
47
  option('-C', '--show-resource'),
47
48
  option('-L', '--list-resources', is_flag=True, default=False),
48
49
  option('-J', '--dump-json', is_flag=True, default=False),
ocrd/lib.bash CHANGED
@@ -27,8 +27,8 @@ ocrd__log () {
27
27
  ## Ensure minimum version
28
28
  # ht https://stackoverflow.com/posts/4025065
29
29
  ocrd__minversion () {
30
- local minversion_raw="$1"
31
30
  set -e
31
+ local minversion_raw="$1"
32
32
  local version_raw=$(ocrd --version|sed 's/ocrd, version //')
33
33
  local version_mmp=$(echo "$version_raw" | grep -Eo '([0-9]+\.?){3}')
34
34
  local version_prerelease_suffix="${version_raw#$version_mmp}"
@@ -123,6 +123,7 @@ ocrd__usage () {
123
123
  ## declare -A ocrd__argv=()
124
124
  ## ```
125
125
  ocrd__parse_argv () {
126
+ set -e
126
127
 
127
128
  # if [[ -n "$ZSH_VERSION" ]];then
128
129
  # print -r -- ${+ocrd__argv} ${(t)ocrd__argv}
@@ -135,11 +136,16 @@ ocrd__parse_argv () {
135
136
  ocrd__raise "Must set \$params (declare -A params)"
136
137
  fi
137
138
 
139
+ if ! declare -p "params_json" >/dev/null 2>/dev/null ;then
140
+ ocrd__raise "Must set \$params_json (declare params_json)"
141
+ fi
142
+
138
143
  if [[ $# = 0 ]];then
139
144
  ocrd__usage
140
145
  exit 1
141
146
  fi
142
147
 
148
+ ocrd__argv[debug]=false
143
149
  ocrd__argv[overwrite]=false
144
150
  ocrd__argv[profile]=false
145
151
  ocrd__argv[profile_file]=
@@ -170,6 +176,7 @@ ocrd__parse_argv () {
170
176
  -w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;;
171
177
  -m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;;
172
178
  -U|--mets-server-url) ocrd__argv[mets_server_url]="$2" ; shift ;;
179
+ --debug) ocrd__argv[debug]=true ;;
173
180
  --overwrite) ocrd__argv[overwrite]=true ;;
174
181
  --profile) ocrd__argv[profile]=true ;;
175
182
  --profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;;
@@ -242,17 +249,6 @@ ocrd__parse_argv () {
242
249
  trap showtime DEBUG
243
250
  fi
244
251
 
245
- # check fileGrps
246
- local _valopts=( --workspace "${ocrd__argv[working_dir]}" --mets-basename "$(basename ${ocrd__argv[mets_file]})" )
247
- if [[ ${ocrd__argv[overwrite]} = true ]]; then
248
- _valopts+=( --overwrite )
249
- fi
250
- if [[ -n "${ocrd__argv[page_id]:-}" ]]; then
251
- _valopts+=( --page-id "${ocrd__argv[page_id]}" )
252
- fi
253
- _valopts+=( "${OCRD_TOOL_NAME#ocrd-} -I ${ocrd__argv[input_file_grp]} -O ${ocrd__argv[output_file_grp]} ${__parameters[*]@Q} ${__parameter_overrides[*]@Q}" )
254
- ocrd validate tasks "${_valopts[@]}" || exit $?
255
-
256
252
  # check parameters
257
253
  local params_parsed retval
258
254
  params_parsed="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params "${__parameters[@]}" "${__parameter_overrides[@]}")" || {
@@ -261,10 +257,12 @@ ocrd__parse_argv () {
261
257
  $params_parsed"
262
258
  }
263
259
  eval "$params_parsed"
260
+ params_json="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params --json "${__parameters[@]}" "${__parameter_overrides[@]}")"
264
261
 
265
262
  }
266
263
 
267
264
  ocrd__wrap () {
265
+ set -e
268
266
 
269
267
  declare -gx OCRD_TOOL_JSON="$1"
270
268
  declare -gx OCRD_TOOL_NAME="$2"
@@ -272,6 +270,7 @@ ocrd__wrap () {
272
270
  shift
273
271
  declare -Agx params
274
272
  params=()
273
+ declare -g params_json
275
274
  declare -Agx ocrd__argv
276
275
  ocrd__argv=()
277
276
 
@@ -293,22 +292,26 @@ ocrd__wrap () {
293
292
 
294
293
  ocrd__parse_argv "$@"
295
294
 
296
- i=0
297
- declare -ag ocrd__files=()
298
- while read line; do
299
- eval declare -Ag "ocrd__file$i=( $line )"
300
- eval "ocrd__files[$i]=ocrd__file$i"
301
- let ++i
302
- done < <(ocrd bashlib input-files \
295
+ declare -ag ocrd__files
296
+ IFS=$'\n'
297
+ ocrd__files=( $(ocrd bashlib input-files \
303
298
  --ocrd-tool $OCRD_TOOL_JSON \
304
299
  --executable $OCRD_TOOL_NAME \
300
+ $(if [[ ${ocrd__argv[debug]} = true ]]; then echo --debug; fi) \
301
+ $(if [[ ${ocrd__argv[overwrite]} = true ]]; then echo --overwrite; fi) \
305
302
  -m "${ocrd__argv[mets_file]}" \
303
+ -d "${ocrd__argv[working_dir]}" \
304
+ ${ocrd__argv[mets_server_url]:+-U} ${ocrd__argv[mets_server_url]:-} \
305
+ -p "$params_json" \
306
306
  -I "${ocrd__argv[input_file_grp]}" \
307
307
  -O "${ocrd__argv[output_file_grp]}" \
308
- ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-})
308
+ ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) )
309
+ IFS=$' \t\n'
309
310
  }
310
311
 
311
312
  ## usage: pageId=$(ocrd__input_file 3 pageId)
312
313
  ocrd__input_file() {
313
- eval echo "\${${ocrd__files[$1]}[$2]}"
314
+ declare -A input_file
315
+ eval input_file=( "${ocrd__files[$1]}" )
316
+ eval echo "${input_file[$2]}"
314
317
  }
ocrd/mets_server.py CHANGED
@@ -88,6 +88,14 @@ class OcrdFileGroupListModel(BaseModel):
88
88
  return OcrdFileGroupListModel(file_groups=file_groups)
89
89
 
90
90
 
91
+ class OcrdPageListModel(BaseModel):
92
+ physical_pages: List[str] = Field()
93
+
94
+ @staticmethod
95
+ def create(physical_pages: List[str]):
96
+ return OcrdPageListModel(physical_pages=physical_pages)
97
+
98
+
91
99
  class OcrdAgentListModel(BaseModel):
92
100
  agents: List[OcrdAgentModel] = Field()
93
101
 
@@ -210,6 +218,17 @@ class ClientSideOcrdMets:
210
218
  ).json()["text"]
211
219
  return self.ws_dir_path
212
220
 
221
+ @property
222
+ def physical_pages(self) -> List[str]:
223
+ if not self.multiplexing_mode:
224
+ return self.session.request("GET", f"{self.url}/physical_pages").json()["physical_pages"]
225
+ else:
226
+ return self.session.request(
227
+ "POST",
228
+ self.url,
229
+ json=MpxReq.physical_pages(self.ws_dir_path)
230
+ ).json()["physical_pages"]
231
+
213
232
  @property
214
233
  def file_groups(self):
215
234
  if not self.multiplexing_mode:
@@ -284,15 +303,17 @@ class ClientSideOcrdMets:
284
303
  file_id=ID, page_id=pageId,
285
304
  mimetype=mimetype, url=url, local_filename=local_filename
286
305
  )
306
+ # add force+ignore
307
+ kwargs = {**kwargs, **data.dict()}
287
308
 
288
309
  if not self.multiplexing_mode:
289
- r = self.session.request("POST", f"{self.url}/file", data=data.dict())
290
- if not r:
291
- raise RuntimeError("Add file failed. Please check provided parameters")
310
+ r = self.session.request("POST", f"{self.url}/file", data=kwargs)
311
+ if not r.ok:
312
+ raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()}")
292
313
  else:
293
- r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, data.dict()))
294
- if "error" in r:
295
- raise RuntimeError(f"Add file failed: Msg: {r['error']}")
314
+ r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, kwargs))
315
+ if not r.ok:
316
+ raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()[errors]}")
296
317
 
297
318
  return ClientSideOcrdFile(
298
319
  None, fileGrp=file_grp,
@@ -347,6 +368,11 @@ class MpxReq:
347
368
  return MpxReq.__args_wrapper(
348
369
  ws_dir_path, method_type="GET", response_type="text", request_url="workspace_path", request_data={})
349
370
 
371
+ @staticmethod
372
+ def physical_pages(ws_dir_path: str) -> Dict:
373
+ return MpxReq.__args_wrapper(
374
+ ws_dir_path, method_type="GET", response_type="dict", request_url="physical_pages", request_data={})
375
+
350
376
  @staticmethod
351
377
  def file_groups(ws_dir_path: str) -> Dict:
352
378
  return MpxReq.__args_wrapper(
@@ -466,6 +492,10 @@ class OcrdMetsServer:
466
492
  async def workspace_path():
467
493
  return Response(content=workspace.directory, media_type="text/plain")
468
494
 
495
+ @app.get(path='/physical_pages', response_model=OcrdPageListModel)
496
+ async def physical_pages():
497
+ return {'physical_pages': workspace.mets.physical_pages}
498
+
469
499
  @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
470
500
  async def file_groups():
471
501
  return {'file_groups': workspace.mets.file_groups}
@@ -505,7 +535,8 @@ class OcrdMetsServer:
505
535
  page_id: Optional[str] = Form(),
506
536
  mimetype: str = Form(),
507
537
  url: Optional[str] = Form(None),
508
- local_filename: Optional[str] = Form(None)
538
+ local_filename: Optional[str] = Form(None),
539
+ force: bool = Form(False),
509
540
  ):
510
541
  """
511
542
  Add a file
@@ -517,7 +548,7 @@ class OcrdMetsServer:
517
548
  )
518
549
  # Add to workspace
519
550
  kwargs = file_resource.dict()
520
- workspace.add_file(**kwargs)
551
+ workspace.add_file(**kwargs, force=force)
521
552
  return file_resource
522
553
 
523
554
  # ------------- #