ocrd 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. ocrd/cli/__init__.py +6 -2
  2. ocrd/cli/bashlib.py +7 -2
  3. ocrd/cli/log.py +7 -2
  4. ocrd/cli/network.py +0 -2
  5. ocrd/cli/ocrd_tool.py +26 -4
  6. ocrd/cli/process.py +1 -0
  7. ocrd/cli/resmgr.py +0 -1
  8. ocrd/cli/validate.py +32 -13
  9. ocrd/cli/workspace.py +125 -52
  10. ocrd/cli/zip.py +13 -4
  11. ocrd/decorators/__init__.py +28 -52
  12. ocrd/decorators/loglevel_option.py +4 -0
  13. ocrd/decorators/mets_find_options.py +2 -1
  14. ocrd/decorators/ocrd_cli_options.py +3 -7
  15. ocrd/decorators/parameter_option.py +12 -11
  16. ocrd/lib.bash +6 -13
  17. ocrd/mets_server.py +6 -10
  18. ocrd/processor/base.py +88 -71
  19. ocrd/processor/builtin/dummy_processor.py +7 -4
  20. ocrd/processor/builtin/filter_processor.py +3 -2
  21. ocrd/processor/helpers.py +5 -6
  22. ocrd/processor/ocrd_page_result.py +7 -5
  23. ocrd/resolver.py +42 -32
  24. ocrd/task_sequence.py +11 -4
  25. ocrd/workspace.py +64 -54
  26. ocrd/workspace_backup.py +3 -0
  27. ocrd/workspace_bagger.py +15 -8
  28. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/METADATA +3 -2
  29. ocrd-3.6.0.dist-info/RECORD +125 -0
  30. ocrd_modelfactory/__init__.py +4 -2
  31. ocrd_models/constants.py +18 -1
  32. ocrd_models/ocrd_agent.py +1 -1
  33. ocrd_models/ocrd_exif.py +7 -3
  34. ocrd_models/ocrd_file.py +24 -19
  35. ocrd_models/ocrd_mets.py +90 -67
  36. ocrd_models/ocrd_page.py +17 -13
  37. ocrd_models/ocrd_xml_base.py +1 -0
  38. ocrd_models/report.py +2 -1
  39. ocrd_models/utils.py +4 -3
  40. ocrd_models/xpath_functions.py +3 -1
  41. ocrd_network/__init__.py +1 -2
  42. ocrd_network/cli/__init__.py +0 -2
  43. ocrd_network/cli/client.py +122 -50
  44. ocrd_network/cli/processing_server.py +1 -2
  45. ocrd_network/client.py +2 -2
  46. ocrd_network/client_utils.py +30 -13
  47. ocrd_network/constants.py +1 -6
  48. ocrd_network/database.py +3 -3
  49. ocrd_network/logging_utils.py +2 -7
  50. ocrd_network/models/__init__.py +0 -2
  51. ocrd_network/models/job.py +2 -5
  52. ocrd_network/models/workspace.py +1 -1
  53. ocrd_network/process_helpers.py +54 -17
  54. ocrd_network/processing_server.py +63 -114
  55. ocrd_network/processing_worker.py +6 -5
  56. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  57. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  58. ocrd_network/runtime_data/__init__.py +1 -2
  59. ocrd_network/runtime_data/deployer.py +12 -85
  60. ocrd_network/runtime_data/hosts.py +61 -130
  61. ocrd_network/runtime_data/network_agents.py +7 -31
  62. ocrd_network/runtime_data/network_services.py +1 -1
  63. ocrd_network/server_cache.py +1 -1
  64. ocrd_network/server_utils.py +13 -52
  65. ocrd_network/utils.py +1 -0
  66. ocrd_utils/__init__.py +4 -4
  67. ocrd_utils/config.py +86 -76
  68. ocrd_utils/deprecate.py +3 -0
  69. ocrd_utils/image.py +51 -23
  70. ocrd_utils/introspect.py +8 -3
  71. ocrd_utils/logging.py +12 -7
  72. ocrd_utils/os.py +16 -3
  73. ocrd_utils/str.py +32 -16
  74. ocrd_validators/json_validator.py +4 -1
  75. ocrd_validators/ocrd_tool_validator.py +2 -1
  76. ocrd_validators/ocrd_zip_validator.py +5 -4
  77. ocrd_validators/page_validator.py +21 -9
  78. ocrd_validators/parameter_validator.py +3 -2
  79. ocrd_validators/processing_server_config.schema.yml +1 -33
  80. ocrd_validators/resource_list_validator.py +3 -1
  81. ocrd_validators/workspace_validator.py +30 -20
  82. ocrd_validators/xsd_mets_validator.py +2 -1
  83. ocrd_validators/xsd_page_validator.py +2 -1
  84. ocrd_validators/xsd_validator.py +4 -2
  85. ocrd-3.5.0.dist-info/RECORD +0 -128
  86. ocrd_network/cli/processor_server.py +0 -31
  87. ocrd_network/models/ocrd_tool.py +0 -12
  88. ocrd_network/processor_server.py +0 -255
  89. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/LICENSE +0 -0
  90. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/WHEEL +0 -0
  91. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/entry_points.txt +0 -0
  92. {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/top_level.txt +0 -0
ocrd/decorators/__init__.py CHANGED
@@ -30,7 +30,7 @@ def ocrd_cli_wrap_processor(
30
30
  working_dir=None,
31
31
  dump_json=False,
32
32
  dump_module_dir=False,
33
- help=False, # pylint: disable=redefined-builtin
33
+ help=False, # pylint: disable=redefined-builtin
34
34
  profile=False,
35
35
  profile_file=None,
36
36
  version=False,
@@ -41,7 +41,6 @@ def ocrd_cli_wrap_processor(
41
41
  list_resources=False,
42
42
  # ocrd_network params start #
43
43
  subcommand=None,
44
- address=None,
45
44
  queue=None,
46
45
  log_filename=None,
47
46
  database=None,
@@ -88,9 +87,8 @@ def ocrd_cli_wrap_processor(
88
87
  if list_resources:
89
88
  processor.list_resources()
90
89
  sys.exit()
91
- if subcommand or address or queue or database:
92
- # Used for checking/starting network agents for the WebAPI architecture
93
- check_and_run_network_agent(processorClass, subcommand, address, database, queue)
90
+ if subcommand == "worker" or queue or database:
91
+ check_and_run_processing_worker(processorClass, database, queue)
94
92
 
95
93
  if 'parameter' in kwargs:
96
94
  # Disambiguate parameter file/literal, and resolve file
@@ -110,10 +108,10 @@ def ocrd_cli_wrap_processor(
110
108
  if not kwargs.get('input_file_grp', None):
111
109
  raise ValueError('-I/--input-file-grp is required')
112
110
  if 'output_file_grp' not in kwargs:
113
- raise ValueError('-O/--output-file-grp is required') # actually, it may be None
111
+ raise ValueError('-O/--output-file-grp is required') # actually, it may be None
114
112
  resolver = Resolver()
115
113
  working_dir, mets, _, mets_server_url = \
116
- resolver.resolve_mets_arguments(working_dir, mets, None, mets_server_url)
114
+ resolver.resolve_mets_arguments(working_dir, mets, None, mets_server_url)
117
115
  workspace = resolver.workspace_from_url(mets, working_dir, mets_server_url=mets_server_url)
118
116
  page_id = kwargs.get('page_id')
119
117
  if debug:
@@ -122,7 +120,10 @@ def ocrd_cli_wrap_processor(
122
120
  config.OCRD_EXISTING_OUTPUT = 'ABORT'
123
121
  if overwrite:
124
122
  config.OCRD_EXISTING_OUTPUT = 'OVERWRITE'
125
- report = WorkspaceValidator.check_file_grp(workspace, kwargs['input_file_grp'], '' if overwrite else kwargs['output_file_grp'], page_id)
123
+ report = WorkspaceValidator.check_file_grp(workspace,
124
+ kwargs['input_file_grp'],
125
+ '' if overwrite else kwargs['output_file_grp'],
126
+ page_id)
126
127
  if not report.is_valid:
127
128
  raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))
128
129
  # Set up profiling behavior from environment variables/flags
@@ -138,6 +139,7 @@ def ocrd_cli_wrap_processor(
138
139
  print("Profiling...")
139
140
  pr = cProfile.Profile()
140
141
  pr.enable()
142
+
141
143
  def goexit():
142
144
  pr.disable()
143
145
  print("Profiling completed")
@@ -146,6 +148,7 @@ def ocrd_cli_wrap_processor(
146
148
  s = io.StringIO()
147
149
  pstats.Stats(pr, stream=s).sort_stats("cumulative").print_stats()
148
150
  print(s.getvalue())
151
+
149
152
  atexit.register(goexit)
150
153
  if log_filename:
151
154
  log_ctx = redirect_stderr_and_stdout_to_file(log_filename)
@@ -155,53 +158,26 @@ def ocrd_cli_wrap_processor(
155
158
  run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
156
159
 
157
160
 
158
- def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
159
- """
161
+ def check_and_run_processing_worker(ProcessorClass, database: str, queue: str):
162
+ """ Check/start Processing Worker for the WebAPI architecture
160
163
  """
161
- from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
162
- SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]
163
-
164
- if not subcommand:
165
- raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
166
- if subcommand not in SUBCOMMANDS:
167
- raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}")
164
+ from ocrd_network import ProcessingWorker
168
165
 
169
166
  if not database:
170
- raise ValueError(f"Option '--database' is invalid for subcommand {subcommand}")
171
-
172
- if subcommand == AgentType.PROCESSOR_SERVER:
173
- if not address:
174
- raise ValueError(f"Option '--address' required for subcommand {subcommand}")
175
- if queue:
176
- raise ValueError(f"Option '--queue' invalid for subcommand {subcommand}")
177
- if subcommand == AgentType.PROCESSING_WORKER:
178
- if address:
179
- raise ValueError(f"Option '--address' invalid for subcommand {subcommand}")
180
- if not queue:
181
- raise ValueError(f"Option '--queue' required for subcommand {subcommand}")
167
+ raise ValueError("Option '--database' is required for the Processing Worker")
168
+ if not queue:
169
+ raise ValueError("Option '--queue' is required for the Processing Worker")
182
170
 
183
171
  processor = ProcessorClass(workspace=None)
184
- if subcommand == AgentType.PROCESSING_WORKER:
185
- processing_worker = ProcessingWorker(
186
- rabbitmq_addr=queue,
187
- mongodb_addr=database,
188
- processor_name=processor.ocrd_tool['executable'],
189
- ocrd_tool=processor.ocrd_tool,
190
- processor_class=ProcessorClass,
191
- )
192
- # The RMQConsumer is initialized and a connection to the RabbitMQ is performed
193
- processing_worker.connect_consumer()
194
- # Start consuming from the queue with name `processor_name`
195
- processing_worker.start_consuming()
196
- elif subcommand == AgentType.PROCESSOR_SERVER:
197
- # TODO: Better validate that inside the ProcessorServer itself
198
- host, port = address.split(':')
199
- processor_server = ProcessorServer(
200
- mongodb_addr=database,
201
- processor_name=processor.ocrd_tool['executable'],
202
- processor_class=ProcessorClass,
203
- )
204
- processor_server.run_server(host=host, port=int(port))
205
- else:
206
- raise ValueError(f"Unknown network agent type, must be one of: {SUBCOMMANDS}")
172
+ processing_worker = ProcessingWorker(
173
+ rabbitmq_addr=queue,
174
+ mongodb_addr=database,
175
+ processor_name=processor.ocrd_tool['executable'],
176
+ ocrd_tool=processor.ocrd_tool,
177
+ processor_class=ProcessorClass,
178
+ )
179
+ # The RMQConsumer is initialized and a connection to the RabbitMQ is performed
180
+ processing_worker.connect_consumer()
181
+ # Start consuming from the queue with name `processor_name`
182
+ processing_worker.start_consuming()
207
183
  sys.exit(0)
ocrd/decorators/loglevel_option.py CHANGED
@@ -1,14 +1,17 @@
1
1
  import click
2
2
  from ocrd_utils.logging import setOverrideLogLevel
3
3
 
4
+
4
5
  __all__ = ['ocrd_loglevel']
5
6
 
7
+
6
8
  def _setOverrideLogLevel(ctx, param, value): # pylint: disable=unused-argument
7
9
  if value is None: # Explicitly test for None because logging.DEBUG == 0
8
10
  return
9
11
  setOverrideLogLevel(value)
10
12
  return value
11
13
 
14
+
12
15
  loglevel_option = click.option('-l', '--log-level', help="Log level",
13
16
  type=click.Choice([
14
17
  'OFF', 'ERROR', 'WARN',
@@ -16,6 +19,7 @@ loglevel_option = click.option('-l', '--log-level', help="Log level",
16
19
  ]),
17
20
  default=None, callback=_setOverrideLogLevel)
18
21
 
22
+
19
23
  def ocrd_loglevel(f):
20
24
  """
21
25
  Add an option '--log-level' to set the log level.
ocrd/decorators/mets_find_options.py CHANGED
@@ -1,7 +1,8 @@
1
1
  from click import option
2
2
 
3
+
3
4
  def mets_find_options(f):
4
- for opt in [
5
+ for opt in [
5
6
  option('-G', '--file-grp', help="fileGrp USE", metavar='FILTER'),
6
7
  option('-m', '--mimetype', help="Media type to look for", metavar='FILTER'),
7
8
  option('-g', '--page-id', help="Page ID", metavar='FILTER'),
ocrd/decorators/ocrd_cli_options.py CHANGED
@@ -1,12 +1,10 @@
1
1
  import click
2
- from click import option, Path, group, command, argument
2
+ from click import option, Path, argument
3
3
  from ocrd_utils import DEFAULT_METS_BASENAME
4
- from ocrd_network import AgentType
5
4
  from .parameter_option import parameter_option, parameter_override_option
6
5
  from .loglevel_option import loglevel_option
7
6
  from ocrd_network import (
8
7
  DatabaseParamType,
9
- ServerAddressParamType,
10
8
  QueueServerParamType
11
9
  )
12
10
 
@@ -40,7 +38,6 @@ def ocrd_cli_options(f):
40
38
  parameter_override_option,
41
39
  loglevel_option,
42
40
  option('--log-filename', default=None),
43
- option('--address', type=ServerAddressParamType()),
44
41
  option('--queue', type=QueueServerParamType()),
45
42
  option('--database', type=DatabaseParamType()),
46
43
  option('-R', '--resolve-resource'),
@@ -50,13 +47,12 @@ def ocrd_cli_options(f):
50
47
  option('-D', '--dump-module-dir', is_flag=True, default=False),
51
48
  option('-h', '--help', is_flag=True, default=False),
52
49
  option('-V', '--version', is_flag=True, default=False),
53
- # Subcommand, only used for 'worker'/'server'. Cannot be handled in
50
+ # Subcommand, only used for 'worker'. Cannot be handled in
54
51
  # click because processors use the @command decorator and even if they
55
52
  # were using `group`, you cannot combine have a command with
56
53
  # subcommands. So we have to work around that by creating a
57
54
  # pseudo-subcommand handled in ocrd_cli_wrap_processor
58
- argument('subcommand', nargs=1, required=False,
59
- type=click.Choice(list(map(str, AgentType)))),
55
+ argument('subcommand', nargs=1, required=False, type=click.Choice(["worker"])),
60
56
  ]
61
57
  for param in params:
62
58
  param(f)
ocrd/decorators/parameter_option.py CHANGED
@@ -7,17 +7,18 @@ def _handle_param_option(ctx, param, value):
7
7
  from ocrd_utils import parse_json_string_or_file
8
8
  return parse_json_string_or_file(*list(value))
9
9
 
10
+
10
11
  parameter_option = option('-p', '--parameter',
11
- help="Parameters, either JSON string or path to JSON file",
12
- multiple=True,
13
- default=[],
14
- # now handled in ocrd_cli_wrap_processor to resolve processor preset files
15
- # callback=_handle_param_option
16
- callback=lambda ctx, param, kv: list(kv))
12
+ help="Parameters, either JSON string or path to JSON file",
13
+ multiple=True,
14
+ default=[],
15
+ # now handled in ocrd_cli_wrap_processor to resolve processor preset files
16
+ # callback=_handle_param_option
17
+ callback=lambda ctx, param, kv: list(kv))
17
18
 
18
19
  parameter_override_option = option('-P', '--parameter-override',
19
- help="Parameter override",
20
- nargs=2,
21
- multiple=True,
22
- callback=lambda ctx, param, kv: kv)
23
- # callback=lambda ctx, param, kv: {kv[0]: kv[1]})
20
+ help="Parameter override",
21
+ nargs=2,
22
+ multiple=True,
23
+ # callback=lambda ctx, param, kv: {kv[0]: kv[1]})
24
+ callback=lambda ctx, param, kv: kv)
ocrd/lib.bash CHANGED
@@ -183,30 +183,23 @@ ocrd__parse_argv () {
183
183
  -V|--version) ocrd ocrd-tool "$OCRD_TOOL_JSON" version; exit ;;
184
184
  --queue) ocrd__worker_queue="$2" ; shift ;;
185
185
  --database) ocrd__worker_database="$2" ; shift ;;
186
- --address) ocrd__worker_address="$2" ; shift ;;
187
186
  *) ocrd__raise "Unknown option '$1'" ;;
188
187
  esac
189
188
  shift
190
189
  done
191
190
 
192
- if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand -o -v ocrd__worker_address ]; then
191
+ if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand ]; then
193
192
  if ! [ -v ocrd__subcommand ] ; then
194
- ocrd__raise "Provide subcommand 'worker' or 'server' for Processing Worker / Processor Server"
193
+ ocrd__raise "Provide subcommand 'worker' for Processing Worker"
195
194
  elif ! [ -v ocrd__worker_database ]; then
196
- ocrd__raise "For the Processing Worker / Processor Server --database is required"
195
+ ocrd__raise "For the Processing Worker --database is required"
196
+ elif ! [ -v ocrd__worker_queue ]; then
197
+ ocrd__raise "For the Processing Worker --queue is required"
197
198
  fi
198
199
  if [ ${ocrd__subcommand} = "worker" ]; then
199
- if ! [ -v ocrd__worker_queue ]; then
200
- ocrd__raise "For the Processing Worker --queue is required"
201
- fi
202
200
  ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}"
203
- elif [ ${ocrd__subcommand} = "server" ]; then
204
- if ! [ -v ocrd__worker_address ]; then
205
- ocrd__raise "For the Processor Server --address is required"
206
- fi
207
- ocrd network processor-server $OCRD_TOOL_NAME --database "${ocrd__worker_database}" --address "${ocrd__worker_address}"
208
201
  else
209
- ocrd__raise "subcommand must be either 'worker' or 'server' not '${ocrd__subcommand}'"
202
+ ocrd__raise "subcommand must be 'worker' not '${ocrd__subcommand}'"
210
203
  fi
211
204
  exit
212
205
  fi
ocrd/mets_server.py CHANGED
@@ -3,12 +3,12 @@
3
3
  """
4
4
  import os
5
5
  import re
6
- from os import _exit, chmod
6
+ from os import chmod
7
7
  import signal
8
8
  from typing import Dict, Optional, Union, List, Tuple
9
9
  from time import sleep
10
10
  from pathlib import Path
11
- from subprocess import Popen, run as subprocess_run
11
+ from subprocess import Popen
12
12
  from urllib.parse import urlparse
13
13
  import socket
14
14
  import atexit
@@ -424,7 +424,7 @@ class OcrdMetsServer:
424
424
  # Wait for the mets server to start
425
425
  sleep(2)
426
426
  if sub_process.poll():
427
- raise RuntimeError(f"Mets server starting failed. See {log_file} for errors")
427
+ raise RuntimeError(f"Starting METS Server failed. See {log_file} for errors")
428
428
  return sub_process.pid
429
429
 
430
430
  @staticmethod
@@ -433,12 +433,12 @@ class OcrdMetsServer:
433
433
  sleep(3)
434
434
  try:
435
435
  os.kill(mets_server_pid, signal.SIGKILL)
436
- except ProcessLookupError as e:
436
+ except ProcessLookupError:
437
437
  pass
438
438
 
439
439
  def shutdown(self):
440
440
  pid = os.getpid()
441
- self.log.info(f"Shutdown method of mets server[{pid}] invoked, sending SIGTERM signal.")
441
+ self.log.info(f"Shutdown method of METS Server[{pid}] invoked, sending SIGTERM signal.")
442
442
  os.kill(pid, signal.SIGTERM)
443
443
  if self.is_uds:
444
444
  if Path(self.url).exists():
@@ -446,7 +446,7 @@ class OcrdMetsServer:
446
446
  Path(self.url).unlink()
447
447
 
448
448
  def startup(self):
449
- self.log.info(f"Configuring the Mets Server")
449
+ self.log.info("Configuring the METS Server")
450
450
 
451
451
  workspace = self.workspace
452
452
 
@@ -516,10 +516,6 @@ class OcrdMetsServer:
516
516
  self.log.debug(f"GET /physical_pages -> {response}")
517
517
  return response
518
518
 
519
- @app.get(path='/physical_pages', response_model=OcrdPageListModel)
520
- async def physical_pages():
521
- return {'physical_pages': workspace.mets.physical_pages}
522
-
523
519
  @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
524
520
  async def file_groups():
525
521
  response = {'file_groups': workspace.mets.file_groups}