ocrd 3.5.1__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. ocrd/cli/__init__.py +6 -2
  2. ocrd/cli/bashlib.py +7 -2
  3. ocrd/cli/log.py +7 -2
  4. ocrd/cli/network.py +0 -2
  5. ocrd/cli/ocrd_tool.py +26 -4
  6. ocrd/cli/process.py +1 -0
  7. ocrd/cli/resmgr.py +0 -1
  8. ocrd/cli/validate.py +32 -13
  9. ocrd/cli/workspace.py +125 -52
  10. ocrd/cli/zip.py +13 -4
  11. ocrd/decorators/__init__.py +28 -52
  12. ocrd/decorators/loglevel_option.py +4 -0
  13. ocrd/decorators/mets_find_options.py +2 -1
  14. ocrd/decorators/ocrd_cli_options.py +3 -7
  15. ocrd/decorators/parameter_option.py +12 -11
  16. ocrd/lib.bash +6 -13
  17. ocrd/mets_server.py +6 -10
  18. ocrd/processor/base.py +88 -71
  19. ocrd/processor/builtin/dummy_processor.py +7 -4
  20. ocrd/processor/builtin/filter_processor.py +3 -2
  21. ocrd/processor/helpers.py +5 -6
  22. ocrd/processor/ocrd_page_result.py +7 -5
  23. ocrd/resolver.py +42 -32
  24. ocrd/task_sequence.py +11 -4
  25. ocrd/workspace.py +64 -54
  26. ocrd/workspace_backup.py +3 -0
  27. ocrd/workspace_bagger.py +15 -8
  28. {ocrd-3.5.1.dist-info → ocrd-3.6.0.dist-info}/METADATA +1 -1
  29. ocrd-3.6.0.dist-info/RECORD +125 -0
  30. ocrd_modelfactory/__init__.py +4 -2
  31. ocrd_models/constants.py +18 -1
  32. ocrd_models/ocrd_agent.py +1 -1
  33. ocrd_models/ocrd_exif.py +7 -3
  34. ocrd_models/ocrd_file.py +24 -19
  35. ocrd_models/ocrd_mets.py +90 -67
  36. ocrd_models/ocrd_page.py +17 -13
  37. ocrd_models/ocrd_xml_base.py +1 -0
  38. ocrd_models/report.py +2 -1
  39. ocrd_models/utils.py +4 -3
  40. ocrd_models/xpath_functions.py +3 -1
  41. ocrd_network/__init__.py +1 -2
  42. ocrd_network/cli/__init__.py +0 -2
  43. ocrd_network/cli/client.py +122 -50
  44. ocrd_network/cli/processing_server.py +1 -2
  45. ocrd_network/client.py +2 -2
  46. ocrd_network/client_utils.py +30 -13
  47. ocrd_network/constants.py +1 -6
  48. ocrd_network/database.py +3 -3
  49. ocrd_network/logging_utils.py +2 -7
  50. ocrd_network/models/__init__.py +0 -2
  51. ocrd_network/models/job.py +2 -5
  52. ocrd_network/models/workspace.py +1 -1
  53. ocrd_network/process_helpers.py +54 -17
  54. ocrd_network/processing_server.py +63 -114
  55. ocrd_network/processing_worker.py +6 -5
  56. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  57. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  58. ocrd_network/runtime_data/__init__.py +1 -2
  59. ocrd_network/runtime_data/deployer.py +12 -85
  60. ocrd_network/runtime_data/hosts.py +61 -130
  61. ocrd_network/runtime_data/network_agents.py +7 -31
  62. ocrd_network/runtime_data/network_services.py +1 -1
  63. ocrd_network/server_cache.py +1 -1
  64. ocrd_network/server_utils.py +13 -52
  65. ocrd_network/utils.py +1 -0
  66. ocrd_utils/__init__.py +4 -4
  67. ocrd_utils/config.py +86 -76
  68. ocrd_utils/deprecate.py +3 -0
  69. ocrd_utils/image.py +51 -23
  70. ocrd_utils/introspect.py +8 -3
  71. ocrd_utils/logging.py +12 -7
  72. ocrd_utils/os.py +16 -3
  73. ocrd_utils/str.py +32 -16
  74. ocrd_validators/json_validator.py +4 -1
  75. ocrd_validators/ocrd_tool_validator.py +2 -1
  76. ocrd_validators/ocrd_zip_validator.py +5 -4
  77. ocrd_validators/page_validator.py +21 -9
  78. ocrd_validators/parameter_validator.py +3 -2
  79. ocrd_validators/processing_server_config.schema.yml +1 -33
  80. ocrd_validators/resource_list_validator.py +3 -1
  81. ocrd_validators/workspace_validator.py +30 -20
  82. ocrd_validators/xsd_mets_validator.py +2 -1
  83. ocrd_validators/xsd_page_validator.py +2 -1
  84. ocrd_validators/xsd_validator.py +4 -2
  85. ocrd-3.5.1.dist-info/RECORD +0 -128
  86. ocrd_network/cli/processor_server.py +0 -31
  87. ocrd_network/models/ocrd_tool.py +0 -12
  88. ocrd_network/processor_server.py +0 -255
  89. {ocrd-3.5.1.dist-info → ocrd-3.6.0.dist-info}/LICENSE +0 -0
  90. {ocrd-3.5.1.dist-info → ocrd-3.6.0.dist-info}/WHEEL +0 -0
  91. {ocrd-3.5.1.dist-info → ocrd-3.6.0.dist-info}/entry_points.txt +0 -0
  92. {ocrd-3.5.1.dist-info → ocrd-3.6.0.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,36 @@
1
+ import sys
1
2
  import click
2
3
  from json import dumps
3
4
  from typing import List, Optional, Tuple
5
+ from urllib.parse import urlparse
6
+ from tempfile import NamedTemporaryFile
7
+
4
8
  from ocrd.decorators.parameter_option import parameter_option, parameter_override_option
5
9
  from ocrd_network.constants import JobState
6
10
  from ocrd_utils import DEFAULT_METS_BASENAME
7
11
  from ocrd_utils.introspect import set_json_key_value_overrides
8
12
  from ocrd_utils.str import parse_json_string_or_file
9
13
  from ..client import Client
14
+ from requests import RequestException
15
+
16
+
17
+ ADDRESS_HELP = 'The URL of the Processing Server. If not provided, ' + \
18
+ 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" environment variable is used by default'
10
19
 
11
20
 
21
+ class URLType(click.types.StringParamType):
22
+ name = "url"
23
+ def convert(self, value, param, ctx):
24
+ try:
25
+ parsed = urlparse(value)
26
+ if parsed.scheme not in ("http", "https"):
27
+ self.fail(f"invalid URL scheme ({parsed.scheme}): only HTTP/HTTPS allowed",
28
+ param, ctx)
29
+ return value
30
+ except ValueError as err:
31
+ self.fail(err, param, ctx)
32
+ URL = URLType()
33
+
12
34
  @click.group('client')
13
35
  def client_cli():
14
36
  """
@@ -27,30 +49,40 @@ def discovery_cli():
27
49
 
28
50
 
29
51
  @discovery_cli.command('processors')
30
- @click.option('--address',
31
- help='The address of the Processing Server. If not provided, '
32
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
52
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
33
53
  def check_deployed_processors(address: Optional[str]):
34
54
  """
35
- Get a list of deployed processing workers/processor servers.
55
+ Get a list of deployed processing workers.
36
56
  Each processor is shown only once regardless of the amount of deployed instances.
37
57
  """
38
58
  client = Client(server_addr_processing=address)
39
- processors_list = client.check_deployed_processors()
59
+ try:
60
+ processors_list = client.check_deployed_processors()
61
+ except RequestException as e:
62
+ print(
63
+ getattr(e, 'detail_message', str(e)),
64
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
65
+ )
66
+ sys.exit(1)
40
67
  print(dumps(processors_list, indent=4))
41
68
 
42
69
 
43
70
  @discovery_cli.command('processor')
44
- @click.option('--address',
45
- help='The address of the Processing Server. If not provided, '
46
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
71
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
47
72
  @click.argument('processor_name', required=True, type=click.STRING)
48
73
  def check_processor_ocrd_tool(address: Optional[str], processor_name: str):
49
74
  """
50
75
  Get the json tool of a deployed processor specified with `processor_name`
51
76
  """
52
77
  client = Client(server_addr_processing=address)
53
- ocrd_tool = client.check_deployed_processor_ocrd_tool(processor_name=processor_name)
78
+ try:
79
+ ocrd_tool = client.check_deployed_processor_ocrd_tool(processor_name=processor_name)
80
+ except RequestException as e:
81
+ print(
82
+ getattr(e, 'detail_message', str(e)),
83
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
84
+ )
85
+ sys.exit(1)
54
86
  print(dumps(ocrd_tool, indent=4))
55
87
 
56
88
 
@@ -63,39 +95,46 @@ def processing_cli():
63
95
 
64
96
 
65
97
  @processing_cli.command('check-log')
66
- @click.option('--address',
67
- help='The address of the Processing Server. If not provided, '
68
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
98
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
69
99
  @click.option('-j', '--processing-job-id', required=True)
70
- def check_processing_job_status(address: Optional[str], processing_job_id: str):
100
+ def check_processing_job_log(address: Optional[str], processing_job_id: str):
71
101
  """
72
102
  Check the log of a previously submitted processing job.
73
103
  """
74
104
  client = Client(server_addr_processing=address)
75
- response = client.check_job_log(job_id=processing_job_id)
105
+ try:
106
+ response = client.check_job_log(job_id=processing_job_id)
107
+ except RequestException as e:
108
+ print(
109
+ getattr(e, 'detail_message', str(e)),
110
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
111
+ )
112
+ sys.exit(1)
76
113
  print(response._content.decode(encoding='utf-8'))
77
114
 
78
115
 
79
116
  @processing_cli.command('check-status')
80
- @click.option('--address',
81
- help='The address of the Processing Server. If not provided, '
82
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
117
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
83
118
  @click.option('-j', '--processing-job-id', required=True)
84
119
  def check_processing_job_status(address: Optional[str], processing_job_id: str):
85
120
  """
86
121
  Check the status of a previously submitted processing job.
87
122
  """
88
123
  client = Client(server_addr_processing=address)
89
- job_status = client.check_job_status(processing_job_id)
90
- assert job_status
124
+ try:
125
+ job_status = client.check_job_status(processing_job_id)
126
+ except RequestException as e:
127
+ print(
128
+ getattr(e, 'detail_message', str(e)),
129
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
130
+ )
131
+ sys.exit(1)
91
132
  print(f"Processing job status: {job_status}")
92
133
 
93
134
 
94
135
  @processing_cli.command('run')
95
136
  @click.argument('processor_name', required=True, type=click.STRING)
96
- @click.option('--address',
97
- help='The address of the Processing Server. If not provided, '
98
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
137
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
99
138
  @click.option('-m', '--mets', required=True, default=DEFAULT_METS_BASENAME)
100
139
  @click.option('-I', '--input-file-grp', default='OCR-D-INPUT')
101
140
  @click.option('-O', '--output-file-grp', default='OCR-D-OUTPUT')
@@ -104,7 +143,6 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
104
143
  @parameter_override_option
105
144
  @click.option('--result-queue-name')
106
145
  @click.option('--callback-url')
107
- @click.option('--agent-type', default='worker')
108
146
  @click.option('-b', '--block', default=False, is_flag=True,
109
147
  help='If set, the client will block till job timeout, fail or success.')
110
148
  @click.option('-p', '--print-state', default=False, is_flag=True,
@@ -120,9 +158,6 @@ def send_processing_job_request(
120
158
  parameter_override: List[Tuple[str, str]],
121
159
  result_queue_name: Optional[str],
122
160
  callback_url: Optional[str],
123
- # TODO: This is temporally available to toggle
124
- # between the ProcessingWorker/ProcessorServer
125
- agent_type: Optional[str],
126
161
  block: Optional[bool],
127
162
  print_state: Optional[bool]
128
163
  ):
@@ -133,7 +168,6 @@ def send_processing_job_request(
133
168
  "path_to_mets": mets,
134
169
  "description": "OCR-D Network client request",
135
170
  "input_file_grps": input_file_grp.split(','),
136
- "agent_type": agent_type
137
171
  }
138
172
  if output_file_grp:
139
173
  req_params["output_file_grps"] = output_file_grp.split(',')
@@ -145,12 +179,26 @@ def send_processing_job_request(
145
179
  if callback_url:
146
180
  req_params["callback_url"] = callback_url
147
181
  client = Client(server_addr_processing=address)
148
- processing_job_id = client.send_processing_job_request(
149
- processor_name=processor_name, req_params=req_params)
150
- assert processing_job_id
182
+ try:
183
+ processing_job_id = client.send_processing_job_request(
184
+ processor_name=processor_name, req_params=req_params)
185
+ except RequestException as e:
186
+ print(
187
+ getattr(e, 'detail_message', str(e)),
188
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
189
+ )
190
+ sys.exit(1)
151
191
  print(f"Processing job id: {processing_job_id}")
192
+
152
193
  if block:
153
- client.poll_job_status(job_id=processing_job_id, print_state=print_state)
194
+ try:
195
+ client.poll_job_status(job_id=processing_job_id, print_state=print_state)
196
+ except RequestException as e:
197
+ print(
198
+ getattr(e, 'detail_message', str(e)),
199
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
200
+ )
201
+ sys.exit(1)
154
202
 
155
203
 
156
204
  @client_cli.group('workflow')
@@ -162,58 +210,82 @@ def workflow_cli():
162
210
 
163
211
 
164
212
  @workflow_cli.command('check-status')
165
- @click.option('--address', help='The address of the Processing Server. If not provided, '
166
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
213
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
167
214
  @click.option('-j', '--workflow-job-id', required=True)
168
215
  def check_workflow_job_status(address: Optional[str], workflow_job_id: str):
169
216
  """
170
217
  Check the status of a previously submitted workflow job.
171
218
  """
172
219
  client = Client(server_addr_processing=address)
173
- job_status = client.check_workflow_status(workflow_job_id)
174
- assert job_status
220
+ try:
221
+ job_status = client.check_workflow_status(workflow_job_id)
222
+ except RequestException as e:
223
+ print(
224
+ getattr(e, 'detail_message', str(e)),
225
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
226
+ )
227
+ sys.exit(1)
175
228
  print(f"Workflow job status: {job_status}")
176
229
 
177
230
 
178
231
  @workflow_cli.command('run')
179
- @click.option('--address', help='The address of the Processing Server. If not provided, '
180
- 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
181
- @click.option('-m', '--path-to-mets', required=True)
182
- @click.option('-w', '--path-to-workflow', required=True)
183
- @click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
232
+ @click.option('--address', type=URL, help=ADDRESS_HELP)
233
+ @click.option('-m', '--path-to-mets', required=True, help="path to METS file of workspace to be processed (server-side path)")
234
+ @click.option('-w', '--path-to-workflow', required=False, help="path to workflow file (server- or client-side path)")
235
+ @click.option('--page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
184
236
  @click.option('-b', '--block', default=False, is_flag=True,
185
237
  help='If set, the client will block till job timeout, fail or success.')
186
238
  @click.option('-p', '--print-state', default=False, is_flag=True,
187
239
  help='If set, the client will print job states by each iteration.')
240
+ @click.argument('tasks', nargs=-1)
188
241
  def send_workflow_job_request(
189
242
  address: Optional[str],
190
243
  path_to_mets: str,
191
- path_to_workflow: str,
244
+ path_to_workflow: Optional[str],
192
245
  page_wise: bool,
193
246
  block: bool,
194
- print_state: bool
247
+ print_state: bool,
248
+ tasks: List[str]
195
249
  ):
196
250
  """
197
251
  Submit a workflow job to the processing server.
252
+
253
+ Provide workflow either via `tasks` arguments (same syntax
254
+ as in ``ocrd process`` tasks arguments), or via `-w` file path
255
+ (same syntax, but newline separated).
198
256
  """
257
+ if bool(path_to_workflow) == bool(len(tasks)):
258
+ raise ValueError("either -w/path-to-workflow or task argument(s) is required")
259
+
199
260
  client = Client(server_addr_processing=address)
200
- workflow_job_id = client.send_workflow_job_request(
201
- path_to_wf=path_to_workflow,
202
- path_to_mets=path_to_mets,
203
- page_wise=page_wise,
204
- )
205
- assert workflow_job_id
261
+ with NamedTemporaryFile() as workflow_file:
262
+ for task in tasks:
263
+ workflow_file.write((task + '\n').encode('utf-8'))
264
+ workflow_file.flush()
265
+ workflow_job_id = client.send_workflow_job_request(
266
+ path_to_wf=path_to_workflow or workflow_file.name,
267
+ path_to_mets=path_to_mets,
268
+ page_wise=page_wise,
269
+ )
206
270
  print(f"Workflow job id: {workflow_job_id}")
207
271
  if block:
208
272
  print(f"Polling state of workflow job {workflow_job_id}")
209
- state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state)
273
+ try:
274
+ state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state)
275
+ except RequestException as e:
276
+ print(
277
+ getattr(e, 'detail_message', str(e)),
278
+ f"Requested URL: {getattr(getattr(e, 'response', ''), 'url', '')}"
279
+ )
280
+ sys.exit(1)
210
281
  if state != JobState.success:
211
282
  print(f"Workflow failed with {state}")
212
283
  exit(1)
213
284
  else:
214
- print(f"Workflow succeeded")
285
+ print("Workflow succeeded")
215
286
  exit(0)
216
287
 
288
+
217
289
  @client_cli.group('workspace')
218
290
  def workspace_cli():
219
291
  """
@@ -12,8 +12,7 @@ from ocrd_network import ProcessingServer, ServerAddressParamType
12
12
  def processing_server_cli(path_to_config, address: str):
13
13
  """
14
14
  Start the Processing Server
15
- (proxy between the user and the
16
- Processing Worker(s) / Processor Server(s))
15
+ (proxy between the user and the Processing Worker(s))
17
16
  """
18
17
 
19
18
  # Note, the address is already validated with the type field
ocrd_network/client.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from typing import Optional
2
- from ocrd_utils import config, getLogger, LOG_FORMAT
2
+ from ocrd_utils import config, getLogger
3
3
  from .client_utils import (
4
4
  get_ps_deployed_processors,
5
5
  get_ps_deployed_processor_ocrd_tool,
@@ -21,7 +21,7 @@ class Client:
21
21
  timeout: int = config.OCRD_NETWORK_CLIENT_POLLING_TIMEOUT,
22
22
  wait: int = config.OCRD_NETWORK_CLIENT_POLLING_SLEEP
23
23
  ):
24
- self.log = getLogger(f"ocrd_network.client")
24
+ self.log = getLogger("ocrd_network.client")
25
25
  if not server_addr_processing:
26
26
  server_addr_processing = config.OCRD_NETWORK_SERVER_ADDR_PROCESSING
27
27
  self.server_addr_processing = server_addr_processing
@@ -1,10 +1,13 @@
1
1
  import json
2
- from requests import get as request_get, post as request_post
2
+ import os
3
+ from requests import get as request_get, post as request_post, RequestException, Response
4
+ from requests.exceptions import JSONDecodeError
3
5
  from time import sleep
4
6
  from .constants import JobState, NETWORK_PROTOCOLS
5
7
 
6
8
 
7
- def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int, print_state: bool = False) -> JobState:
9
+ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int,
10
+ print_state: bool = False) -> JobState:
8
11
  if job_type not in ["workflow", "processor"]:
9
12
  raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'")
10
13
  job_state = JobState.unset
@@ -22,6 +25,19 @@ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries
22
25
  return job_state
23
26
 
24
27
 
28
+ def _raise_if_error(response: Response) -> None:
29
+ """Check the requests-response and raise an exception if its status code indicates an error"""
30
+ try:
31
+ response.raise_for_status()
32
+ except RequestException as e:
33
+ try:
34
+ message = response.json()["detail"]
35
+ except JSONDecodeError:
36
+ message = response.text
37
+ e.detail_message = message
38
+ raise e
39
+
40
+
25
41
  def poll_job_status_till_timeout_fail_or_success(
26
42
  ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
27
43
  return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait, print_state)
@@ -35,14 +51,14 @@ def poll_wf_status_till_timeout_fail_or_success(
35
51
  def get_ps_deployed_processors(ps_server_host: str):
36
52
  request_url = f"{ps_server_host}/processor"
37
53
  response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
38
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
54
+ _raise_if_error(response)
39
55
  return response.json()
40
56
 
41
57
 
42
58
  def get_ps_deployed_processor_ocrd_tool(ps_server_host: str, processor_name: str):
43
59
  request_url = f"{ps_server_host}/processor/info/{processor_name}"
44
60
  response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
45
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
61
+ _raise_if_error(response)
46
62
  return response.json()
47
63
 
48
64
 
@@ -55,17 +71,18 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str):
55
71
  def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState:
56
72
  request_url = f"{ps_server_host}/processor/job/{processing_job_id}"
57
73
  response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
58
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
74
+ _raise_if_error(response)
59
75
  job_state = response.json()["state"]
60
- assert job_state
76
+ assert job_state, "Property 'state' is expected to always have a value"
61
77
  return getattr(JobState, job_state.lower())
62
78
 
79
+
63
80
  def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState:
64
81
  request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}"
65
82
  response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
66
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
83
+ _raise_if_error(response)
67
84
  job_state = response.json()["state"]
68
- assert job_state
85
+ assert job_state, "Property 'state' is expected to always have a value"
69
86
  return getattr(JobState, job_state.lower())
70
87
 
71
88
 
@@ -76,9 +93,9 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d
76
93
  headers={"accept": "application/json; charset=utf-8"},
77
94
  json=job_input
78
95
  )
79
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
96
+ _raise_if_error(response)
80
97
  processing_job_id = response.json()["job_id"]
81
- assert processing_job_id
98
+ assert processing_job_id, "Property 'job_id' is expected to always have a value"
82
99
  return processing_job_id
83
100
 
84
101
 
@@ -92,16 +109,16 @@ def post_ps_workflow_request(
92
109
  response = request_post(
93
110
  url=request_url,
94
111
  headers={"accept": "application/json; charset=utf-8"},
95
- files={"workflow": open(path_to_wf, "rb")}
112
+ files={"workflow": open(path_to_wf, "rb") if os.path.exists(path_to_wf) else path_to_wf}
96
113
  )
97
114
  # print(response.json())
98
115
  # print(response.__dict__)
99
116
  json_resp_raw = response.text
100
117
  # print(f'post_ps_workflow_request >> {response.status_code}')
101
118
  # print(f'post_ps_workflow_request >> {json_resp_raw}')
102
- assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
119
+ _raise_if_error(response)
103
120
  wf_job_id = json.loads(json_resp_raw)["job_id"]
104
- assert wf_job_id
121
+ assert wf_job_id, "Property 'job_id' is expected to always have a value"
105
122
  return wf_job_id
106
123
 
107
124
 
ocrd_network/constants.py CHANGED
@@ -15,10 +15,6 @@ class StrEnum(str, Enum):
15
15
  def __str__(self):
16
16
  return self.value
17
17
 
18
- class AgentType(StrEnum):
19
- PROCESSING_WORKER = "worker"
20
- PROCESSOR_SERVER = "server"
21
-
22
18
 
23
19
  class DeployType(StrEnum):
24
20
  # Deployed by the Processing Server config file
@@ -39,7 +35,7 @@ class JobState(StrEnum):
39
35
  failed = "FAILED"
40
36
  # The processing job is queued inside the RabbitMQ
41
37
  queued = "QUEUED"
42
- # Processing job is currently running in a Worker or Processor Server
38
+ # Processing job is currently running on a Worker
43
39
  running = "RUNNING"
44
40
  # Processing job finished successfully
45
41
  success = "SUCCESS"
@@ -52,7 +48,6 @@ class NetworkLoggingDirs(StrEnum):
52
48
  PROCESSING_JOBS = "processing_jobs"
53
49
  PROCESSING_SERVERS = "processing_servers"
54
50
  PROCESSING_WORKERS = "processing_workers"
55
- PROCESSOR_SERVERS = "processor_servers"
56
51
 
57
52
 
58
53
  class ServerApiTags(StrEnum):
ocrd_network/database.py CHANGED
@@ -65,7 +65,7 @@ async def sync_db_create_workspace(mets_path: str) -> DBWorkspace:
65
65
  async def db_get_workspace(workspace_id: str = None, workspace_mets_path: str = None) -> DBWorkspace:
66
66
  workspace = None
67
67
  if not workspace_id and not workspace_mets_path:
68
- raise ValueError(f'Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
68
+ raise ValueError('Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
69
69
  if workspace_id:
70
70
  workspace = await DBWorkspace.find_one(
71
71
  DBWorkspace.workspace_id == workspace_id
@@ -89,7 +89,7 @@ async def sync_db_get_workspace(workspace_id: str = None, workspace_mets_path: s
89
89
  async def db_update_workspace(workspace_id: str = None, workspace_mets_path: str = None, **kwargs) -> DBWorkspace:
90
90
  workspace = None
91
91
  if not workspace_id and not workspace_mets_path:
92
- raise ValueError(f'Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
92
+ raise ValueError('Either `workspace_id` or `workspace_mets_path` field must be used as a search key')
93
93
  if workspace_id:
94
94
  workspace = await DBWorkspace.find_one(DBWorkspace.workspace_id == workspace_id)
95
95
  if not workspace:
@@ -274,4 +274,4 @@ def verify_mongodb_available(mongo_url: str) -> None:
274
274
  client = MongoClient(mongo_url, serverSelectionTimeoutMS=60000.0)
275
275
  client.admin.command("ismaster")
276
276
  except Exception:
277
- raise RuntimeError(f'Cannot connect to MongoDB: {re_sub(r":[^@]+@", ":****@", mongo_url)}')
277
+ raise RuntimeError(f'Cannot connect to MongoDB: {re_sub(r":[^@]+@", ":****@", mongo_url)}')
@@ -2,7 +2,7 @@ from logging import FileHandler, Formatter, Logger
2
2
  from pathlib import Path
3
3
 
4
4
  from ocrd_utils import config, LOG_FORMAT, safe_filename
5
- from .constants import AgentType, NetworkLoggingDirs
5
+ from .constants import NetworkLoggingDirs
6
6
 
7
7
 
8
8
  def configure_file_handler_with_formatter(logger: Logger, log_file: Path, mode: str = "a") -> None:
@@ -54,10 +54,5 @@ def get_processing_server_logging_file_path(pid: int) -> Path:
54
54
 
55
55
 
56
56
  def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Path:
57
- log_file: str = f"{AgentType.PROCESSING_WORKER}.{pid}.{processor_name}.log"
57
+ log_file: str = f"worker.{pid}.{processor_name}.log"
58
58
  return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSING_WORKERS), log_file)
59
-
60
-
61
- def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path:
62
- log_file: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log"
63
- return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS), log_file)
@@ -10,13 +10,11 @@ __all__ = [
10
10
  'DBWorkflowScript',
11
11
  'PYJobInput',
12
12
  'PYJobOutput',
13
- 'PYOcrdTool',
14
13
  'PYResultMessage',
15
14
  'PYWorkflowJobOutput'
16
15
  ]
17
16
 
18
17
  from .job import DBProcessorJob, DBWorkflowJob, PYJobInput, PYJobOutput, PYWorkflowJobOutput
19
18
  from .messages import PYResultMessage
20
- from .ocrd_tool import PYOcrdTool
21
19
  from .workspace import DBWorkspace
22
20
  from .workflow import DBWorkflowScript
@@ -2,7 +2,7 @@ from beanie import Document
2
2
  from datetime import datetime
3
3
  from pydantic import BaseModel
4
4
  from typing import Dict, List, Optional
5
- from ..constants import AgentType, JobState
5
+ from ..constants import JobState
6
6
 
7
7
 
8
8
  class PYJobInput(BaseModel):
@@ -18,9 +18,7 @@ class PYJobInput(BaseModel):
18
18
  parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation
19
19
  result_queue_name: Optional[str] = None
20
20
  callback_url: Optional[str] = None
21
- # Used to toggle between sending requests to different network agents
22
- agent_type: AgentType = AgentType.PROCESSING_WORKER
23
- # Auto generated by the Processing Server when forwarding to the Processor Server
21
+ # Auto generated by the Processing Server when forwarding to the Processing-Worker
24
22
  job_id: Optional[str] = None
25
23
  # If set, specifies a list of job ids this job depends on
26
24
  depends_on: Optional[List[str]] = None
@@ -32,7 +30,6 @@ class PYJobInput(BaseModel):
32
30
  'description': 'The description of this execution',
33
31
  'input_file_grps': ['DEFAULT'],
34
32
  'output_file_grps': ['OCR-D-BIN'],
35
- 'agent_type': AgentType.PROCESSING_WORKER,
36
33
  'page_id': 'PHYS_0001..PHYS_0003',
37
34
  'parameters': {}
38
35
  }
@@ -17,7 +17,7 @@ class DBWorkspace(Document):
17
17
  key-value-pairs which are saved here
18
18
  deleted the document is deleted if set, however, the record is still preserved
19
19
  pages_locked a data structure that holds output `fileGrp`s and their respective locked `page_id`
20
- that are currently being processed by an OCR-D processor (server or worker).
20
+ that are currently being processed by an OCR-D Processing-Worker.
21
21
  If no `page_id` field is set, an identifier "all_pages" will be used.
22
22
  mets_server_url If set, the reading from and writing to the mets file happens through the METS Server
23
23
  """
@@ -2,6 +2,8 @@ from contextlib import nullcontext
2
2
  from json import dumps
3
3
  from pathlib import Path
4
4
  from typing import List, Optional
5
+ from tempfile import NamedTemporaryFile
6
+ from logging.config import fileConfig
5
7
 
6
8
  from ocrd.processor.helpers import run_cli, run_processor
7
9
  from ocrd_utils import redirect_stderr_and_stdout_to_file, initLogging
@@ -28,23 +30,58 @@ def invoke_processor(
28
30
 
29
31
  workspace = get_ocrd_workspace_instance(mets_path=abs_path_to_mets, mets_server_url=mets_server_url)
30
32
  if processor_class:
31
- ctx_mgr = redirect_stderr_and_stdout_to_file(log_filename) if log_filename else nullcontext()
32
- with ctx_mgr:
33
- initLogging(force_reinit=True)
34
- try:
35
- run_processor(
36
- processorClass=processor_class,
37
- workspace=workspace,
38
- input_file_grp=input_file_grps_str,
39
- output_file_grp=output_file_grps_str,
40
- page_id=page_id,
41
- parameter=parameters,
42
- instance_caching=True,
43
- mets_server_url=mets_server_url,
44
- log_level=log_level
45
- )
46
- except Exception as error:
47
- raise RuntimeError(f"Python executable '{processor_class.__dict__}', error: {error}")
33
+ with NamedTemporaryFile(mode='w') as cfgfile:
34
+ cfgfile.write("""
35
+ [loggers]
36
+ keys=root,ocrd,ocrd_network,tensorflow,shapely_geos
37
+ [handlers]
38
+ keys=fileHandler
39
+ [formatters]
40
+ keys=defaultFormatter
41
+ [logger_root]
42
+ level=WARNING
43
+ handlers=fileHandler
44
+ [logger_ocrd]
45
+ level=INFO
46
+ handlers=
47
+ qualname=ocrd
48
+ [logger_ocrd_network]
49
+ level=INFO
50
+ handlers=
51
+ qualname=ocrd_network
52
+ [logger_tensorflow]
53
+ level=ERROR
54
+ handlers=
55
+ qualname=tensorflow
56
+ [logger_shapely_geos]
57
+ level=ERROR
58
+ handlers=
59
+ qualname=shapely.geos
60
+ [handler_fileHandler]
61
+ class=FileHandler
62
+ formatter=defaultFormatter
63
+ args=('{log_filename}','a+')
64
+ [formatter_defaultFormatter]
65
+ format=%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s
66
+ datefmt=%H:%M:%S
67
+ """.format(log_filename=log_filename))
68
+ cfgfile.flush()
69
+ # deletes all existing handlers
70
+ fileConfig(cfgfile.name)
71
+ try:
72
+ run_processor(
73
+ processorClass=processor_class,
74
+ workspace=workspace,
75
+ input_file_grp=input_file_grps_str,
76
+ output_file_grp=output_file_grps_str,
77
+ page_id=page_id,
78
+ parameter=parameters,
79
+ instance_caching=True,
80
+ mets_server_url=mets_server_url,
81
+ log_level=log_level
82
+ )
83
+ except Exception as error:
84
+ raise RuntimeError(f"Python executable '{processor_class.__dict__}', error: {error}")
48
85
  else:
49
86
  return_code = run_cli(
50
87
  executable=executable,