ocrd 3.0.0b7__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -0,0 +1,51 @@
+ from ocrd_utils import xywh_from_points
+
+ pc_functions = []
+
+ def _export(func):
+     pc_functions.append(func)
+     return func
+
+ @_export
+ def pc_pixelarea(nodes):
+     """
+     Extract Coords/@points from all nodes, calculate the bounding
+     box, and accumulate areas.
+     """
+     area = 0
+     for node in nodes:
+         # FIXME: find out why we need to go to the parent here
+         node = node.parent.value
+         coords = node.find(f'{node.prefix}:Coords', node.nsmap)
+         if coords is None:
+             continue
+         points = coords.attrib['points']
+         xywh = xywh_from_points(points)
+         area += xywh['w'] * xywh['h']
+     return area
+
+ @_export
+ def pc_textequiv(nodes):
+     """
+     Extract TextEquiv/Unicode from all nodes, then concatenate
+     (interspersed with spaces or newlines).
+     """
+     text = ''
+     for node in nodes:
+         # FIXME: find out why we need to go to the parent here
+         node = node.parent.value
+         if text and node.tag.endswith('Region'):
+             text += '\n'
+         if text and node.tag.endswith('Line'):
+             text += '\n'
+         if text and node.tag.endswith('Word'):
+             text += ' '
+         equiv = node.find(f'{node.prefix}:TextEquiv', node.nsmap)
+         if equiv is None:
+             continue
+         string = equiv.find(f'{node.prefix}:Unicode', node.nsmap)
+         if string is None:
+             continue
+         text += str(string.text)
+     return text
+
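The new module above collects two PAGE-XML helper functions (pixel-area and text aggregation) in a `pc_functions` list. As a rough, stand-alone illustration of what they compute (not of how ocrd registers them), the same pixel-area aggregation can be done directly with lxml; the file name and the PAGE namespace version below are assumptions:

    # Illustration only, not part of the diff: aggregate bounding-box areas of all
    # TextRegion/Coords in one PAGE-XML file, using xywh_from_points as above.
    from lxml import etree
    from ocrd_utils import xywh_from_points

    PC_NS = {'pc': 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'}
    tree = etree.parse('OCR-D-SEG-BLOCK/FILE_0001.xml')  # hypothetical PAGE-XML file
    area = 0
    for coords in tree.iterfind('.//pc:TextRegion/pc:Coords', namespaces=PC_NS):
        xywh = xywh_from_points(coords.get('points'))
        area += xywh['w'] * xywh['h']
    print(f"accumulated bounding-box area: {area} px")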
@@ -2,6 +2,7 @@ import click
  from json import dumps
  from typing import List, Optional, Tuple
  from ocrd.decorators.parameter_option import parameter_option, parameter_override_option
+ from ocrd_network.constants import JobState
  from ocrd_utils import DEFAULT_METS_BASENAME
  from ocrd_utils.introspect import set_json_key_value_overrides
  from ocrd_utils.str import parse_json_string_or_file
@@ -104,8 +105,10 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
  @click.option('--result-queue-name')
  @click.option('--callback-url')
  @click.option('--agent-type', default='worker')
- @click.option('-b', '--block', default=False,
+ @click.option('-b', '--block', default=False, is_flag=True,
                help='If set, the client will block till job timeout, fail or success.')
+ @click.option('-p', '--print-state', default=False, is_flag=True,
+               help='If set, the client will print job states by each iteration.')
  def send_processing_job_request(
      address: Optional[str],
      processor_name: str,
@@ -120,7 +123,8 @@ def send_processing_job_request(
      # TODO: This is temporally available to toggle
      # between the ProcessingWorker/ProcessorServer
      agent_type: Optional[str],
-     block: Optional[bool]
+     block: Optional[bool],
+     print_state: Optional[bool]
  ):
      """
      Submit a processing job to the processing server.
@@ -146,7 +150,7 @@ def send_processing_job_request(
      assert processing_job_id
      print(f"Processing job id: {processing_job_id}")
      if block:
-         client.poll_job_status(job_id=processing_job_id)
+         client.poll_job_status(job_id=processing_job_id, print_state=print_state)
 
 
  @client_cli.group('workflow')
@@ -176,24 +180,39 @@ def check_workflow_job_status(address: Optional[str], workflow_job_id: str):
                'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
  @click.option('-m', '--path-to-mets', required=True)
  @click.option('-w', '--path-to-workflow', required=True)
- @click.option('-b', '--block', default=False,
+ @click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
+ @click.option('-b', '--block', default=False, is_flag=True,
                help='If set, the client will block till job timeout, fail or success.')
+ @click.option('-p', '--print-state', default=False, is_flag=True,
+               help='If set, the client will print job states by each iteration.')
  def send_workflow_job_request(
      address: Optional[str],
      path_to_mets: str,
      path_to_workflow: str,
-     block: Optional[bool]
+     page_wise: bool,
+     block: bool,
+     print_state: bool
  ):
      """
      Submit a workflow job to the processing server.
      """
      client = Client(server_addr_processing=address)
-     workflow_job_id = client.send_workflow_job_request(path_to_wf=path_to_workflow, path_to_mets=path_to_mets)
+     workflow_job_id = client.send_workflow_job_request(
+         path_to_wf=path_to_workflow,
+         path_to_mets=path_to_mets,
+         page_wise=page_wise,
+     )
      assert workflow_job_id
      print(f"Workflow job id: {workflow_job_id}")
      if block:
-         client.poll_workflow_status(job_id=workflow_job_id)
-
+         print(f"Polling state of workflow job {workflow_job_id}")
+         state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state)
+         if state != JobState.success:
+             print(f"Workflow failed with {state}")
+             exit(1)
+         else:
+             print(f"Workflow succeeded")
+             exit(0)
 
  @client_cli.group('workspace')
  def workspace_cli():
ocrd_network/client.py CHANGED
@@ -46,18 +46,21 @@ class Client:
      def check_workflow_status(self, workflow_job_id: str):
          return get_ps_workflow_job_status(self.server_addr_processing, workflow_job_id=workflow_job_id)
 
-     def poll_job_status(self, job_id: str) -> str:
+     def poll_job_status(self, job_id: str, print_state: bool = False) -> str:
          return poll_job_status_till_timeout_fail_or_success(
-             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait)
+             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait,
+             print_state=print_state)
 
-     def poll_workflow_status(self, job_id: str) -> str:
+     def poll_workflow_status(self, job_id: str, print_state: bool = False) -> str:
          return poll_wf_status_till_timeout_fail_or_success(
-             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait)
+             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait,
+             print_state=print_state)
 
      def send_processing_job_request(self, processor_name: str, req_params: dict) -> str:
          return post_ps_processing_request(
              ps_server_host=self.server_addr_processing, processor=processor_name, job_input=req_params)
 
-     def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str):
+     def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str, page_wise: bool = False):
          return post_ps_workflow_request(
-             ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets)
+             ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets,
+             page_wise=page_wise)
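A minimal usage sketch of the extended `Client` API, mirroring the new `--block`/`--print-state`/`--page-wise` CLI flags above; the server address and file paths are placeholders, not values from this release:

    # Hedged sketch, not part of the diff: driving the extended Client API from Python.
    from ocrd_network.client import Client
    from ocrd_network.constants import JobState

    client = Client(server_addr_processing="http://localhost:8000")  # assumed address
    job_id = client.send_workflow_job_request(
        path_to_wf="workflow.txt",          # placeholder workflow file
        path_to_mets="/data/ws/mets.xml",   # placeholder METS path
        page_wise=True,                     # new in 3.0.1: one job per page
    )
    state = client.poll_workflow_status(job_id=job_id, print_state=True)
    if state != JobState.success:
        raise RuntimeError(f"workflow ended in state {state}")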
@@ -1,9 +1,10 @@
+ import json
  from requests import get as request_get, post as request_post
  from time import sleep
  from .constants import JobState, NETWORK_PROTOCOLS
 
 
- def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int):
+ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int, print_state: bool = False) -> JobState:
      if job_type not in ["workflow", "processor"]:
          raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'")
      job_state = JobState.unset
@@ -13,18 +14,22 @@ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries
              job_state = get_ps_processing_job_status(ps_server_host, job_id)
          if job_type == "workflow":
              job_state = get_ps_workflow_job_status(ps_server_host, job_id)
+         if print_state:
+             print(f"State of the {job_type} job {job_id}: {job_state}")
          if job_state == JobState.success or job_state == JobState.failed:
              break
          tries -= 1
      return job_state
 
 
- def poll_job_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState:
-     return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait)
+ def poll_job_status_till_timeout_fail_or_success(
+         ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
+     return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait, print_state)
 
 
- def poll_wf_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState:
-     return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait)
+ def poll_wf_status_till_timeout_fail_or_success(
+         ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
+     return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait, print_state)
 
 
  def get_ps_deployed_processors(ps_server_host: str):
@@ -47,22 +52,21 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str):
      return response
 
 
- def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> str:
+ def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState:
      request_url = f"{ps_server_host}/processor/job/{processing_job_id}"
      response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
      job_state = response.json()["state"]
      assert job_state
-     return job_state
-
+     return getattr(JobState, job_state.lower())
 
- def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> str:
+ def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState:
      request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}"
      response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
      job_state = response.json()["state"]
      assert job_state
-     return job_state
+     return getattr(JobState, job_state.lower())
 
 
  def post_ps_processing_request(ps_server_host: str, processor: str, job_input: dict) -> str:
@@ -78,9 +82,13 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d
      return processing_job_id
 
 
- # TODO: Can be extended to include other parameters such as page_wise
- def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: str) -> str:
-     request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise=True"
+ def post_ps_workflow_request(
+     ps_server_host: str,
+     path_to_wf: str,
+     path_to_mets: str,
+     page_wise: bool = False,
+ ) -> str:
+     request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise={'True' if page_wise else 'False'}"
      response = request_post(
          url=request_url,
          headers={"accept": "application/json; charset=utf-8"},
@@ -88,8 +96,11 @@ def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets:
      )
      # print(response.json())
      # print(response.__dict__)
+     json_resp_raw = response.text
+     # print(f'post_ps_workflow_request >> {response.status_code}')
+     # print(f'post_ps_workflow_request >> {json_resp_raw}')
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
-     wf_job_id = response.json()["job_id"]
+     wf_job_id = json.loads(json_resp_raw)["job_id"]
      assert wf_job_id
      return wf_job_id
 
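The status getters now return `JobState` members instead of raw strings. A self-contained sketch of the `getattr(JobState, job_state.lower())` mapping, using a stand-in enum (the real member list lives in `ocrd_network.constants` and is not shown in this diff):

    # Stand-in enum to illustrate the mapping: the server reports an upper-case state
    # string, which the client resolves to the enum member of the same lower-case name.
    from enum import Enum

    class JobState(str, Enum):
        failed = 'FAILED'
        queued = 'QUEUED'
        running = 'RUNNING'
        success = 'SUCCESS'
        unset = 'UNSET'

    assert getattr(JobState, 'SUCCESS'.lower()) is JobState.success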
@@ -1,7 +1,7 @@
  from datetime import datetime
  from os import getpid
  from pathlib import Path
- from typing import Dict, List, Union
+ from typing import Dict, List, Optional, Union
  from uvicorn import run as uvicorn_run
 
  from fastapi import APIRouter, FastAPI, File, HTTPException, Request, status, UploadFile
@@ -48,6 +48,7 @@ from .server_utils import (
      get_workflow_content,
      get_from_database_workspace,
      get_from_database_workflow_job,
+     kill_mets_server_zombies,
      parse_workflow_tasks,
      raise_http_exception,
      request_processor_server_tool_json,
@@ -78,7 +79,6 @@ class ProcessingServer(FastAPI):
      """
 
      def __init__(self, config_path: str, host: str, port: int) -> None:
-         initLogging()
          self.title = "OCR-D Processing Server"
          super().__init__(
              title=self.title,
@@ -86,6 +86,7 @@ class ProcessingServer(FastAPI):
              on_shutdown=[self.on_shutdown],
              description="OCR-D Processing Server"
          )
+         initLogging()
          self.log = getLogger("ocrd_network.processing_server")
          log_file = get_processing_server_logging_file_path(pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
@@ -155,7 +156,7 @@ class ProcessingServer(FastAPI):
          queue_names = self.deployer.find_matching_network_agents(
              worker_only=True, str_names_only=True, unique_only=True
          )
-         self.log.debug(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}")
+         self.log.info(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}")
          create_message_queues(logger=self.log, rmq_publisher=self.rmq_publisher, queue_names=queue_names)
 
          self.deployer.deploy_network_agents(mongodb_url=self.mongodb_url, rabbitmq_url=self.rabbitmq_url)
@@ -167,6 +168,7 @@ class ProcessingServer(FastAPI):
          uvicorn_run(self, host=self.hostname, port=int(self.port))
 
      async def on_startup(self):
+         self.log.info(f"Initializing the Database on: {self.mongodb_url}")
          await initiate_database(db_url=self.mongodb_url)
 
      async def on_shutdown(self) -> None:
@@ -200,6 +202,14 @@ class ProcessingServer(FastAPI):
              tags=[ServerApiTags.WORKSPACE],
              summary="Forward a TCP request to UDS mets server"
          )
+         others_router.add_api_route(
+             path="/kill_mets_server_zombies",
+             endpoint=self.kill_mets_server_zombies,
+             methods=["DELETE"],
+             tags=[ServerApiTags.WORKFLOW, ServerApiTags.PROCESSING],
+             status_code=status.HTTP_200_OK,
+             summary="!! Workaround Do Not Use Unless You Have A Reason !! Kill all METS servers on this machine that have been created more than 60 minutes ago."
+         )
          self.include_router(others_router)
 
      def add_api_routes_processing(self):
@@ -320,7 +330,7 @@ class ProcessingServer(FastAPI):
          """Forward mets-server-request
 
          A processor calls a mets related method like add_file with ClientSideOcrdMets. This sends
-         a request to this endpoint. This request contains all infomation neccessary to make a call
+         a request to this endpoint. This request contains all information necessary to make a call
          to the uds-mets-server. This information is used by `MetsServerProxy` to make a the call
          to the local (local for the processing-server) reachable the uds-mets-server.
          """
@@ -574,26 +584,20 @@ class ProcessingServer(FastAPI):
          )
 
      async def _consume_cached_jobs_of_workspace(
-         self, workspace_key: str, mets_server_url: str
+         self, workspace_key: str, mets_server_url: str, path_to_mets: str
      ) -> List[PYJobInput]:
-
-         # Check whether the internal queue for the workspace key still exists
-         if workspace_key not in self.cache_processing_requests.processing_requests:
-             self.log.debug(f"No internal queue available for workspace with key: {workspace_key}")
-             return []
-
          # decrease the internal cache counter by 1
          request_counter = self.cache_processing_requests.update_request_counter(
              workspace_key=workspace_key, by_value=-1
          )
          self.log.debug(f"Internal processing job cache counter value: {request_counter}")
-         if not len(self.cache_processing_requests.processing_requests[workspace_key]):
+         if (workspace_key not in self.cache_processing_requests.processing_requests or
+                 not len(self.cache_processing_requests.processing_requests[workspace_key])):
              if request_counter <= 0:
                  # Shut down the Mets Server for the workspace_key since no
                  # more internal callbacks are expected for that workspace
                  self.log.debug(f"Stopping the mets server: {mets_server_url}")
-
-                 self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url)
+                 self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url, path_to_mets=path_to_mets)
 
              try:
                  # The queue is empty - delete it
@@ -609,6 +613,10 @@ class ProcessingServer(FastAPI):
              else:
                  self.log.debug(f"Internal request cache is empty but waiting for {request_counter} result callbacks.")
              return []
+         # Check whether the internal queue for the workspace key still exists
+         if workspace_key not in self.cache_processing_requests.processing_requests:
+             self.log.debug(f"No internal queue available for workspace with key: {workspace_key}")
+             return []
          consumed_requests = await self.cache_processing_requests.consume_cached_requests(workspace_key=workspace_key)
          return consumed_requests
 
@@ -643,7 +651,7 @@ class ProcessingServer(FastAPI):
          raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, message, error)
 
          consumed_cached_jobs = await self._consume_cached_jobs_of_workspace(
-             workspace_key=workspace_key, mets_server_url=mets_server_url
+             workspace_key=workspace_key, mets_server_url=mets_server_url, path_to_mets=path_to_mets
          )
          await self.push_cached_jobs_to_agents(processing_jobs=consumed_cached_jobs)
 
@@ -817,6 +825,10 @@ class ProcessingServer(FastAPI):
          response = self._produce_workflow_status_response(processing_jobs=jobs)
          return response
 
+     async def kill_mets_server_zombies(self, minutes_ago : Optional[int] = None, dry_run : Optional[bool] = None) -> List[int]:
+         pids_killed = kill_mets_server_zombies(minutes_ago=minutes_ago, dry_run=dry_run)
+         return pids_killed
+
      async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, JobState]:
          """
          Simplified version of the `get_workflow_info` that returns a single state for the entire workflow.
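The new DELETE route can be exercised directly over HTTP. A hedged sketch follows; the host and port are placeholders, and `minutes_ago`/`dry_run` are the optional query parameters implied by the endpoint signature above:

    # Hedged sketch, not part of the diff: calling the METS-server zombie cleanup endpoint.
    from requests import delete as request_delete

    response = request_delete(
        "http://localhost:8000/kill_mets_server_zombies",  # assumed Processing Server address
        params={"minutes_ago": 60, "dry_run": True},       # optional query parameters
    )
    assert response.status_code == 200, response.text
    print(response.json())  # list of METS-server PIDs that were (or would be) killed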
@@ -9,12 +9,12 @@ is a single OCR-D Processor instance.
  """
 
  from datetime import datetime
- from os import getpid
+ from os import getpid, getppid
  from pika import BasicProperties
  from pika.adapters.blocking_connection import BlockingChannel
  from pika.spec import Basic
 
- from ocrd_utils import getLogger
+ from ocrd_utils import getLogger, initLogging
  from .constants import JobState
  from .database import sync_initiate_database, sync_db_get_workspace, sync_db_update_processing_job, verify_database_uri
  from .logging_utils import (
@@ -35,14 +35,16 @@ from .utils import calculate_execution_time, post_to_callback_url
 
  class ProcessingWorker:
      def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, processor_class=None) -> None:
+         initLogging()
          self.log = getLogger(f'ocrd_network.processing_worker')
          log_file = get_processing_worker_logging_file_path(processor_name=processor_name, pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
 
          try:
              verify_database_uri(mongodb_addr)
-             self.log.debug(f'Verified MongoDB URL: {mongodb_addr}')
+             self.log.info(f'Verified MongoDB URL: {mongodb_addr}')
              self.rmq_data = verify_and_parse_mq_uri(rabbitmq_addr)
+             self.log.info(f'Verified RabbitMQ URL: {rabbitmq_addr}')
          except ValueError as error:
              msg = f"Failed to parse data, error: {error}"
              self.log.exception(msg)
@@ -61,6 +63,7 @@ class ProcessingWorker:
          # Gets assigned when the `connect_publisher` is called on the worker object
          # Used to publish OcrdResultMessage type message to the queue with name {processor_name}-result
          self.rmq_publisher = None
+         self.log.info(f"Initialized processing worker: {processor_name}")
 
      def connect_consumer(self):
          self.rmq_consumer = connect_rabbitmq_consumer(self.log, self.rmq_data)
@@ -240,7 +243,7 @@ class ProcessingWorker:
              # post the result message (callback to a user defined endpoint)
              post_to_callback_url(self.log, callback_url, result_message)
          if internal_callback_url:
-             self.log.info(f"Publishing result to internal callback url (Processing Server): {callback_url}")
+             self.log.info(f"Publishing result to internal callback url (Processing Server): {internal_callback_url}")
              # If the internal callback_url field is set,
              # post the result message (callback to Processing Server endpoint)
              post_to_callback_url(self.log, internal_callback_url, result_message)
@@ -42,13 +42,13 @@ class ProcessorServer(FastAPI):
      def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None):
          if not (processor_name or processor_class):
              raise ValueError("Either 'processor_name' or 'processor_class' must be provided")
-         initLogging()
          super().__init__(
              on_startup=[self.on_startup],
              on_shutdown=[self.on_shutdown],
              title=f"Network agent - Processor Server",
              description="Network agent - Processor Server"
          )
+         initLogging()
          self.log = getLogger("ocrd_network.processor_server")
          log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
@@ -69,6 +69,7 @@ class ProcessorServer(FastAPI):
          self.processor_name = self.ocrd_tool["executable"]
 
          self.add_api_routes_processing()
+         self.log.info(f"Initialized processor server: {processor_name}")
 
      async def on_startup(self):
          await initiate_database(db_url=self.db_url)
@@ -6,6 +6,7 @@ RabbitMQ documentation.
  from typing import Any, Optional, Union
  from pika import BasicProperties, BlockingConnection, ConnectionParameters, PlainCredentials
  from pika.adapters.blocking_connection import BlockingChannel
+ from ocrd_utils import config
  from .constants import (
      DEFAULT_EXCHANGER_NAME,
      DEFAULT_EXCHANGER_TYPE,
@@ -69,8 +70,7 @@ class RMQConnector:
                  port=port,
                  virtual_host=vhost,
                  credentials=credentials,
-                 # TODO: The heartbeat should not be disabled (0)!
-                 heartbeat=0
+                 heartbeat=config.OCRD_NETWORK_RABBITMQ_HEARTBEAT
              ),
          )
          return blocking_connection
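The hard-coded `heartbeat=0` (heartbeats disabled) is replaced by the `OCRD_NETWORK_RABBITMQ_HEARTBEAT` setting from `ocrd_utils.config`. A hedged sketch of configuring it through the environment, as with other `OCRD_*` settings; the concrete value is an arbitrary example, and the exact point at which the variable is read is an assumption:

    # Hedged sketch, not part of the diff: setting the RabbitMQ heartbeat via the environment.
    import os
    os.environ["OCRD_NETWORK_RABBITMQ_HEARTBEAT"] = "240"  # seconds; 0 disables heartbeats

    from ocrd_utils import config
    print(config.OCRD_NETWORK_RABBITMQ_HEARTBEAT)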
@@ -8,7 +8,7 @@ Each Processing Worker is an instance of an OCR-D processor.
  """
  from __future__ import annotations
  from pathlib import Path
- from subprocess import Popen, run as subprocess_run
+ import psutil
  from time import sleep
  from typing import Dict, List, Union
 
@@ -30,6 +30,8 @@ class Deployer:
          self.data_hosts: List[DataHost] = parse_hosts_data(ps_config["hosts"])
          self.internal_callback_url = ps_config.get("internal_callback_url", None)
          self.mets_servers: Dict = {}  # {"mets_server_url": "mets_server_pid"}
+         # This is required to store UDS urls that are multiplexed through the TCP proxy and are not preserved anywhere
+         self.mets_servers_paths: Dict = {}  # {"ws_dir_path": "mets_server_url"}
          self.use_tcp_mets = ps_config.get("use_tcp_mets", False)
 
          # TODO: Reconsider this.
@@ -146,25 +148,33 @@ class Deployer:
          if is_mets_server_running(mets_server_url=str(mets_server_url)):
              self.log.debug(f"The UDS mets server for {ws_dir_path} is already started: {mets_server_url}")
              return mets_server_url
+         elif Path(mets_server_url).is_socket():
+             self.log.warning(
+                 f"The UDS mets server for {ws_dir_path} is not running but the socket file exists: {mets_server_url}."
+                 "Removing to avoid any weird behavior before starting the server.")
+             Path(mets_server_url).unlink()
          self.log.info(f"Starting UDS mets server: {mets_server_url}")
-         pid = OcrdMetsServer.create_process(mets_server_url=mets_server_url, ws_dir_path=ws_dir_path, log_file=log_file)
-         self.mets_servers[mets_server_url] = pid
+         pid = OcrdMetsServer.create_process(mets_server_url=str(mets_server_url), ws_dir_path=str(ws_dir_path), log_file=str(log_file))
+         self.mets_servers[str(mets_server_url)] = pid
+         self.mets_servers_paths[str(ws_dir_path)] = str(mets_server_url)
          return mets_server_url
 
-     def stop_uds_mets_server(self, mets_server_url: str, stop_with_pid: bool = False) -> None:
+     def stop_uds_mets_server(self, mets_server_url: str, path_to_mets: str) -> None:
          self.log.info(f"Stopping UDS mets server: {mets_server_url}")
-         if stop_with_pid:
-             if Path(mets_server_url) not in self.mets_servers:
-                 message = f"UDS Mets server not found at URL: {mets_server_url}"
-                 self.log.exception(message)
-                 raise Exception(message)
-             mets_server_pid = self.mets_servers[Path(mets_server_url)]
-             OcrdMetsServer.kill_process(mets_server_pid=mets_server_pid)
-             return
-         # TODO: Reconsider this again
-         # Not having this sleep here causes connection errors
-         # on the last request processed by the processing worker.
-         # Sometimes 3 seconds is enough, sometimes not.
-         sleep(5)
-         stop_mets_server(mets_server_url=mets_server_url)
+         self.log.info(f"Path to the mets file: {path_to_mets}")
+         self.log.debug(f"mets_server: {self.mets_servers}")
+         self.log.debug(f"mets_server_paths: {self.mets_servers_paths}")
+         workspace_path = str(Path(path_to_mets).parent)
+         mets_server_url_uds = self.mets_servers_paths[workspace_path]
+         mets_server_pid = self.mets_servers[mets_server_url_uds]
+         self.log.info(f"Terminating mets server with pid: {mets_server_pid}")
+         p = psutil.Process(mets_server_pid)
+         stop_mets_server(self.log, mets_server_url=mets_server_url, ws_dir_path=workspace_path)
+         if p.is_running():
+             p.wait()
+             self.log.info(f"Terminated mets server with pid: {mets_server_pid}")
+         else:
+             self.log.info(f"Mets server with pid: {mets_server_pid} has already terminated.")
+         del self.mets_servers_paths[workspace_path]
+         del self.mets_servers[mets_server_url_uds]
          return
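To follow the new bookkeeping in `stop_uds_mets_server`: the METS path resolves to its workspace directory, the directory to the UDS socket URL via `mets_servers_paths`, and the URL to the server PID via `mets_servers`. A toy illustration with made-up values:

    # Toy illustration, not part of ocrd_network: the two dicts kept by the Deployer.
    from pathlib import Path

    mets_servers = {"/tmp/ocrd_uds_abc123.sock": 4711}                      # url -> pid
    mets_servers_paths = {"/data/workspace1": "/tmp/ocrd_uds_abc123.sock"}  # ws dir -> url

    path_to_mets = "/data/workspace1/mets.xml"
    workspace_path = str(Path(path_to_mets).parent)
    mets_server_url_uds = mets_servers_paths[workspace_path]
    mets_server_pid = mets_servers[mets_server_url_uds]
    assert mets_server_pid == 4711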