ocrd 3.0.0b7__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -0,0 +1,51 @@
+ from ocrd_utils import xywh_from_points
+
+ pc_functions = []
+
+ def _export(func):
+     pc_functions.append(func)
+     return func
+
+ @_export
+ def pc_pixelarea(nodes):
+     """
+     Extract Coords/@points from all nodes, calculate the bounding
+     box, and accumulate areas.
+     """
+     area = 0
+     for node in nodes:
+         # FIXME: find out why we need to go to the parent here
+         node = node.parent.value
+         coords = node.find(f'{node.prefix}:Coords', node.nsmap)
+         if coords is None:
+             continue
+         points = coords.attrib['points']
+         xywh = xywh_from_points(points)
+         area += xywh['w'] * xywh['h']
+     return area
+
+ @_export
+ def pc_textequiv(nodes):
+     """
+     Extract TextEquiv/Unicode from all nodes, then concatenate
+     (interspersed with spaces or newlines).
+     """
+     text = ''
+     for node in nodes:
+         # FIXME: find out why we need to go to the parent here
+         node = node.parent.value
+         if text and node.tag.endswith('Region'):
+             text += '\n'
+         if text and node.tag.endswith('Line'):
+             text += '\n'
+         if text and node.tag.endswith('Word'):
+             text += ' '
+         equiv = node.find(f'{node.prefix}:TextEquiv', node.nsmap)
+         if equiv is None:
+             continue
+         string = equiv.find(f'{node.prefix}:Unicode', node.nsmap)
+         if string is None:
+             continue
+         text += str(string.text)
+     return text
+
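The new module above collects two PAGE-XML helper functions (pixel-area and text aggregation) in a `pc_functions` list. As a rough, stand-alone illustration of what they compute (not of how ocrd registers them), the same pixel-area aggregation can be done directly with lxml; the file name and the PAGE namespace version below are assumptions:

    # Illustration only, not part of the diff: aggregate bounding-box areas of all
    # TextRegion/Coords in one PAGE-XML file, using xywh_from_points as above.
    from lxml import etree
    from ocrd_utils import xywh_from_points

    PC_NS = {'pc': 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'}
    tree = etree.parse('OCR-D-SEG-BLOCK/FILE_0001.xml')  # hypothetical PAGE-XML file
    area = 0
    for coords in tree.iterfind('.//pc:TextRegion/pc:Coords', namespaces=PC_NS):
        xywh = xywh_from_points(coords.get('points'))
        area += xywh['w'] * xywh['h']
    print(f"accumulated bounding-box area: {area} px")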
@@ -2,6 +2,7 @@ import click
  from json import dumps
  from typing import List, Optional, Tuple
  from ocrd.decorators.parameter_option import parameter_option, parameter_override_option
+ from ocrd_network.constants import JobState
  from ocrd_utils import DEFAULT_METS_BASENAME
  from ocrd_utils.introspect import set_json_key_value_overrides
  from ocrd_utils.str import parse_json_string_or_file
@@ -104,8 +105,10 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str):
  @click.option('--result-queue-name')
  @click.option('--callback-url')
  @click.option('--agent-type', default='worker')
- @click.option('-b', '--block', default=False,
+ @click.option('-b', '--block', default=False, is_flag=True,
                help='If set, the client will block till job timeout, fail or success.')
+ @click.option('-p', '--print-state', default=False, is_flag=True,
+               help='If set, the client will print job states by each iteration.')
  def send_processing_job_request(
      address: Optional[str],
      processor_name: str,
@@ -120,7 +123,8 @@ def send_processing_job_request(
      # TODO: This is temporally available to toggle
      # between the ProcessingWorker/ProcessorServer
      agent_type: Optional[str],
-     block: Optional[bool]
+     block: Optional[bool],
+     print_state: Optional[bool]
  ):
      """
      Submit a processing job to the processing server.
@@ -146,7 +150,7 @@ def send_processing_job_request(
      assert processing_job_id
      print(f"Processing job id: {processing_job_id}")
      if block:
-         client.poll_job_status(job_id=processing_job_id)
+         client.poll_job_status(job_id=processing_job_id, print_state=print_state)
 
 
  @client_cli.group('workflow')
@@ -176,24 +180,39 @@ def check_workflow_job_status(address: Optional[str], workflow_job_id: str):
                'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default')
  @click.option('-m', '--path-to-mets', required=True)
  @click.option('-w', '--path-to-workflow', required=True)
- @click.option('-b', '--block', default=False,
+ @click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs")
+ @click.option('-b', '--block', default=False, is_flag=True,
                help='If set, the client will block till job timeout, fail or success.')
+ @click.option('-p', '--print-state', default=False, is_flag=True,
+               help='If set, the client will print job states by each iteration.')
  def send_workflow_job_request(
      address: Optional[str],
      path_to_mets: str,
      path_to_workflow: str,
-     block: Optional[bool]
+     page_wise: bool,
+     block: bool,
+     print_state: bool
  ):
      """
      Submit a workflow job to the processing server.
      """
      client = Client(server_addr_processing=address)
-     workflow_job_id = client.send_workflow_job_request(path_to_wf=path_to_workflow, path_to_mets=path_to_mets)
+     workflow_job_id = client.send_workflow_job_request(
+         path_to_wf=path_to_workflow,
+         path_to_mets=path_to_mets,
+         page_wise=page_wise,
+     )
      assert workflow_job_id
      print(f"Workflow job id: {workflow_job_id}")
      if block:
-         client.poll_workflow_status(job_id=workflow_job_id)
-
+         print(f"Polling state of workflow job {workflow_job_id}")
+         state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state)
+         if state != JobState.success:
+             print(f"Workflow failed with {state}")
+             exit(1)
+         else:
+             print(f"Workflow succeeded")
+             exit(0)
 
  @client_cli.group('workspace')
  def workspace_cli():
ocrd_network/client.py CHANGED
@@ -46,18 +46,21 @@ class Client:
      def check_workflow_status(self, workflow_job_id: str):
          return get_ps_workflow_job_status(self.server_addr_processing, workflow_job_id=workflow_job_id)
 
-     def poll_job_status(self, job_id: str) -> str:
+     def poll_job_status(self, job_id: str, print_state: bool = False) -> str:
          return poll_job_status_till_timeout_fail_or_success(
-             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait)
+             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait,
+             print_state=print_state)
 
-     def poll_workflow_status(self, job_id: str) -> str:
+     def poll_workflow_status(self, job_id: str, print_state: bool = False) -> str:
          return poll_wf_status_till_timeout_fail_or_success(
-             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait)
+             ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait,
+             print_state=print_state)
 
      def send_processing_job_request(self, processor_name: str, req_params: dict) -> str:
          return post_ps_processing_request(
              ps_server_host=self.server_addr_processing, processor=processor_name, job_input=req_params)
 
-     def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str):
+     def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str, page_wise: bool = False):
          return post_ps_workflow_request(
-             ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets)
+             ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets,
+             page_wise=page_wise)
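A minimal usage sketch of the extended `Client` API, mirroring the new `--block`/`--print-state`/`--page-wise` CLI flags above; the server address and file paths are placeholders, not values from this release:

    # Hedged sketch, not part of the diff: driving the extended Client API from Python.
    from ocrd_network.client import Client
    from ocrd_network.constants import JobState

    client = Client(server_addr_processing="http://localhost:8000")  # assumed address
    job_id = client.send_workflow_job_request(
        path_to_wf="workflow.txt",          # placeholder workflow file
        path_to_mets="/data/ws/mets.xml",   # placeholder METS path
        page_wise=True,                     # new in 3.0.1: one job per page
    )
    state = client.poll_workflow_status(job_id=job_id, print_state=True)
    if state != JobState.success:
        raise RuntimeError(f"workflow ended in state {state}")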
@@ -1,9 +1,10 @@
+ import json
  from requests import get as request_get, post as request_post
  from time import sleep
  from .constants import JobState, NETWORK_PROTOCOLS
 
 
- def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int):
+ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int, print_state: bool = False) -> JobState:
      if job_type not in ["workflow", "processor"]:
          raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'")
      job_state = JobState.unset
@@ -13,18 +14,22 @@ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries
              job_state = get_ps_processing_job_status(ps_server_host, job_id)
          if job_type == "workflow":
              job_state = get_ps_workflow_job_status(ps_server_host, job_id)
+         if print_state:
+             print(f"State of the {job_type} job {job_id}: {job_state}")
          if job_state == JobState.success or job_state == JobState.failed:
              break
          tries -= 1
      return job_state
 
 
- def poll_job_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState:
-     return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait)
+ def poll_job_status_till_timeout_fail_or_success(
+         ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
+     return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait, print_state)
 
 
- def poll_wf_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState:
-     return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait)
+ def poll_wf_status_till_timeout_fail_or_success(
+         ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState:
+     return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait, print_state)
 
 
  def get_ps_deployed_processors(ps_server_host: str):
@@ -47,22 +52,21 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str):
      return response
 
 
- def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> str:
+ def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState:
      request_url = f"{ps_server_host}/processor/job/{processing_job_id}"
      response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
      job_state = response.json()["state"]
      assert job_state
-     return job_state
-
+     return getattr(JobState, job_state.lower())
 
- def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> str:
+ def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState:
      request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}"
      response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"})
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
      job_state = response.json()["state"]
      assert job_state
-     return job_state
+     return getattr(JobState, job_state.lower())
 
 
  def post_ps_processing_request(ps_server_host: str, processor: str, job_input: dict) -> str:
@@ -78,9 +82,13 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d
      return processing_job_id
 
 
- # TODO: Can be extended to include other parameters such as page_wise
- def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: str) -> str:
-     request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise=True"
+ def post_ps_workflow_request(
+     ps_server_host: str,
+     path_to_wf: str,
+     path_to_mets: str,
+     page_wise: bool = False,
+ ) -> str:
+     request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise={'True' if page_wise else 'False'}"
      response = request_post(
          url=request_url,
          headers={"accept": "application/json; charset=utf-8"},
@@ -88,8 +96,11 @@ def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets:
      )
      # print(response.json())
      # print(response.__dict__)
+     json_resp_raw = response.text
+     # print(f'post_ps_workflow_request >> {response.status_code}')
+     # print(f'post_ps_workflow_request >> {json_resp_raw}')
      assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}"
-     wf_job_id = response.json()["job_id"]
+     wf_job_id = json.loads(json_resp_raw)["job_id"]
      assert wf_job_id
      return wf_job_id
 
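The status getters now return `JobState` members instead of raw strings. A self-contained sketch of the `getattr(JobState, job_state.lower())` mapping, using a stand-in enum (the real member list lives in `ocrd_network.constants` and is not shown in this diff):

    # Stand-in enum to illustrate the mapping: the server reports an upper-case state
    # string, which the client resolves to the enum member of the same lower-case name.
    from enum import Enum

    class JobState(str, Enum):
        failed = 'FAILED'
        queued = 'QUEUED'
        running = 'RUNNING'
        success = 'SUCCESS'
        unset = 'UNSET'

    assert getattr(JobState, 'SUCCESS'.lower()) is JobState.success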
@@ -1,7 +1,7 @@
  from datetime import datetime
  from os import getpid
  from pathlib import Path
- from typing import Dict, List, Union
+ from typing import Dict, List, Optional, Union
  from uvicorn import run as uvicorn_run
 
  from fastapi import APIRouter, FastAPI, File, HTTPException, Request, status, UploadFile
@@ -48,6 +48,7 @@ from .server_utils import (
      get_workflow_content,
      get_from_database_workspace,
      get_from_database_workflow_job,
+     kill_mets_server_zombies,
      parse_workflow_tasks,
      raise_http_exception,
      request_processor_server_tool_json,
@@ -78,7 +79,6 @@ class ProcessingServer(FastAPI):
      """
 
      def __init__(self, config_path: str, host: str, port: int) -> None:
-         initLogging()
          self.title = "OCR-D Processing Server"
          super().__init__(
              title=self.title,
@@ -86,6 +86,7 @@ class ProcessingServer(FastAPI):
              on_shutdown=[self.on_shutdown],
              description="OCR-D Processing Server"
          )
+         initLogging()
          self.log = getLogger("ocrd_network.processing_server")
          log_file = get_processing_server_logging_file_path(pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
@@ -155,7 +156,7 @@ class ProcessingServer(FastAPI):
          queue_names = self.deployer.find_matching_network_agents(
              worker_only=True, str_names_only=True, unique_only=True
          )
-         self.log.debug(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}")
+         self.log.info(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}")
          create_message_queues(logger=self.log, rmq_publisher=self.rmq_publisher, queue_names=queue_names)
 
          self.deployer.deploy_network_agents(mongodb_url=self.mongodb_url, rabbitmq_url=self.rabbitmq_url)
@@ -167,6 +168,7 @@ class ProcessingServer(FastAPI):
          uvicorn_run(self, host=self.hostname, port=int(self.port))
 
      async def on_startup(self):
+         self.log.info(f"Initializing the Database on: {self.mongodb_url}")
          await initiate_database(db_url=self.mongodb_url)
 
      async def on_shutdown(self) -> None:
@@ -200,6 +202,14 @@ class ProcessingServer(FastAPI):
              tags=[ServerApiTags.WORKSPACE],
              summary="Forward a TCP request to UDS mets server"
          )
+         others_router.add_api_route(
+             path="/kill_mets_server_zombies",
+             endpoint=self.kill_mets_server_zombies,
+             methods=["DELETE"],
+             tags=[ServerApiTags.WORKFLOW, ServerApiTags.PROCESSING],
+             status_code=status.HTTP_200_OK,
+             summary="!! Workaround Do Not Use Unless You Have A Reason !! Kill all METS servers on this machine that have been created more than 60 minutes ago."
+         )
          self.include_router(others_router)
 
      def add_api_routes_processing(self):
@@ -320,7 +330,7 @@ class ProcessingServer(FastAPI):
          """Forward mets-server-request
 
          A processor calls a mets related method like add_file with ClientSideOcrdMets. This sends
-         a request to this endpoint. This request contains all infomation neccessary to make a call
+         a request to this endpoint. This request contains all information necessary to make a call
          to the uds-mets-server. This information is used by `MetsServerProxy` to make a the call
          to the local (local for the processing-server) reachable the uds-mets-server.
          """
@@ -574,26 +584,20 @@ class ProcessingServer(FastAPI):
          )
 
      async def _consume_cached_jobs_of_workspace(
-         self, workspace_key: str, mets_server_url: str
+         self, workspace_key: str, mets_server_url: str, path_to_mets: str
      ) -> List[PYJobInput]:
-
-         # Check whether the internal queue for the workspace key still exists
-         if workspace_key not in self.cache_processing_requests.processing_requests:
-             self.log.debug(f"No internal queue available for workspace with key: {workspace_key}")
-             return []
-
          # decrease the internal cache counter by 1
          request_counter = self.cache_processing_requests.update_request_counter(
              workspace_key=workspace_key, by_value=-1
          )
          self.log.debug(f"Internal processing job cache counter value: {request_counter}")
-         if not len(self.cache_processing_requests.processing_requests[workspace_key]):
+         if (workspace_key not in self.cache_processing_requests.processing_requests or
+                 not len(self.cache_processing_requests.processing_requests[workspace_key])):
              if request_counter <= 0:
                  # Shut down the Mets Server for the workspace_key since no
                  # more internal callbacks are expected for that workspace
                  self.log.debug(f"Stopping the mets server: {mets_server_url}")
-
-                 self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url)
+                 self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url, path_to_mets=path_to_mets)
 
              try:
                  # The queue is empty - delete it
@@ -609,6 +613,10 @@ class ProcessingServer(FastAPI):
              else:
                  self.log.debug(f"Internal request cache is empty but waiting for {request_counter} result callbacks.")
              return []
+         # Check whether the internal queue for the workspace key still exists
+         if workspace_key not in self.cache_processing_requests.processing_requests:
+             self.log.debug(f"No internal queue available for workspace with key: {workspace_key}")
+             return []
          consumed_requests = await self.cache_processing_requests.consume_cached_requests(workspace_key=workspace_key)
          return consumed_requests
 
@@ -643,7 +651,7 @@ class ProcessingServer(FastAPI):
          raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, message, error)
 
          consumed_cached_jobs = await self._consume_cached_jobs_of_workspace(
-             workspace_key=workspace_key, mets_server_url=mets_server_url
+             workspace_key=workspace_key, mets_server_url=mets_server_url, path_to_mets=path_to_mets
          )
          await self.push_cached_jobs_to_agents(processing_jobs=consumed_cached_jobs)
 
@@ -817,6 +825,10 @@ class ProcessingServer(FastAPI):
          response = self._produce_workflow_status_response(processing_jobs=jobs)
          return response
 
+     async def kill_mets_server_zombies(self, minutes_ago : Optional[int] = None, dry_run : Optional[bool] = None) -> List[int]:
+         pids_killed = kill_mets_server_zombies(minutes_ago=minutes_ago, dry_run=dry_run)
+         return pids_killed
+
      async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, JobState]:
          """
          Simplified version of the `get_workflow_info` that returns a single state for the entire workflow.
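The new DELETE route can be exercised directly over HTTP. A hedged sketch follows; the host and port are placeholders, and `minutes_ago`/`dry_run` are the optional query parameters implied by the endpoint signature above:

    # Hedged sketch, not part of the diff: calling the METS-server zombie cleanup endpoint.
    from requests import delete as request_delete

    response = request_delete(
        "http://localhost:8000/kill_mets_server_zombies",  # assumed Processing Server address
        params={"minutes_ago": 60, "dry_run": True},       # optional query parameters
    )
    assert response.status_code == 200, response.text
    print(response.json())  # list of METS-server PIDs that were (or would be) killed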
@@ -9,12 +9,12 @@ is a single OCR-D Processor instance.
  """
 
  from datetime import datetime
- from os import getpid
+ from os import getpid, getppid
  from pika import BasicProperties
  from pika.adapters.blocking_connection import BlockingChannel
  from pika.spec import Basic
 
- from ocrd_utils import getLogger
+ from ocrd_utils import getLogger, initLogging
  from .constants import JobState
  from .database import sync_initiate_database, sync_db_get_workspace, sync_db_update_processing_job, verify_database_uri
  from .logging_utils import (
@@ -35,14 +35,16 @@ from .utils import calculate_execution_time, post_to_callback_url
 
  class ProcessingWorker:
      def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, processor_class=None) -> None:
+         initLogging()
          self.log = getLogger(f'ocrd_network.processing_worker')
          log_file = get_processing_worker_logging_file_path(processor_name=processor_name, pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
 
          try:
              verify_database_uri(mongodb_addr)
-             self.log.debug(f'Verified MongoDB URL: {mongodb_addr}')
+             self.log.info(f'Verified MongoDB URL: {mongodb_addr}')
              self.rmq_data = verify_and_parse_mq_uri(rabbitmq_addr)
+             self.log.info(f'Verified RabbitMQ URL: {rabbitmq_addr}')
          except ValueError as error:
              msg = f"Failed to parse data, error: {error}"
              self.log.exception(msg)
@@ -61,6 +63,7 @@ class ProcessingWorker:
          # Gets assigned when the `connect_publisher` is called on the worker object
          # Used to publish OcrdResultMessage type message to the queue with name {processor_name}-result
          self.rmq_publisher = None
+         self.log.info(f"Initialized processing worker: {processor_name}")
 
      def connect_consumer(self):
          self.rmq_consumer = connect_rabbitmq_consumer(self.log, self.rmq_data)
@@ -240,7 +243,7 @@ class ProcessingWorker:
              # post the result message (callback to a user defined endpoint)
              post_to_callback_url(self.log, callback_url, result_message)
          if internal_callback_url:
-             self.log.info(f"Publishing result to internal callback url (Processing Server): {callback_url}")
+             self.log.info(f"Publishing result to internal callback url (Processing Server): {internal_callback_url}")
              # If the internal callback_url field is set,
              # post the result message (callback to Processing Server endpoint)
              post_to_callback_url(self.log, internal_callback_url, result_message)
@@ -42,13 +42,13 @@ class ProcessorServer(FastAPI):
      def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None):
          if not (processor_name or processor_class):
              raise ValueError("Either 'processor_name' or 'processor_class' must be provided")
-         initLogging()
          super().__init__(
              on_startup=[self.on_startup],
              on_shutdown=[self.on_shutdown],
              title=f"Network agent - Processor Server",
              description="Network agent - Processor Server"
          )
+         initLogging()
          self.log = getLogger("ocrd_network.processor_server")
          log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid())
          configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
@@ -69,6 +69,7 @@ class ProcessorServer(FastAPI):
          self.processor_name = self.ocrd_tool["executable"]
 
          self.add_api_routes_processing()
+         self.log.info(f"Initialized processor server: {processor_name}")
 
      async def on_startup(self):
          await initiate_database(db_url=self.db_url)
@@ -6,6 +6,7 @@ RabbitMQ documentation.
  from typing import Any, Optional, Union
  from pika import BasicProperties, BlockingConnection, ConnectionParameters, PlainCredentials
  from pika.adapters.blocking_connection import BlockingChannel
+ from ocrd_utils import config
  from .constants import (
      DEFAULT_EXCHANGER_NAME,
      DEFAULT_EXCHANGER_TYPE,
@@ -69,8 +70,7 @@ class RMQConnector:
                  port=port,
                  virtual_host=vhost,
                  credentials=credentials,
-                 # TODO: The heartbeat should not be disabled (0)!
-                 heartbeat=0
+                 heartbeat=config.OCRD_NETWORK_RABBITMQ_HEARTBEAT
              ),
          )
          return blocking_connection
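The hard-coded `heartbeat=0` (heartbeats disabled) is replaced by the `OCRD_NETWORK_RABBITMQ_HEARTBEAT` setting from `ocrd_utils.config`. A hedged sketch of configuring it through the environment, as with other `OCRD_*` settings; the concrete value is an arbitrary example, and the exact point at which the variable is read is an assumption:

    # Hedged sketch, not part of the diff: setting the RabbitMQ heartbeat via the environment.
    import os
    os.environ["OCRD_NETWORK_RABBITMQ_HEARTBEAT"] = "240"  # seconds; 0 disables heartbeats

    from ocrd_utils import config
    print(config.OCRD_NETWORK_RABBITMQ_HEARTBEAT)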
@@ -8,7 +8,7 @@ Each Processing Worker is an instance of an OCR-D processor.
  """
  from __future__ import annotations
  from pathlib import Path
- from subprocess import Popen, run as subprocess_run
+ import psutil
  from time import sleep
  from typing import Dict, List, Union
 
@@ -30,6 +30,8 @@ class Deployer:
          self.data_hosts: List[DataHost] = parse_hosts_data(ps_config["hosts"])
          self.internal_callback_url = ps_config.get("internal_callback_url", None)
          self.mets_servers: Dict = {}  # {"mets_server_url": "mets_server_pid"}
+         # This is required to store UDS urls that are multiplexed through the TCP proxy and are not preserved anywhere
+         self.mets_servers_paths: Dict = {}  # {"ws_dir_path": "mets_server_url"}
          self.use_tcp_mets = ps_config.get("use_tcp_mets", False)
 
          # TODO: Reconsider this.
@@ -146,25 +148,33 @@ class Deployer:
          if is_mets_server_running(mets_server_url=str(mets_server_url)):
              self.log.debug(f"The UDS mets server for {ws_dir_path} is already started: {mets_server_url}")
              return mets_server_url
+         elif Path(mets_server_url).is_socket():
+             self.log.warning(
+                 f"The UDS mets server for {ws_dir_path} is not running but the socket file exists: {mets_server_url}."
+                 "Removing to avoid any weird behavior before starting the server.")
+             Path(mets_server_url).unlink()
          self.log.info(f"Starting UDS mets server: {mets_server_url}")
-         pid = OcrdMetsServer.create_process(mets_server_url=mets_server_url, ws_dir_path=ws_dir_path, log_file=log_file)
-         self.mets_servers[mets_server_url] = pid
+         pid = OcrdMetsServer.create_process(mets_server_url=str(mets_server_url), ws_dir_path=str(ws_dir_path), log_file=str(log_file))
+         self.mets_servers[str(mets_server_url)] = pid
+         self.mets_servers_paths[str(ws_dir_path)] = str(mets_server_url)
          return mets_server_url
 
-     def stop_uds_mets_server(self, mets_server_url: str, stop_with_pid: bool = False) -> None:
+     def stop_uds_mets_server(self, mets_server_url: str, path_to_mets: str) -> None:
          self.log.info(f"Stopping UDS mets server: {mets_server_url}")
-         if stop_with_pid:
-             if Path(mets_server_url) not in self.mets_servers:
-                 message = f"UDS Mets server not found at URL: {mets_server_url}"
-                 self.log.exception(message)
-                 raise Exception(message)
-             mets_server_pid = self.mets_servers[Path(mets_server_url)]
-             OcrdMetsServer.kill_process(mets_server_pid=mets_server_pid)
-             return
-         # TODO: Reconsider this again
-         # Not having this sleep here causes connection errors
-         # on the last request processed by the processing worker.
-         # Sometimes 3 seconds is enough, sometimes not.
-         sleep(5)
-         stop_mets_server(mets_server_url=mets_server_url)
+         self.log.info(f"Path to the mets file: {path_to_mets}")
+         self.log.debug(f"mets_server: {self.mets_servers}")
+         self.log.debug(f"mets_server_paths: {self.mets_servers_paths}")
+         workspace_path = str(Path(path_to_mets).parent)
+         mets_server_url_uds = self.mets_servers_paths[workspace_path]
+         mets_server_pid = self.mets_servers[mets_server_url_uds]
+         self.log.info(f"Terminating mets server with pid: {mets_server_pid}")
+         p = psutil.Process(mets_server_pid)
+         stop_mets_server(self.log, mets_server_url=mets_server_url, ws_dir_path=workspace_path)
+         if p.is_running():
+             p.wait()
+             self.log.info(f"Terminated mets server with pid: {mets_server_pid}")
+         else:
+             self.log.info(f"Mets server with pid: {mets_server_pid} has already terminated.")
+         del self.mets_servers_paths[workspace_path]
+         del self.mets_servers[mets_server_url_uds]
          return
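To follow the new bookkeeping in `stop_uds_mets_server`: the METS path resolves to its workspace directory, the directory to the UDS socket URL via `mets_servers_paths`, and the URL to the server PID via `mets_servers`. A toy illustration with made-up values:

    # Toy illustration, not part of ocrd_network: the two dicts kept by the Deployer.
    from pathlib import Path

    mets_servers = {"/tmp/ocrd_uds_abc123.sock": 4711}                      # url -> pid
    mets_servers_paths = {"/data/workspace1": "/tmp/ocrd_uds_abc123.sock"}  # ws dir -> url

    path_to_mets = "/data/workspace1/mets.xml"
    workspace_path = str(Path(path_to_mets).parent)
    mets_server_url_uds = mets_servers_paths[workspace_path]
    mets_server_pid = mets_servers[mets_server_url_uds]
    assert mets_server_pid == 4711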