ocrd 3.1.2__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd_models/constants.py CHANGED
@@ -28,6 +28,8 @@ __all__ = [
28
28
  'TAG_PAGE_TEXTEQUIV',
29
29
  'TAG_PAGE_TEXTREGION',
30
30
  'METS_PAGE_DIV_ATTRIBUTE',
31
+ 'PAGE_REGION_TYPES',
32
+ 'PAGE_ALTIMG_FEATURES',
31
33
  ]
32
34
 
33
35
 
@@ -72,6 +74,20 @@ PAGE_REGION_TYPES = [
72
74
  'Separator', 'Table', 'Text', 'Unknown'
73
75
  ]
74
76
 
77
# Known feature labels for derived page images — presumably the values used in
# pg:AlternativeImage/@comments by the respective preprocessing steps
# (TODO confirm against the PAGE-XML conventions used elsewhere in ocrd_models).
PAGE_ALTIMG_FEATURES = [
    'binarized',
    'grayscale_normalized',
    'despeckled',
    'cropped',
    'deskewed',
    'rotated-90',
    'rotated-180',
    'rotated-270',
    'dewarped',
    'clipped',
]
89
+
90
+
75
91
  class METS_PAGE_DIV_ATTRIBUTE(Enum):
76
92
  ID = auto()
77
93
  ORDER = auto()
ocrd_network/__init__.py CHANGED
@@ -4,4 +4,5 @@ from .processing_server import ProcessingServer
4
4
  from .processing_worker import ProcessingWorker
5
5
  from .processor_server import ProcessorServer
6
6
  from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType
7
+ from .resource_manager_server import ResourceManagerServer
7
8
  from .server_cache import CacheLockedPages, CacheProcessingRequests
@@ -2,10 +2,12 @@ from .client import client_cli
2
2
  from .processing_server import processing_server_cli
3
3
  from .processing_worker import processing_worker_cli
4
4
  from .processor_server import processor_server_cli
5
+ from .resmgr_server import resource_manager_server_cli
5
6
 
6
7
  __all__ = [
7
8
  'client_cli',
8
9
  'processing_server_cli',
9
10
  'processing_worker_cli',
10
- 'processor_server_cli'
11
+ 'processor_server_cli',
12
+ 'resource_manager_server_cli'
11
13
  ]
@@ -0,0 +1,23 @@
1
import click
from ocrd_network import ResourceManagerServer, ServerAddressParamType


@click.command('resmgr-server')
@click.option('-a', '--address',
              help='The URL of the OCR-D resource manager server, format: host:port',
              type=ServerAddressParamType(),
              required=True)
def resource_manager_server_cli(address: str):
    """
    Start standalone REST API OCR-D Resource Manager Server
    """
    try:
        # Note, the address is already validated with the type field
        host, port = address.split(':')
        resource_manager_server = ResourceManagerServer(
            host = host,
            port = int(port)
        )
        # Blocks until the uvicorn server is shut down
        resource_manager_server.start()
    except Exception as e:
        # Re-wrap any failure so the CLI surfaces one uniform error with the
        # original exception preserved as __cause__
        raise Exception("OCR-D Resource Manager Server has failed with error") from e
ocrd_network/constants.py CHANGED
@@ -10,6 +10,8 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
10
10
  # Used as a placeholder to lock all pages when no page_id is specified
11
11
  SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
12
12
 
13
+ # TODO: Make this more configurable
14
+ RESOURCE_MANAGER_SERVER_PORT = 45555
13
15
 
14
16
  class AgentType(str, Enum):
15
17
  PROCESSING_WORKER = "worker"
@@ -49,6 +51,7 @@ class NetworkLoggingDirs(str, Enum):
49
51
  PROCESSING_SERVERS = "processing_servers"
50
52
  PROCESSING_WORKERS = "processing_workers"
51
53
  PROCESSOR_SERVERS = "processor_servers"
54
+ RESOURCE_MANAGER_SERVERS = "resource_manager_servers"
52
55
 
53
56
 
54
57
  class ServerApiTags(str, Enum):
@@ -50,3 +50,8 @@ def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Pa
50
50
  def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path:
51
51
  log_file: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log"
52
52
  return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS), log_file)
53
+
54
+
55
def get_resource_manager_server_logging_file_path(pid: int) -> Path:
    """Return the log file path for the resource manager server with process id *pid*."""
    file_name = f"resource_manager_server.{pid}.log"
    return Path(get_root_logging_dir(NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS), file_name)
@@ -0,0 +1,178 @@
1
+ from datetime import datetime
2
+ from os import getpid
3
+ from pathlib import Path
4
+ import requests
5
+ from shutil import which
6
+ from typing import Any
7
+ from uvicorn import run as uvicorn_run
8
+ from fastapi import APIRouter, FastAPI, HTTPException, status
9
+
10
+ from ocrd import OcrdResourceManager
11
+ from ocrd_utils import directory_size, getLogger, get_moduledir, get_ocrd_tool_json, initLogging
12
+ from .logging_utils import configure_file_handler_with_formatter, get_resource_manager_server_logging_file_path
13
+
14
+
15
class ResourceManagerServer(FastAPI):
    """REST API wrapper around the OcrdResourceManager.

    Exposes endpoints to query available/installed processor resources and to
    download resources onto the host this server runs on.
    """

    def __init__(self, host: str, port: int) -> None:
        # Plain string — was an f-string with no placeholders
        self.title = "OCR-D Resource Manager Server"
        super().__init__(
            title=self.title,
            on_startup=[self.on_startup],
            on_shutdown=[self.on_shutdown],
            description=self.title
        )
        initLogging()
        self.log = getLogger("ocrd_network.resource_manager_server")
        log_file = get_resource_manager_server_logging_file_path(pid=getpid())
        configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

        # Shared instance used by the read-only listing endpoints
        self.resmgr_instance = OcrdResourceManager()

        self.hostname = host
        self.port = port

        self.add_api_routes()

    def start(self):
        """Run the application with uvicorn (blocks until shutdown)."""
        uvicorn_run(self, host=self.hostname, port=int(self.port))

    async def on_startup(self):
        self.log.info(f"Starting {self.title}")

    async def on_shutdown(self) -> None:
        # Nothing to release yet
        pass

    def add_api_routes(self):
        """Register all REST endpoints of this server."""
        base_router = APIRouter()
        base_router.add_api_route(
            path="/",
            endpoint=self.home_page,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="Get information about the OCR-D Resource Manager Server"
        )
        base_router.add_api_route(
            path="/list_available",
            endpoint=self.list_available_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="List resources available in the registry"
        )
        base_router.add_api_route(
            path="/list_installed",
            endpoint=self.list_installed_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="List resources already installed on this host"
        )
        base_router.add_api_route(
            path="/download",
            endpoint=self.download_resource,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="Download a resource for a processor executable"
        )
        self.include_router(base_router)

    async def home_page(self):
        """Landing endpoint with a short message and the current server time."""
        message = f"The home page of the {self.title}"
        return {
            "message": message,
            "time": datetime.now().strftime("%Y-%m-%d %H:%M")
        }

    async def list_available_resources(
        self,
        executable: Any = None,
        dynamic: bool = True,
        name: Any = None,
        database: Any = None,
        url: Any = None
    ):
        """List registry resources, optionally filtered by executable/name/database/url."""
        result = self.resmgr_instance.list_available(executable, dynamic, name, database, url)
        return {"result": result}

    async def list_installed_resources(self, executable: Any = None):
        """List resources installed on this host, optionally filtered by executable."""
        # Bugfix: this previously delegated to list_available(), which reports
        # registry contents instead of what is actually installed locally.
        result = self.resmgr_instance.list_installed(executable)
        return {"result": result}

    async def download_resource(
        self,
        executable: str,
        name: Any = None,
        location: Any = None,
        any_url: str = '',
        no_dynamic: bool = False,
        resource_type: str = 'file',
        path_in_archive: str = '.',
        allow_uninstalled: bool = True,
        overwrite: bool = True
    ):
        """Download one or more resources for *executable* ('*' means all executables).

        Mirrors the behaviour of the `ocrd resmgr download` CLI. Returns the
        collected status messages under the "result" key.

        Raises:
            HTTPException 422: if NAME is missing for a concrete executable, or
                the executable is not installed and allow_uninstalled is False.
        """
        # NOTE(review): a fresh manager is created per request (instead of
        # reusing self.resmgr_instance), presumably to re-read the user
        # database each time — confirm this is intentional.
        resmgr = OcrdResourceManager()
        response = []
        if executable != '*' and not name:
            message = f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required"
            self.log.error(message)
            raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
        elif executable == '*':
            executable = None
        if name == '*':
            name = None
        if executable and not which(executable):
            if not allow_uninstalled:
                message = (f"Executable '{executable}' is not installed. To download resources anyway, "
                           f"use the -a/--allow-uninstalled flag")
                self.log.error(message)
                raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
            else:
                message = f"Executable '{executable}' is not installed, but downloading resources anyway."
                self.log.info(message)
                response.append(message)
        reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
        if not any(r[1] for r in reslist):
            message = f"No resources {name} found in registry for executable {executable}"
            self.log.info(message)
            response.append(message)
            # Fall back to a synthetic entry so an explicit --any-url download still works
            if executable and name:
                reslist = [(executable, [{
                    'url': any_url or '???',
                    'name': name,
                    'type': resource_type,
                    'path_in_archive': path_in_archive}]
                )]
        for this_executable, this_reslist in reslist:
            resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
            # NOTE(review): `location` persists across loop iterations once set;
            # with several executables the first one's default wins — confirm.
            if not location:
                location = resource_locations[0]
            elif location not in resource_locations:
                response.append(
                    f"The selected --location {location} is not in the {this_executable}'s resource search path, "
                    f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
                # Bugfix: the message promised falling back to the first valid
                # location, but the invalid one was still used below.
                location = resource_locations[0]
            res_dest_dir = resmgr.build_resource_dest_dir(location=location, executable=this_executable)
            for res_dict in this_reslist:
                try:
                    fpath = resmgr.handle_resource(
                        res_dict=res_dict,
                        executable=this_executable,
                        dest_dir=res_dest_dir,
                        any_url=any_url,
                        overwrite=overwrite,
                        resource_type=resource_type,
                        path_in_archive=path_in_archive
                    )
                    if not fpath:
                        continue
                except FileExistsError as exc:
                    # Best-effort: report and still emit the usage hint below
                    response.append(str(exc))
                usage = res_dict.get('parameter_usage', 'as-is')
                response.append(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
        return {"result": response}
@@ -1,9 +1,10 @@
1
1
  from logging import Logger
2
- from time import sleep
3
2
  from typing import Dict, List, Union
4
3
 
4
+ from ..constants import RESOURCE_MANAGER_SERVER_PORT
5
5
  from .connection_clients import create_docker_client, create_ssh_client
6
- from .network_agents import AgentType, DataNetworkAgent, DataProcessingWorker, DataProcessorServer, DeployType
6
+ from .network_agents import (
7
+ AgentType, DataProcessingWorker, DataProcessorServer, DeployType, deploy_agent_native_get_pid_hack)
7
8
 
8
9
 
9
10
  class DataHost:
@@ -11,6 +12,8 @@ class DataHost:
11
12
  self, host: str, username: str, password: str, keypath: str, workers: List[Dict], servers: List[Dict]
12
13
  ) -> None:
13
14
  self.host = host
15
+ self.resource_manager_port = RESOURCE_MANAGER_SERVER_PORT
16
+ self.resource_manager_pid = None
14
17
  self.username = username
15
18
  self.password = password
16
19
  self.keypath = keypath
@@ -24,22 +27,18 @@ class DataHost:
24
27
  self.ssh_client = None
25
28
  self.docker_client = None
26
29
 
27
- # Time to wait between deploying agents
28
- self.wait_between_agent_deploys: float = 0.3
29
-
30
30
  # Lists of network agents based on their agent and deployment type
31
- self.network_agents_worker_native = []
32
- self.network_agents_worker_docker = []
33
- self.network_agents_server_native = []
34
- self.network_agents_server_docker = []
31
+ self.network_agents_worker_native: List[DataProcessingWorker] = []
32
+ self.network_agents_worker_docker: List[DataProcessingWorker] = []
33
+ self.network_agents_server_native: List[DataProcessorServer] = []
34
+ self.network_agents_server_docker: List[DataProcessorServer] = []
35
35
 
36
36
  if not workers:
37
37
  workers = []
38
38
  if not servers:
39
39
  servers = []
40
40
 
41
- self.__parse_network_agents_workers(processing_workers=workers)
42
- self.__parse_network_agents_servers(processor_servers=servers)
41
+ self.__parse_network_agents(processing_workers=workers, processor_servers=servers)
43
42
 
44
43
  # Used for caching deployed Processor Servers' ports on the current host
45
44
  # Key: processor_name, Value: list of ports
@@ -51,7 +50,7 @@ class DataHost:
51
50
  return
52
51
  self.processor_servers_ports[processor_name] = self.processor_servers_ports[processor_name].append(port)
53
52
 
54
- def __append_network_agent_to_lists(self, agent_data: DataNetworkAgent) -> None:
53
+ def __append_network_agent_to_lists(self, agent_data: Union[DataProcessingWorker, DataProcessorServer]) -> None:
55
54
  if agent_data.deploy_type != DeployType.DOCKER and agent_data.deploy_type != DeployType.NATIVE:
56
55
  raise ValueError(f"Network agent deploy type is unknown: {agent_data.deploy_type}")
57
56
  if agent_data.agent_type != AgentType.PROCESSING_WORKER and agent_data.agent_type != AgentType.PROCESSOR_SERVER:
@@ -61,24 +60,16 @@ class DataHost:
61
60
  self.needs_ssh_connector = True
62
61
  if agent_data.agent_type == AgentType.PROCESSING_WORKER:
63
62
  self.network_agents_worker_native.append(agent_data)
64
- if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
63
+ elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
65
64
  self.network_agents_server_native.append(agent_data)
66
- if agent_data.deploy_type == DeployType.DOCKER:
65
+ elif agent_data.deploy_type == DeployType.DOCKER:
67
66
  self.needs_docker_connector = True
68
67
  if agent_data.agent_type == AgentType.PROCESSING_WORKER:
69
68
  self.network_agents_worker_docker.append(agent_data)
70
- if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
69
+ elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
71
70
  self.network_agents_server_docker.append(agent_data)
72
71
 
73
- def __parse_network_agents_servers(self, processor_servers: List[Dict]):
74
- for server in processor_servers:
75
- server_data = DataProcessorServer(
76
- processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
77
- port=int(server["port"]), init_by_config=True, pid=None
78
- )
79
- self.__append_network_agent_to_lists(agent_data=server_data)
80
-
81
- def __parse_network_agents_workers(self, processing_workers: List[Dict]):
72
+ def __parse_network_agents(self, processing_workers: List[Dict], processor_servers: List[Dict]):
82
73
  for worker in processing_workers:
83
74
  worker_data = DataProcessingWorker(
84
75
  processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host,
@@ -86,6 +77,12 @@ class DataHost:
86
77
  )
87
78
  for _ in range(int(worker["number_of_instance"])):
88
79
  self.__append_network_agent_to_lists(agent_data=worker_data)
80
+ for server in processor_servers:
81
+ server_data = DataProcessorServer(
82
+ processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
83
+ port=int(server["port"]), init_by_config=True, pid=None
84
+ )
85
+ self.__append_network_agent_to_lists(agent_data=server_data)
89
86
 
90
87
  def create_connection_client(self, client_type: str):
91
88
  if client_type not in ["docker", "ssh"]:
@@ -97,52 +94,38 @@ class DataHost:
97
94
  self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath)
98
95
  return self.docker_client
99
96
 
100
- def __deploy_network_agent(
101
- self, logger: Logger, agent_data: Union[DataProcessorServer, DataProcessingWorker],
102
- mongodb_url: str, rabbitmq_url: str
103
- ) -> None:
104
- deploy_type = agent_data.deploy_type
105
- agent_type = agent_data.agent_type
106
- name = agent_data.processor_name
107
- agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}, host: {self.host}"
108
- logger.info(f"Deploying {agent_info}")
109
-
110
- connection_client = None
111
- if deploy_type == DeployType.NATIVE:
112
- assert self.ssh_client, f"SSH client connection missing."
113
- connection_client = self.ssh_client
114
- if deploy_type == DeployType.DOCKER:
115
- assert self.docker_client, f"Docker client connection missing."
116
- connection_client = self.docker_client
117
-
118
- if agent_type == AgentType.PROCESSING_WORKER:
119
- agent_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
120
- if agent_type == AgentType.PROCESSOR_SERVER:
121
- agent_data.deploy_network_agent(logger, connection_client, mongodb_url)
122
-
123
- sleep(self.wait_between_agent_deploys)
97
+ def __deploy_network_agent_resource_manager_server(self, logger: Logger):
98
+ logger.info(f"Deploying resource manager server on host: {self.host}:{self.resource_manager_port}")
99
+ start_cmd = f"ocrd network resmgr-server --address {self.host}:{self.resource_manager_port} &"
100
+ pid = deploy_agent_native_get_pid_hack(logger, self.ssh_client, start_cmd)
101
+ logger.info(f"Deployed: OCR-D Resource Manager Server [{pid}]: {self.host}:{self.resource_manager_port}")
102
+ self.resource_manager_pid = pid
124
103
 
125
- def __deploy_network_agents_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
104
+ def __deploy_network_agents_processing_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
126
105
  logger.info(f"Deploying processing workers on host: {self.host}")
127
106
  amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker)
128
107
  if not amount_workers:
129
108
  logger.info(f"No processing workers found to be deployed")
130
109
  for data_worker in self.network_agents_worker_native:
131
- self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
110
+ data_worker.deploy_network_agent(logger, self.ssh_client, mongodb_url, rabbitmq_url)
111
+ logger.info(f"Deployed: {data_worker}")
132
112
  for data_worker in self.network_agents_worker_docker:
133
- self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
113
+ data_worker.deploy_network_agent(logger, self.docker_client, mongodb_url, rabbitmq_url)
114
+ logger.info(f"Deployed: {data_worker}")
134
115
 
135
- def __deploy_network_agents_servers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
116
+ def __deploy_network_agents_processor_servers(self, logger: Logger, mongodb_url: str):
136
117
  logger.info(f"Deploying processor servers on host: {self.host}")
137
118
  amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker)
138
119
  if not amount_servers:
139
120
  logger.info(f"No processor servers found to be deployed")
140
121
  for data_server in self.network_agents_server_native:
141
- self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
122
+ data_server.deploy_network_agent(logger, self.ssh_client, mongodb_url)
142
123
  self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
124
+ logger.info(f"Deployed: {data_server}")
143
125
  for data_server in self.network_agents_server_docker:
144
- self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
126
+ data_server.deploy_network_agent(logger, self.docker_client, mongodb_url)
145
127
  self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
128
+ logger.info(f"Deployed: {data_server}")
146
129
 
147
130
  def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
148
131
  if self.needs_ssh_connector and not self.ssh_client:
@@ -151,8 +134,9 @@ class DataHost:
151
134
  if self.needs_docker_connector:
152
135
  logger.debug("Creating missing docker connector before deploying")
153
136
  self.docker_client = self.create_connection_client(client_type="docker")
154
- self.__deploy_network_agents_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
155
- self.__deploy_network_agents_servers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
137
+ self.__deploy_network_agent_resource_manager_server(logger)
138
+ self.__deploy_network_agents_processing_workers(logger, mongodb_url, rabbitmq_url)
139
+ self.__deploy_network_agents_processor_servers(logger, mongodb_url)
156
140
  if self.ssh_client:
157
141
  self.ssh_client.close()
158
142
  self.ssh_client = None
@@ -160,6 +144,12 @@ class DataHost:
160
144
  self.docker_client.close()
161
145
  self.docker_client = None
162
146
 
147
+ def __stop_network_agent_resource_manager_server(self, logger: Logger):
148
+ logger.info(f"Stopping OCR-D Resource Manager Server [{self.resource_manager_pid}]: "
149
+ f"{self.host}:{self.resource_manager_port}")
150
+ assert self.ssh_client, f"SSH client connection missing"
151
+ self.ssh_client.exec_command(f"kill {self.resource_manager_pid}")
152
+
163
153
  def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployType, agent_type: AgentType, pid: str):
164
154
  agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}"
165
155
  if not pid:
@@ -205,6 +195,7 @@ class DataHost:
205
195
  if self.needs_docker_connector and not self.docker_client:
206
196
  logger.debug("Creating missing docker connector before stopping")
207
197
  self.docker_client = self.create_connection_client(client_type="docker")
198
+ self.__stop_network_agent_resource_manager_server(logger=logger)
208
199
  self.__stop_network_agents_workers(logger=logger)
209
200
  self.__stop_network_agents_servers(logger=logger)
210
201
  if self.ssh_client:
@@ -1,4 +1,5 @@
1
1
  from logging import Logger
2
+ from time import sleep
2
3
  from typing import Any
3
4
 
4
5
  from re import search as re_search
@@ -24,7 +25,8 @@ def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str)
24
25
  output = stdout.read().decode("utf-8")
25
26
  stdout.close()
26
27
  stdin.close()
27
- return re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
28
+ pid = re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
29
+ return pid
28
30
 
29
31
 
30
32
  # TODO: Implement the actual method that is missing
@@ -51,6 +53,12 @@ class DataNetworkAgent:
51
53
  # The id is assigned when the agent is deployed
52
54
  self.pid = pid
53
55
 
56
+ # Time to wait between deploying agents
57
+ self.wait_between_agent_deploys: float = 0.3
58
+
59
+ def __str__(self):
60
+ return f"{self.pid} {self.deploy_type} {self.agent_type} {self.processor_name} on host: {self.host}"
61
+
54
62
  def _start_native_instance(self, logger: Logger, ssh_client, start_cmd: str):
55
63
  if self.deploy_type != DeployType.NATIVE:
56
64
  raise RuntimeError(f"Mismatch of deploy type when starting network agent: {self.processor_name}")
@@ -76,12 +84,18 @@ class DataProcessingWorker(DataNetworkAgent):
76
84
  def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str):
77
85
  if self.deploy_type == DeployType.NATIVE:
78
86
  start_cmd = f"{self.processor_name} {self.agent_type} --database {database_url} --queue {queue_url} &"
87
+ assert connector_client, f"SSH client connection missing."
79
88
  self.pid = self._start_native_instance(logger, connector_client, start_cmd)
89
+ sleep(self.wait_between_agent_deploys)
80
90
  return self.pid
81
91
  if self.deploy_type == DeployType.DOCKER:
82
92
  # TODO: add real command to start processing worker in docker here
83
- start_cmd = f""
93
+ start_cmd = ""
94
+ assert connector_client, f"Docker client connection missing."
95
+ if not start_cmd:
96
+ raise RuntimeError("Missing start command for the Processing Worker in docker mode")
84
97
  self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
98
+ sleep(self.wait_between_agent_deploys)
85
99
  return self.pid
86
100
  raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
87
101
 
@@ -96,15 +110,24 @@ class DataProcessorServer(DataNetworkAgent):
96
110
  )
97
111
  self.port = port
98
112
 
113
+ def __str__(self):
114
+ return f"{super().__str__}:{self.port}"
115
+
99
116
  def deploy_network_agent(self, logger: Logger, connector_client, database_url: str):
100
117
  agent_address = f"{self.host}:{self.port}"
101
118
  if self.deploy_type == DeployType.NATIVE:
102
119
  start_cmd = f"{self.processor_name} {self.agent_type} --address {agent_address} --database {database_url} &"
120
+ assert connector_client, f"SSH client connection missing."
103
121
  self.pid = self._start_native_instance(logger, connector_client, start_cmd)
122
+ sleep(self.wait_between_agent_deploys)
104
123
  return self.pid
105
124
  if self.deploy_type == DeployType.DOCKER:
106
125
  # TODO: add real command to start processor server in docker here
107
- start_cmd = f""
126
+ start_cmd = ""
127
+ assert connector_client, f"Docker client connection missing."
128
+ if not start_cmd:
129
+ raise RuntimeError("Missing start command for the Processor Server in docker mode")
108
130
  self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
131
+ sleep(self.wait_between_agent_deploys)
109
132
  return self.pid
110
133
  raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
ocrd_utils/__init__.py CHANGED
@@ -118,6 +118,7 @@ from .constants import (
118
118
  REGEX_PREFIX,
119
119
  REGEX_FILE_ID,
120
120
  RESOURCE_LOCATIONS,
121
+ RESOURCE_TYPES,
121
122
  LOG_FORMAT,
122
123
  LOG_TIMEFMT,
123
124
  VERSION,
@@ -184,6 +185,7 @@ from .os import (
184
185
  get_processor_resource_types,
185
186
  get_ocrd_tool_json,
186
187
  get_moduledir,
188
+ get_env_locations,
187
189
  guess_media_type,
188
190
  list_all_resources,
189
191
  is_file_in_directory,
ocrd_utils/constants.py CHANGED
@@ -5,6 +5,7 @@ from .introspect import dist_version
5
5
  from re import compile as regex_compile
6
6
 
7
7
  __all__ = [
8
+ 'DEFAULT_METS_BASENAME',
8
9
  'EXT_TO_MIME',
9
10
  'LOG_FORMAT',
10
11
  'LOG_TIMEFMT',
@@ -14,7 +15,9 @@ __all__ = [
14
15
  'PIL_TO_MIME',
15
16
  'REGEX_PREFIX',
16
17
  'REGEX_FILE_ID',
18
+ 'RESOURCES_DIR_SYSTEM',
17
19
  'RESOURCE_LOCATIONS',
20
+ 'RESOURCE_TYPES',
18
21
  'VERSION',
19
22
  ]
20
23
 
@@ -108,6 +111,8 @@ LOG_FORMAT = r'%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s'
108
111
  LOG_TIMEFMT = r'%H:%M:%S'
109
112
 
110
113
  RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
114
+ RESOURCE_TYPES = ['file', 'directory', 'archive']
115
+ RESOURCES_DIR_SYSTEM = '/usr/local/share/ocrd-resources'
111
116
 
112
117
  DEFAULT_METS_BASENAME = 'mets.xml'
113
118