ocrd 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd_network/constants.py CHANGED
@@ -10,8 +10,6 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
10
10
  # Used as a placeholder to lock all pages when no page_id is specified
11
11
  SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
12
12
 
13
- # TODO: Make this more configurable
14
- RESOURCE_MANAGER_SERVER_PORT = 45555
15
13
 
16
14
  class AgentType(str, Enum):
17
15
  PROCESSING_WORKER = "worker"
@@ -51,7 +49,6 @@ class NetworkLoggingDirs(str, Enum):
51
49
  PROCESSING_SERVERS = "processing_servers"
52
50
  PROCESSING_WORKERS = "processing_workers"
53
51
  PROCESSOR_SERVERS = "processor_servers"
54
- RESOURCE_MANAGER_SERVERS = "resource_manager_servers"
55
52
 
56
53
 
57
54
  class ServerApiTags(str, Enum):
@@ -50,8 +50,3 @@ def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Pa
50
50
  def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path:
51
51
  log_file: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log"
52
52
  return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS), log_file)
53
-
54
-
55
- def get_resource_manager_server_logging_file_path(pid: int) -> Path:
56
- log_file: str = f"resource_manager_server.{pid}.log"
57
- return Path(get_root_logging_dir(NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS), log_file)
@@ -1,10 +1,9 @@
1
1
  from logging import Logger
2
+ from time import sleep
2
3
  from typing import Dict, List, Union
3
4
 
4
- from ..constants import RESOURCE_MANAGER_SERVER_PORT
5
5
  from .connection_clients import create_docker_client, create_ssh_client
6
- from .network_agents import (
7
- AgentType, DataProcessingWorker, DataProcessorServer, DeployType, deploy_agent_native_get_pid_hack)
6
+ from .network_agents import AgentType, DataNetworkAgent, DataProcessingWorker, DataProcessorServer, DeployType
8
7
 
9
8
 
10
9
  class DataHost:
@@ -12,8 +11,6 @@ class DataHost:
12
11
  self, host: str, username: str, password: str, keypath: str, workers: List[Dict], servers: List[Dict]
13
12
  ) -> None:
14
13
  self.host = host
15
- self.resource_manager_port = RESOURCE_MANAGER_SERVER_PORT
16
- self.resource_manager_pid = None
17
14
  self.username = username
18
15
  self.password = password
19
16
  self.keypath = keypath
@@ -27,18 +24,22 @@ class DataHost:
27
24
  self.ssh_client = None
28
25
  self.docker_client = None
29
26
 
27
+ # Time to wait between deploying agents
28
+ self.wait_between_agent_deploys: float = 0.3
29
+
30
30
  # Lists of network agents based on their agent and deployment type
31
- self.network_agents_worker_native: List[DataProcessingWorker] = []
32
- self.network_agents_worker_docker: List[DataProcessingWorker] = []
33
- self.network_agents_server_native: List[DataProcessorServer] = []
34
- self.network_agents_server_docker: List[DataProcessorServer] = []
31
+ self.network_agents_worker_native = []
32
+ self.network_agents_worker_docker = []
33
+ self.network_agents_server_native = []
34
+ self.network_agents_server_docker = []
35
35
 
36
36
  if not workers:
37
37
  workers = []
38
38
  if not servers:
39
39
  servers = []
40
40
 
41
- self.__parse_network_agents(processing_workers=workers, processor_servers=servers)
41
+ self.__parse_network_agents_workers(processing_workers=workers)
42
+ self.__parse_network_agents_servers(processor_servers=servers)
42
43
 
43
44
  # Used for caching deployed Processor Servers' ports on the current host
44
45
  # Key: processor_name, Value: list of ports
@@ -50,7 +51,7 @@ class DataHost:
50
51
  return
51
52
  self.processor_servers_ports[processor_name] = self.processor_servers_ports[processor_name].append(port)
52
53
 
53
- def __append_network_agent_to_lists(self, agent_data: Union[DataProcessingWorker, DataProcessorServer]) -> None:
54
+ def __append_network_agent_to_lists(self, agent_data: DataNetworkAgent) -> None:
54
55
  if agent_data.deploy_type != DeployType.DOCKER and agent_data.deploy_type != DeployType.NATIVE:
55
56
  raise ValueError(f"Network agent deploy type is unknown: {agent_data.deploy_type}")
56
57
  if agent_data.agent_type != AgentType.PROCESSING_WORKER and agent_data.agent_type != AgentType.PROCESSOR_SERVER:
@@ -60,16 +61,24 @@ class DataHost:
60
61
  self.needs_ssh_connector = True
61
62
  if agent_data.agent_type == AgentType.PROCESSING_WORKER:
62
63
  self.network_agents_worker_native.append(agent_data)
63
- elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
64
+ if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
64
65
  self.network_agents_server_native.append(agent_data)
65
- elif agent_data.deploy_type == DeployType.DOCKER:
66
+ if agent_data.deploy_type == DeployType.DOCKER:
66
67
  self.needs_docker_connector = True
67
68
  if agent_data.agent_type == AgentType.PROCESSING_WORKER:
68
69
  self.network_agents_worker_docker.append(agent_data)
69
- elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
70
+ if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
70
71
  self.network_agents_server_docker.append(agent_data)
71
72
 
72
- def __parse_network_agents(self, processing_workers: List[Dict], processor_servers: List[Dict]):
73
+ def __parse_network_agents_servers(self, processor_servers: List[Dict]):
74
+ for server in processor_servers:
75
+ server_data = DataProcessorServer(
76
+ processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
77
+ port=int(server["port"]), init_by_config=True, pid=None
78
+ )
79
+ self.__append_network_agent_to_lists(agent_data=server_data)
80
+
81
+ def __parse_network_agents_workers(self, processing_workers: List[Dict]):
73
82
  for worker in processing_workers:
74
83
  worker_data = DataProcessingWorker(
75
84
  processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host,
@@ -77,12 +86,6 @@ class DataHost:
77
86
  )
78
87
  for _ in range(int(worker["number_of_instance"])):
79
88
  self.__append_network_agent_to_lists(agent_data=worker_data)
80
- for server in processor_servers:
81
- server_data = DataProcessorServer(
82
- processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
83
- port=int(server["port"]), init_by_config=True, pid=None
84
- )
85
- self.__append_network_agent_to_lists(agent_data=server_data)
86
89
 
87
90
  def create_connection_client(self, client_type: str):
88
91
  if client_type not in ["docker", "ssh"]:
@@ -94,38 +97,52 @@ class DataHost:
94
97
  self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath)
95
98
  return self.docker_client
96
99
 
97
- def __deploy_network_agent_resource_manager_server(self, logger: Logger):
98
- logger.info(f"Deploying resource manager server on host: {self.host}:{self.resource_manager_port}")
99
- start_cmd = f"ocrd network resmgr-server --address {self.host}:{self.resource_manager_port} &"
100
- pid = deploy_agent_native_get_pid_hack(logger, self.ssh_client, start_cmd)
101
- logger.info(f"Deployed: OCR-D Resource Manager Server [{pid}]: {self.host}:{self.resource_manager_port}")
102
- self.resource_manager_pid = pid
100
+ def __deploy_network_agent(
101
+ self, logger: Logger, agent_data: Union[DataProcessorServer, DataProcessingWorker],
102
+ mongodb_url: str, rabbitmq_url: str
103
+ ) -> None:
104
+ deploy_type = agent_data.deploy_type
105
+ agent_type = agent_data.agent_type
106
+ name = agent_data.processor_name
107
+ agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}, host: {self.host}"
108
+ logger.info(f"Deploying {agent_info}")
109
+
110
+ connection_client = None
111
+ if deploy_type == DeployType.NATIVE:
112
+ assert self.ssh_client, f"SSH client connection missing."
113
+ connection_client = self.ssh_client
114
+ if deploy_type == DeployType.DOCKER:
115
+ assert self.docker_client, f"Docker client connection missing."
116
+ connection_client = self.docker_client
117
+
118
+ if agent_type == AgentType.PROCESSING_WORKER:
119
+ agent_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
120
+ if agent_type == AgentType.PROCESSOR_SERVER:
121
+ agent_data.deploy_network_agent(logger, connection_client, mongodb_url)
122
+
123
+ sleep(self.wait_between_agent_deploys)
103
124
 
104
- def __deploy_network_agents_processing_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
125
+ def __deploy_network_agents_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
105
126
  logger.info(f"Deploying processing workers on host: {self.host}")
106
127
  amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker)
107
128
  if not amount_workers:
108
129
  logger.info(f"No processing workers found to be deployed")
109
130
  for data_worker in self.network_agents_worker_native:
110
- data_worker.deploy_network_agent(logger, self.ssh_client, mongodb_url, rabbitmq_url)
111
- logger.info(f"Deployed: {data_worker}")
131
+ self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
112
132
  for data_worker in self.network_agents_worker_docker:
113
- data_worker.deploy_network_agent(logger, self.docker_client, mongodb_url, rabbitmq_url)
114
- logger.info(f"Deployed: {data_worker}")
133
+ self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
115
134
 
116
- def __deploy_network_agents_processor_servers(self, logger: Logger, mongodb_url: str):
135
+ def __deploy_network_agents_servers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
117
136
  logger.info(f"Deploying processor servers on host: {self.host}")
118
137
  amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker)
119
138
  if not amount_servers:
120
139
  logger.info(f"No processor servers found to be deployed")
121
140
  for data_server in self.network_agents_server_native:
122
- data_server.deploy_network_agent(logger, self.ssh_client, mongodb_url)
141
+ self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
123
142
  self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
124
- logger.info(f"Deployed: {data_server}")
125
143
  for data_server in self.network_agents_server_docker:
126
- data_server.deploy_network_agent(logger, self.docker_client, mongodb_url)
144
+ self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
127
145
  self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
128
- logger.info(f"Deployed: {data_server}")
129
146
 
130
147
  def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
131
148
  if self.needs_ssh_connector and not self.ssh_client:
@@ -134,9 +151,8 @@ class DataHost:
134
151
  if self.needs_docker_connector:
135
152
  logger.debug("Creating missing docker connector before deploying")
136
153
  self.docker_client = self.create_connection_client(client_type="docker")
137
- self.__deploy_network_agent_resource_manager_server(logger)
138
- self.__deploy_network_agents_processing_workers(logger, mongodb_url, rabbitmq_url)
139
- self.__deploy_network_agents_processor_servers(logger, mongodb_url)
154
+ self.__deploy_network_agents_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
155
+ self.__deploy_network_agents_servers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
140
156
  if self.ssh_client:
141
157
  self.ssh_client.close()
142
158
  self.ssh_client = None
@@ -144,12 +160,6 @@ class DataHost:
144
160
  self.docker_client.close()
145
161
  self.docker_client = None
146
162
 
147
- def __stop_network_agent_resource_manager_server(self, logger: Logger):
148
- logger.info(f"Stopping OCR-D Resource Manager Server [{self.resource_manager_pid}]: "
149
- f"{self.host}:{self.resource_manager_port}")
150
- assert self.ssh_client, f"SSH client connection missing"
151
- self.ssh_client.exec_command(f"kill {self.resource_manager_pid}")
152
-
153
163
  def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployType, agent_type: AgentType, pid: str):
154
164
  agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}"
155
165
  if not pid:
@@ -195,7 +205,6 @@ class DataHost:
195
205
  if self.needs_docker_connector and not self.docker_client:
196
206
  logger.debug("Creating missing docker connector before stopping")
197
207
  self.docker_client = self.create_connection_client(client_type="docker")
198
- self.__stop_network_agent_resource_manager_server(logger=logger)
199
208
  self.__stop_network_agents_workers(logger=logger)
200
209
  self.__stop_network_agents_servers(logger=logger)
201
210
  if self.ssh_client:
@@ -1,5 +1,4 @@
1
1
  from logging import Logger
2
- from time import sleep
3
2
  from typing import Any
4
3
 
5
4
  from re import search as re_search
@@ -25,8 +24,7 @@ def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str)
25
24
  output = stdout.read().decode("utf-8")
26
25
  stdout.close()
27
26
  stdin.close()
28
- pid = re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
29
- return pid
27
+ return re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
30
28
 
31
29
 
32
30
  # TODO: Implement the actual method that is missing
@@ -53,12 +51,6 @@ class DataNetworkAgent:
53
51
  # The id is assigned when the agent is deployed
54
52
  self.pid = pid
55
53
 
56
- # Time to wait between deploying agents
57
- self.wait_between_agent_deploys: float = 0.3
58
-
59
- def __str__(self):
60
- return f"{self.pid} {self.deploy_type} {self.agent_type} {self.processor_name} on host: {self.host}"
61
-
62
54
  def _start_native_instance(self, logger: Logger, ssh_client, start_cmd: str):
63
55
  if self.deploy_type != DeployType.NATIVE:
64
56
  raise RuntimeError(f"Mismatch of deploy type when starting network agent: {self.processor_name}")
@@ -84,18 +76,12 @@ class DataProcessingWorker(DataNetworkAgent):
84
76
  def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str):
85
77
  if self.deploy_type == DeployType.NATIVE:
86
78
  start_cmd = f"{self.processor_name} {self.agent_type} --database {database_url} --queue {queue_url} &"
87
- assert connector_client, f"SSH client connection missing."
88
79
  self.pid = self._start_native_instance(logger, connector_client, start_cmd)
89
- sleep(self.wait_between_agent_deploys)
90
80
  return self.pid
91
81
  if self.deploy_type == DeployType.DOCKER:
92
82
  # TODO: add real command to start processing worker in docker here
93
- start_cmd = ""
94
- assert connector_client, f"Docker client connection missing."
95
- if not start_cmd:
96
- raise RuntimeError("Missing start command for the Processing Worker in docker mode")
83
+ start_cmd = f""
97
84
  self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
98
- sleep(self.wait_between_agent_deploys)
99
85
  return self.pid
100
86
  raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
101
87
 
@@ -110,24 +96,15 @@ class DataProcessorServer(DataNetworkAgent):
110
96
  )
111
97
  self.port = port
112
98
 
113
- def __str__(self):
114
- return f"{super().__str__}:{self.port}"
115
-
116
99
  def deploy_network_agent(self, logger: Logger, connector_client, database_url: str):
117
100
  agent_address = f"{self.host}:{self.port}"
118
101
  if self.deploy_type == DeployType.NATIVE:
119
102
  start_cmd = f"{self.processor_name} {self.agent_type} --address {agent_address} --database {database_url} &"
120
- assert connector_client, f"SSH client connection missing."
121
103
  self.pid = self._start_native_instance(logger, connector_client, start_cmd)
122
- sleep(self.wait_between_agent_deploys)
123
104
  return self.pid
124
105
  if self.deploy_type == DeployType.DOCKER:
125
106
  # TODO: add real command to start processor server in docker here
126
- start_cmd = ""
127
- assert connector_client, f"Docker client connection missing."
128
- if not start_cmd:
129
- raise RuntimeError("Missing start command for the Processor Server in docker mode")
107
+ start_cmd = f""
130
108
  self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
131
- sleep(self.wait_between_agent_deploys)
132
109
  return self.pid
133
110
  raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
ocrd_utils/__init__.py CHANGED
@@ -118,7 +118,6 @@ from .constants import (
118
118
  REGEX_PREFIX,
119
119
  REGEX_FILE_ID,
120
120
  RESOURCE_LOCATIONS,
121
- RESOURCE_TYPES,
122
121
  LOG_FORMAT,
123
122
  LOG_TIMEFMT,
124
123
  VERSION,
@@ -185,7 +184,6 @@ from .os import (
185
184
  get_processor_resource_types,
186
185
  get_ocrd_tool_json,
187
186
  get_moduledir,
188
- get_env_locations,
189
187
  guess_media_type,
190
188
  list_all_resources,
191
189
  is_file_in_directory,
ocrd_utils/constants.py CHANGED
@@ -5,7 +5,6 @@ from .introspect import dist_version
5
5
  from re import compile as regex_compile
6
6
 
7
7
  __all__ = [
8
- 'DEFAULT_METS_BASENAME',
9
8
  'EXT_TO_MIME',
10
9
  'LOG_FORMAT',
11
10
  'LOG_TIMEFMT',
@@ -15,9 +14,7 @@ __all__ = [
15
14
  'PIL_TO_MIME',
16
15
  'REGEX_PREFIX',
17
16
  'REGEX_FILE_ID',
18
- 'RESOURCES_DIR_SYSTEM',
19
17
  'RESOURCE_LOCATIONS',
20
- 'RESOURCE_TYPES',
21
18
  'VERSION',
22
19
  ]
23
20
 
@@ -111,8 +108,6 @@ LOG_FORMAT = r'%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s'
111
108
  LOG_TIMEFMT = r'%H:%M:%S'
112
109
 
113
110
  RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
114
- RESOURCE_TYPES = ['file', 'directory', 'archive']
115
- RESOURCES_DIR_SYSTEM = '/usr/local/share/ocrd-resources'
116
111
 
117
112
  DEFAULT_METS_BASENAME = 'mets.xml'
118
113