ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (93)
  1. ocrd/cli/__init__.py +8 -6
  2. ocrd/cli/bashlib.py +8 -114
  3. ocrd/cli/network.py +0 -2
  4. ocrd/cli/ocrd_tool.py +26 -4
  5. ocrd/cli/process.py +1 -0
  6. ocrd/cli/resmgr.py +0 -1
  7. ocrd/cli/validate.py +32 -13
  8. ocrd/cli/workspace.py +125 -52
  9. ocrd/cli/zip.py +13 -4
  10. ocrd/decorators/__init__.py +28 -52
  11. ocrd/decorators/loglevel_option.py +4 -0
  12. ocrd/decorators/mets_find_options.py +2 -1
  13. ocrd/decorators/ocrd_cli_options.py +3 -7
  14. ocrd/decorators/parameter_option.py +12 -11
  15. ocrd/mets_server.py +11 -15
  16. ocrd/processor/base.py +88 -71
  17. ocrd/processor/builtin/dummy_processor.py +7 -4
  18. ocrd/processor/builtin/filter_processor.py +3 -2
  19. ocrd/processor/helpers.py +5 -6
  20. ocrd/processor/ocrd_page_result.py +7 -5
  21. ocrd/resolver.py +42 -32
  22. ocrd/task_sequence.py +11 -4
  23. ocrd/workspace.py +64 -54
  24. ocrd/workspace_backup.py +3 -0
  25. ocrd/workspace_bagger.py +15 -8
  26. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
  27. ocrd-3.7.0.dist-info/RECORD +123 -0
  28. ocrd_modelfactory/__init__.py +4 -2
  29. ocrd_models/constants.py +18 -1
  30. ocrd_models/ocrd_agent.py +1 -1
  31. ocrd_models/ocrd_exif.py +7 -3
  32. ocrd_models/ocrd_file.py +24 -19
  33. ocrd_models/ocrd_mets.py +90 -67
  34. ocrd_models/ocrd_page.py +17 -13
  35. ocrd_models/ocrd_xml_base.py +1 -0
  36. ocrd_models/report.py +2 -1
  37. ocrd_models/utils.py +4 -3
  38. ocrd_models/xpath_functions.py +3 -1
  39. ocrd_network/__init__.py +1 -2
  40. ocrd_network/cli/__init__.py +0 -2
  41. ocrd_network/cli/client.py +122 -50
  42. ocrd_network/cli/processing_server.py +1 -2
  43. ocrd_network/client.py +2 -2
  44. ocrd_network/client_utils.py +30 -13
  45. ocrd_network/constants.py +1 -6
  46. ocrd_network/database.py +3 -3
  47. ocrd_network/logging_utils.py +2 -7
  48. ocrd_network/models/__init__.py +0 -2
  49. ocrd_network/models/job.py +31 -33
  50. ocrd_network/models/messages.py +3 -2
  51. ocrd_network/models/workspace.py +5 -5
  52. ocrd_network/process_helpers.py +54 -17
  53. ocrd_network/processing_server.py +63 -114
  54. ocrd_network/processing_worker.py +6 -5
  55. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  56. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  57. ocrd_network/runtime_data/__init__.py +1 -2
  58. ocrd_network/runtime_data/deployer.py +12 -85
  59. ocrd_network/runtime_data/hosts.py +61 -130
  60. ocrd_network/runtime_data/network_agents.py +7 -31
  61. ocrd_network/runtime_data/network_services.py +1 -1
  62. ocrd_network/server_cache.py +1 -1
  63. ocrd_network/server_utils.py +13 -52
  64. ocrd_network/utils.py +1 -0
  65. ocrd_utils/__init__.py +4 -4
  66. ocrd_utils/config.py +86 -76
  67. ocrd_utils/deprecate.py +3 -0
  68. ocrd_utils/image.py +51 -23
  69. ocrd_utils/introspect.py +8 -3
  70. ocrd_utils/logging.py +15 -7
  71. ocrd_utils/os.py +17 -4
  72. ocrd_utils/str.py +32 -16
  73. ocrd_validators/json_validator.py +4 -1
  74. ocrd_validators/ocrd_tool_validator.py +2 -1
  75. ocrd_validators/ocrd_zip_validator.py +5 -4
  76. ocrd_validators/page_validator.py +21 -9
  77. ocrd_validators/parameter_validator.py +3 -2
  78. ocrd_validators/processing_server_config.schema.yml +1 -33
  79. ocrd_validators/resource_list_validator.py +3 -1
  80. ocrd_validators/workspace_validator.py +30 -20
  81. ocrd_validators/xsd_mets_validator.py +2 -1
  82. ocrd_validators/xsd_page_validator.py +2 -1
  83. ocrd_validators/xsd_validator.py +4 -2
  84. ocrd/cli/log.py +0 -51
  85. ocrd/lib.bash +0 -317
  86. ocrd-3.5.1.dist-info/RECORD +0 -128
  87. ocrd_network/cli/processor_server.py +0 -31
  88. ocrd_network/models/ocrd_tool.py +0 -12
  89. ocrd_network/processor_server.py +0 -255
  90. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
  91. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
  92. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
  93. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0

ocrd_network/runtime_data/__init__.py CHANGED
@@ -5,10 +5,9 @@ __all__ = [
     "DataNetworkAgent",
     "DataRabbitMQ",
     "DataProcessingWorker",
-    "DataProcessorServer"
 ]
 
 from .deployer import Deployer
 from .hosts import DataHost
-from .network_agents import DataNetworkAgent, DataProcessingWorker, DataProcessorServer
+from .network_agents import DataNetworkAgent, DataProcessingWorker
 from .network_services import DataMongoDB, DataRabbitMQ
ocrd_network/runtime_data/deployer.py CHANGED
@@ -9,11 +9,10 @@ Each Processing Worker is an instance of an OCR-D processor.
 from __future__ import annotations
 from pathlib import Path
 import psutil
-from time import sleep
-from typing import Dict, List, Union
+from typing import Dict, List
 
 from ocrd import OcrdMetsServer
-from ocrd_utils import config, getLogger, safe_filename
+from ocrd_utils import getLogger
 from ..logging_utils import get_mets_server_logging_file_path
 from ..utils import get_uds_path, is_mets_server_running, stop_mets_server
 from .config_parser import parse_hosts_data, parse_mongodb_data, parse_rabbitmq_data, validate_and_load_config
@@ -34,89 +33,15 @@ class Deployer:
         self.mets_servers_paths: Dict = {}  # {"ws_dir_path": "mets_server_url"}
         self.use_tcp_mets = ps_config.get("use_tcp_mets", False)
 
-    # TODO: Reconsider this.
-    def find_matching_network_agents(
-        self, worker_only: bool = False, server_only: bool = False, docker_only: bool = False,
-        native_only: bool = False, str_names_only: bool = False, unique_only: bool = False, sort: bool = False
-    ) -> Union[List[str], List[object]]:
-        """Finds and returns a list of matching data objects of type:
-        `DataProcessingWorker` and `DataProcessorServer`.
-
-        :py:attr:`worker_only` match only worker network agents (DataProcessingWorker)
-        :py:attr:`server_only` match only server network agents (DataProcessorServer)
-        :py:attr:`docker_only` match only docker network agents (DataProcessingWorker and DataProcessorServer)
-        :py:attr:`native_only` match only native network agents (DataProcessingWorker and DataProcessorServer)
-        :py:attr:`str_names_only` returns the processor_name filed instead of the Data* object
-        :py:attr:`unique_only` remove duplicate names from the matches
-        :py:attr:`sort` sort the result
-
-        `worker_only` and `server_only` are mutually exclusive to each other
-        `docker_only` and `native_only` are mutually exclusive to each other
-        `unique_only` is allowed only together with `str_names_only`
-        """
-
-        if worker_only and server_only:
-            msg = f"Only 'worker_only' or 'server_only' is allowed, not both."
-            self.log.exception(msg)
-            raise ValueError(msg)
-        if docker_only and native_only:
-            msg = f"Only 'docker_only' or 'native_only' is allowed, not both."
-            self.log.exception(msg)
-            raise ValueError(msg)
-        if not str_names_only and unique_only:
-            msg = f"Value 'unique_only' is allowed only together with 'str_names_only'"
-            self.log.exception(msg)
-            raise ValueError(msg)
-        if sort and not str_names_only:
-            msg = f"Value 'sort' is allowed only together with 'str_names_only'"
-            self.log.exception(msg)
-            raise ValueError(msg)
-
-        # Find all matching objects of type DataProcessingWorker or DataProcessorServer
-        matched_objects = []
-        for data_host in self.data_hosts:
-            if not server_only:
-                if not docker_only:
-                    for data_worker in data_host.network_agents_worker_native:
-                        matched_objects.append(data_worker)
-                if not native_only:
-                    for data_worker in data_host.network_agents_worker_docker:
-                        matched_objects.append(data_worker)
-            if not worker_only:
-                if not docker_only:
-                    for data_server in data_host.network_agents_server_native:
-                        matched_objects.append(data_server)
-                if not native_only:
-                    for data_server in data_host.network_agents_server_docker:
-                        matched_objects.append(data_server)
-        if not str_names_only:
-            return matched_objects
-        # Gets only the processor names of the matched objects
-        matched_names = [match.processor_name for match in matched_objects]
-        if not unique_only:
-            return matched_names
-        list_matched = list(dict.fromkeys(matched_names))
-        if not sort:
-            # Removes any duplicate entries from matched names
-            return list_matched
-        list_matched.sort()
-        return list_matched
-
-    def resolve_processor_server_url(self, processor_name) -> str:
-        processor_server_url = ''
-        for data_host in self.data_hosts:
-            processor_server_url = data_host.resolve_processor_server_url(processor_name=processor_name)
-        return processor_server_url
-
-    def deploy_network_agents(self, mongodb_url: str, rabbitmq_url: str) -> None:
-        self.log.debug("Deploying processing workers/processor servers...")
+    def deploy_workers(self, mongodb_url: str, rabbitmq_url: str) -> None:
+        self.log.debug("Deploying processing workers...")
         for host_data in self.data_hosts:
-            host_data.deploy_network_agents(logger=self.log, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
+            host_data.deploy_workers(logger=self.log, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
 
-    def stop_network_agents(self) -> None:
-        self.log.debug("Stopping processing workers/processor servers...")
+    def stop_workers(self) -> None:
+        self.log.debug("Stopping processing workers...")
         for host_data in self.data_hosts:
-            host_data.stop_network_agents(logger=self.log)
+            host_data.stop_workers(logger=self.log)
 
     def deploy_rabbitmq(self) -> str:
         self.data_queue.deploy_rabbitmq(self.log)
@@ -138,7 +63,7 @@ class Deployer:
         If RabbitMQ server is stopped before stopping Processing Workers that may have
         a bad outcome and leave Processing Workers in an unpredictable state.
         """
-        self.stop_network_agents()
+        self.stop_workers()
         self.stop_mongodb()
         self.stop_rabbitmq()
 
@@ -154,7 +79,9 @@
                              "Removing to avoid any weird behavior before starting the server.")
             Path(mets_server_url).unlink()
         self.log.info(f"Starting UDS mets server: {mets_server_url}")
-        pid = OcrdMetsServer.create_process(mets_server_url=str(mets_server_url), ws_dir_path=str(ws_dir_path), log_file=str(log_file))
+        pid = OcrdMetsServer.create_process(mets_server_url=str(mets_server_url),
+                                            ws_dir_path=str(ws_dir_path),
+                                            log_file=str(log_file))
         self.mets_servers[str(mets_server_url)] = pid
         self.mets_servers_paths[str(ws_dir_path)] = str(mets_server_url)
         return mets_server_url
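
With the Processor Server code path gone, the Deployer only manages Processing Workers. Below is a minimal sketch of the renamed lifecycle, using only the method names visible in the hunks above; the deployer object and mongodb_url are assumed to exist already (constructing them is not part of this diff):

def start_processing_workers(deployer, mongodb_url: str) -> None:
    # deploy_rabbitmq() is typed to return a str (presumably the queue URL);
    # deploy_workers() replaces the former deploy_network_agents() and fans out to each DataHost
    rabbitmq_url = deployer.deploy_rabbitmq()
    deployer.deploy_workers(mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)

def stop_everything(deployer) -> None:
    # order matters: per the docstring above, stopping RabbitMQ before the
    # workers can leave them in an unpredictable state
    deployer.stop_workers()
    deployer.stop_mongodb()
    deployer.stop_rabbitmq()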
ocrd_network/runtime_data/hosts.py CHANGED
@@ -1,9 +1,9 @@
 from logging import Logger
 from time import sleep
-from typing import Dict, List, Union
+from typing import Dict, List
 
 from .connection_clients import create_docker_client, create_ssh_client
-from .network_agents import AgentType, DataNetworkAgent, DataProcessingWorker, DataProcessorServer, DeployType
+from .network_agents import DataProcessingWorker, DeployType
 
 
 class DataHost:
@@ -24,68 +24,39 @@ class DataHost:
         self.ssh_client = None
         self.docker_client = None
 
-        # Time to wait between deploying agents
-        self.wait_between_agent_deploys: float = 0.3
+        # Time to wait between deploying single workers
+        self.wait_between_deploys: float = 0.3
 
-        # Lists of network agents based on their agent and deployment type
-        self.network_agents_worker_native = []
-        self.network_agents_worker_docker = []
-        self.network_agents_server_native = []
-        self.network_agents_server_docker = []
+        # Lists of Processing Workers based on their deployment type
+        self.workers_native = []
+        self.workers_docker = []
 
         if not workers:
             workers = []
         if not servers:
             servers = []
 
-        self.__parse_network_agents_workers(processing_workers=workers)
-        self.__parse_network_agents_servers(processor_servers=servers)
+        self.__parse_workers(processing_workers=workers)
 
-        # Used for caching deployed Processor Servers' ports on the current host
-        # Key: processor_name, Value: list of ports
-        self.processor_servers_ports: dict = {}
+    def __append_workers_to_lists(self, worker_data: DataProcessingWorker) -> None:
+        if worker_data.deploy_type != DeployType.DOCKER and worker_data.deploy_type != DeployType.NATIVE:
+            raise ValueError(f"Processing Worker deploy type is unknown: {worker_data.deploy_type}")
 
-    def __add_deployed_agent_server_port_to_cache(self, processor_name: str, port: int) -> None:
-        if processor_name not in self.processor_servers_ports:
-            self.processor_servers_ports[processor_name] = [port]
-            return
-        self.processor_servers_ports[processor_name] = self.processor_servers_ports[processor_name].append(port)
-
-    def __append_network_agent_to_lists(self, agent_data: DataNetworkAgent) -> None:
-        if agent_data.deploy_type != DeployType.DOCKER and agent_data.deploy_type != DeployType.NATIVE:
-            raise ValueError(f"Network agent deploy type is unknown: {agent_data.deploy_type}")
-        if agent_data.agent_type != AgentType.PROCESSING_WORKER and agent_data.agent_type != AgentType.PROCESSOR_SERVER:
-            raise ValueError(f"Network agent type is unknown: {agent_data.agent_type}")
-
-        if agent_data.deploy_type == DeployType.NATIVE:
+        if worker_data.deploy_type == DeployType.NATIVE:
             self.needs_ssh_connector = True
-            if agent_data.agent_type == AgentType.PROCESSING_WORKER:
-                self.network_agents_worker_native.append(agent_data)
-            if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
-                self.network_agents_server_native.append(agent_data)
-        if agent_data.deploy_type == DeployType.DOCKER:
+            self.workers_native.append(worker_data)
+        if worker_data.deploy_type == DeployType.DOCKER:
             self.needs_docker_connector = True
-            if agent_data.agent_type == AgentType.PROCESSING_WORKER:
-                self.network_agents_worker_docker.append(agent_data)
-            if agent_data.agent_type == AgentType.PROCESSOR_SERVER:
-                self.network_agents_server_docker.append(agent_data)
-
-    def __parse_network_agents_servers(self, processor_servers: List[Dict]):
-        for server in processor_servers:
-            server_data = DataProcessorServer(
-                processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
-                port=int(server["port"]), init_by_config=True, pid=None
-            )
-            self.__append_network_agent_to_lists(agent_data=server_data)
+            self.workers_docker.append(worker_data)
 
-    def __parse_network_agents_workers(self, processing_workers: List[Dict]):
+    def __parse_workers(self, processing_workers: List[Dict]):
         for worker in processing_workers:
             worker_data = DataProcessingWorker(
-                processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host,
-                init_by_config=True, pid=None
+                processor_name=worker["name"], deploy_type=worker.get("deploy_type", "native"),
+                host=self.host, init_by_config=True, pid=None
             )
             for _ in range(int(worker["number_of_instance"])):
-                self.__append_network_agent_to_lists(agent_data=worker_data)
+                self.__append_workers_to_lists(worker_data=worker_data)
 
     def create_connection_client(self, client_type: str):
         if client_type not in ["docker", "ssh"]:
@@ -97,62 +68,46 @@ class DataHost:
             self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath)
             return self.docker_client
 
-    def __deploy_network_agent(
-        self, logger: Logger, agent_data: Union[DataProcessorServer, DataProcessingWorker],
+    def __deploy_single_worker(
+        self, logger: Logger, worker_data: DataProcessingWorker,
         mongodb_url: str, rabbitmq_url: str
     ) -> None:
-        deploy_type = agent_data.deploy_type
-        agent_type = agent_data.agent_type
-        name = agent_data.processor_name
-        agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}, host: {self.host}"
-        logger.info(f"Deploying {agent_info}")
+        deploy_type = worker_data.deploy_type
+        name = worker_data.processor_name
+        worker_info = f"Processing Worker, deploy: {deploy_type}, name: {name}, host: {self.host}"
+        logger.info(f"Deploying {worker_info}")
 
         connection_client = None
         if deploy_type == DeployType.NATIVE:
-            assert self.ssh_client, f"SSH client connection missing."
+            assert self.ssh_client, "SSH client connection missing."
             connection_client = self.ssh_client
         if deploy_type == DeployType.DOCKER:
-            assert self.docker_client, f"Docker client connection missing."
+            assert self.docker_client, "Docker client connection missing."
            connection_client = self.docker_client
 
-        if agent_type == AgentType.PROCESSING_WORKER:
-            agent_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
-        if agent_type == AgentType.PROCESSOR_SERVER:
-            agent_data.deploy_network_agent(logger, connection_client, mongodb_url)
+        worker_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
+        sleep(self.wait_between_deploys)
 
-        sleep(self.wait_between_agent_deploys)
-
-    def __deploy_network_agents_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
+    def __deploy_all_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
         logger.info(f"Deploying processing workers on host: {self.host}")
-        amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker)
+        amount_workers = len(self.workers_native) + len(self.workers_docker)
         if not amount_workers:
-            logger.info(f"No processing workers found to be deployed")
-        for data_worker in self.network_agents_worker_native:
-            self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
-        for data_worker in self.network_agents_worker_docker:
-            self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url)
-
-    def __deploy_network_agents_servers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
-        logger.info(f"Deploying processor servers on host: {self.host}")
-        amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker)
-        if not amount_servers:
-            logger.info(f"No processor servers found to be deployed")
-        for data_server in self.network_agents_server_native:
-            self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
-            self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
-        for data_server in self.network_agents_server_docker:
-            self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url)
-            self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
-
-    def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
+            logger.info("No processing workers found to be deployed")
+        for data_worker in self.workers_native:
+            self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url)
+        for data_worker in self.workers_docker:
+            self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url)
+
+    def deploy_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
         if self.needs_ssh_connector and not self.ssh_client:
             logger.debug("Creating missing ssh connector before deploying")
             self.ssh_client = self.create_connection_client(client_type="ssh")
         if self.needs_docker_connector:
             logger.debug("Creating missing docker connector before deploying")
             self.docker_client = self.create_connection_client(client_type="docker")
-        self.__deploy_network_agents_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
-        self.__deploy_network_agents_servers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
+
+        self.__deploy_all_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
+
         if self.ssh_client:
             self.ssh_client.close()
             self.ssh_client = None
@@ -160,66 +115,42 @@
             self.docker_client.close()
             self.docker_client = None
 
-    def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployType, agent_type: AgentType, pid: str):
-        agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}"
+    def __stop_worker(self, logger: Logger, name: str, deploy_type: DeployType, pid: str):
+        worker_info = f"Processing Worker: deploy: {deploy_type}, name: {name}"
         if not pid:
-            logger.warning(f"No pid was passed for {agent_info}")
+            logger.warning(f"No pid was passed for {worker_info}")
             return
-        agent_info += f", pid: {pid}"
-        logger.info(f"Stopping {agent_info}")
+        worker_info += f", pid: {pid}"
+        logger.info(f"Stopping {worker_info}")
         if deploy_type == DeployType.NATIVE:
-            assert self.ssh_client, f"SSH client connection missing"
+            assert self.ssh_client, "SSH client connection missing"
             self.ssh_client.exec_command(f"kill {pid}")
         if deploy_type == DeployType.DOCKER:
-            assert self.docker_client, f"Docker client connection missing"
+            assert self.docker_client, "Docker client connection missing"
             self.docker_client.containers.get(pid).stop()
 
-    def __stop_network_agents_workers(self, logger: Logger):
-        logger.info(f"Stopping processing workers on host: {self.host}")
-        amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker)
-        if not amount_workers:
-            logger.warning(f"No active processing workers to be stopped.")
-        for worker in self.network_agents_worker_native:
-            self.__stop_network_agent(logger, worker.processor_name, worker.deploy_type, worker.agent_type, worker.pid)
-        self.network_agents_worker_native = []
-        for worker in self.network_agents_worker_docker:
-            self.__stop_network_agent(logger, worker.processor_name, worker.deploy_type, worker.agent_type, worker.pid)
-        self.network_agents_worker_docker = []
-
-    def __stop_network_agents_servers(self, logger: Logger):
-        logger.info(f"Stopping processor servers on host: {self.host}")
-        amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker)
-        if not amount_servers:
-            logger.warning(f"No active processor servers to be stopped.")
-        for server in self.network_agents_server_native:
-            self.__stop_network_agent(logger, server.processor_name, server.deploy_type, server.agent_type, server.pid)
-        self.network_agents_server_native = []
-        for server in self.network_agents_server_docker:
-            self.__stop_network_agent(logger, server.processor_name, server.deploy_type, server.agent_type, server.pid)
-        self.network_agents_server_docker = []
-
-    def stop_network_agents(self, logger: Logger):
+    def stop_workers(self, logger: Logger):
         if self.needs_ssh_connector and not self.ssh_client:
             logger.debug("Creating missing ssh connector before stopping")
             self.ssh_client = self.create_connection_client(client_type="ssh")
         if self.needs_docker_connector and not self.docker_client:
             logger.debug("Creating missing docker connector before stopping")
             self.docker_client = self.create_connection_client(client_type="docker")
-        self.__stop_network_agents_workers(logger=logger)
-        self.__stop_network_agents_servers(logger=logger)
+
+        logger.info(f"Stopping processing workers on host: {self.host}")
+        amount_workers = len(self.workers_native) + len(self.workers_docker)
+        if not amount_workers:
+            logger.warning("No active processing workers to be stopped.")
+        for worker in self.workers_native:
+            self.__stop_worker(logger, worker.processor_name, worker.deploy_type, worker.pid)
+        self.workers_native = []
+        for worker in self.workers_docker:
+            self.__stop_worker(logger, worker.processor_name, worker.deploy_type, worker.pid)
+        self.workers_docker = []
+
         if self.ssh_client:
             self.ssh_client.close()
             self.ssh_client = None
         if self.docker_client:
             self.docker_client.close()
             self.docker_client = None
-
-    def resolve_processor_server_url(self, processor_name: str) -> str:
-        processor_server_url = ''
-        for data_server in self.network_agents_server_docker:
-            if data_server.processor_name == processor_name:
-                processor_server_url = f"http://{self.host}:{data_server.port}/"
-        for data_server in self.network_agents_server_native:
-            if data_server.processor_name == processor_name:
-                processor_server_url = f"http://{self.host}:{data_server.port}/"
-        return processor_server_url
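
DataHost now parses only the workers section of the Processing Server config, and deploy_type may be omitted, falling back to "native". Below is a sketch of the per-worker dict shape that __parse_workers reads, based solely on the keys accessed in the hunk above; the processor names and counts are made up for illustration:

# Illustrative only: keys taken from the diff ("name", "deploy_type", "number_of_instance")
workers = [
    {"name": "ocrd-dummy", "deploy_type": "docker", "number_of_instance": 1},
    {"name": "ocrd-tesserocr-recognize", "number_of_instance": 2},  # deploy_type defaults to "native"
]
# Each entry is expanded into number_of_instance DataProcessingWorker objects and
# appended to workers_native or workers_docker depending on its deploy type.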
ocrd_network/runtime_data/network_agents.py CHANGED
@@ -2,14 +2,15 @@ from logging import Logger
 from typing import Any
 
 from re import search as re_search
-from ..constants import AgentType, DeployType
+from ..constants import DeployType
 
 
 # TODO: Find appropriate replacement for the hack
 def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str):
     channel = ssh_client.invoke_shell()
     stdin, stdout = channel.makefile("wb"), channel.makefile("rb")
-    logger.debug(f"Executing command: {start_cmd}")
+    # TODO: set back to debug
+    logger.info(f"Executing command: {start_cmd}")
 
     # TODO: This hack should still be fixed
     # Note left from @joschrew
@@ -40,14 +41,13 @@ def deploy_agent_docker_template(logger: Logger, docker_client, start_cmd: str):
 
 class DataNetworkAgent:
     def __init__(
-        self, processor_name: str, deploy_type: DeployType, agent_type: AgentType,
+        self, processor_name: str, deploy_type: DeployType,
         host: str, init_by_config: bool, pid: Any = None
     ) -> None:
         self.processor_name = processor_name
         self.deploy_type = deploy_type
         self.host = host
         self.deployed_by_config = init_by_config
-        self.agent_type = agent_type
         # The id is assigned when the agent is deployed
         self.pid = pid
 
@@ -69,42 +69,18 @@ class DataProcessingWorker(DataNetworkAgent):
         self, processor_name: str, deploy_type: DeployType, host: str, init_by_config: bool, pid: Any = None
     ) -> None:
         super().__init__(
-            processor_name=processor_name, host=host, deploy_type=deploy_type, agent_type=AgentType.PROCESSING_WORKER,
+            processor_name=processor_name, host=host, deploy_type=deploy_type,
             init_by_config=init_by_config, pid=pid
         )
 
     def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str):
         if self.deploy_type == DeployType.NATIVE:
-            start_cmd = f"{self.processor_name} {self.agent_type} --database {database_url} --queue {queue_url} &"
+            start_cmd = f"{self.processor_name} --database {database_url} --queue {queue_url} &"
             self.pid = self._start_native_instance(logger, connector_client, start_cmd)
             return self.pid
         if self.deploy_type == DeployType.DOCKER:
             # TODO: add real command to start processing worker in docker here
-            start_cmd = f""
-            self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
-            return self.pid
-        raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
-
-
-class DataProcessorServer(DataNetworkAgent):
-    def __init__(
-        self, processor_name: str, deploy_type: DeployType, host: str, port: int, init_by_config: bool, pid: Any = None
-    ) -> None:
-        super().__init__(
-            processor_name=processor_name, host=host, deploy_type=deploy_type, agent_type=AgentType.PROCESSOR_SERVER,
-            init_by_config=init_by_config, pid=pid
-        )
-        self.port = port
-
-    def deploy_network_agent(self, logger: Logger, connector_client, database_url: str):
-        agent_address = f"{self.host}:{self.port}"
-        if self.deploy_type == DeployType.NATIVE:
-            start_cmd = f"{self.processor_name} {self.agent_type} --address {agent_address} --database {database_url} &"
-            self.pid = self._start_native_instance(logger, connector_client, start_cmd)
-            return self.pid
-        if self.deploy_type == DeployType.DOCKER:
-            # TODO: add real command to start processor server in docker here
-            start_cmd = f""
+            start_cmd = ""
             self.pid = self._start_docker_instance(logger, connector_client, start_cmd)
             return self.pid
         raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
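
Since the agent type is no longer interpolated into the start command, a natively deployed worker is launched as the bare processor executable plus its database and queue options. A sketch of the resulting command string, with a hypothetical processor name and placeholder URLs:

# Illustrative only: mirrors the f-string in DataProcessingWorker.deploy_network_agent above
processor_name = "ocrd-dummy"                      # hypothetical processor
database_url = "mongodb://localhost:27017"         # placeholder
queue_url = "amqp://guest:guest@localhost:5672/"   # placeholder
start_cmd = f"{processor_name} --database {database_url} --queue {queue_url} &"
# before this change the agent type was also inserted between the processor name and the options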
ocrd_network/runtime_data/network_services.py CHANGED
@@ -129,7 +129,7 @@ class DataRabbitMQ(DataNetworkService):
         rmq_host, rmq_port, rmq_vhost = self.host, int(self.port), self.vhost
         rmq_user, rmq_password = self.cred_username, self.cred_password
         if self.skip_deployment:
-            logger.debug(f"RabbitMQ is managed externally. Skipping deployment.")
+            logger.debug("RabbitMQ is managed externally. Skipping deployment.")
             verify_rabbitmq_available(logger=logger, rabbitmq_address=self.service_url)
             return self.service_url
         if not env:
ocrd_network/server_cache.py CHANGED
@@ -33,7 +33,7 @@ class CacheLockedPages:
         if not self.locked_pages.get(workspace_key, None):
             self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}")
             return False
-        debug_message = f"Caching the received request due to locked output file grp pages."
+        debug_message = "Caching the received request due to locked output file grp pages."
         for file_group in output_file_grps:
             if file_group in self.locked_pages[workspace_key]:
                 if self.placeholder_all_pages in self.locked_pages[workspace_key][file_group]:
ocrd_network/server_utils.py CHANGED
@@ -55,7 +55,7 @@ def create_processing_message(logger: Logger, job: DBProcessorJob) -> OcrdProces
         )
         return processing_message
     except ValueError as error:
-        message = f"Failed to create OcrdProcessingMessage from DBProcessorJob"
+        message = "Failed to create OcrdProcessingMessage from DBProcessorJob"
         raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message, error)
 
 
@@ -124,50 +124,7 @@ async def _get_processor_job_log(logger: Logger, job_id: str) -> FileResponse:
     return FileResponse(path=log_file_path, filename=log_file_path.name)
 
 
-def request_processor_server_tool_json(logger: Logger, processor_server_base_url: str) -> Dict:
-    # Request the ocrd tool json from the Processor Server
-    try:
-        response = requests_get(
-            urljoin(base=processor_server_base_url, url="info"),
-            headers={"Content-Type": "application/json"}
-        )
-    except Exception as error:
-        message = f"Failed to retrieve ocrd tool json from: {processor_server_base_url}"
-        raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message, error)
-    if response.status_code != 200:
-        message = f"Failed to retrieve tool json from: {processor_server_base_url}, code: {response.status_code}"
-        raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message)
-    return response.json()
-
-async def forward_job_to_processor_server(
-    logger: Logger, job_input: PYJobInput, processor_server_base_url: str
-) -> PYJobOutput:
-    try:
-        json_data = dumps(job_input.dict(exclude_unset=True, exclude_none=True))
-    except Exception as error:
-        message = f"Failed to json dump the PYJobInput: {job_input}"
-        raise_http_exception(logger, status.HTTP_500_INTERNAL_SERVER_ERROR, message, error)
-
-    # TODO: The amount of pages should come as a request input
-    # TODO: cf https://github.com/OCR-D/core/pull/1030/files#r1152551161
-    # currently, use 200 as a default
-    request_timeout = calculate_processing_request_timeout(amount_pages=200, timeout_per_page=20.0)
-
-    # Post a processing job to the Processor Server asynchronously
-    async with AsyncClient(timeout=Timeout(timeout=request_timeout, connect=30.0)) as client:
-        response = await client.post(
-            urljoin(base=processor_server_base_url, url="run"),
-            headers={"Content-Type": "application/json"},
-            json=loads(json_data)
-        )
-    if response.status_code != 202:
-        message = f"Failed to post '{job_input.processor_name}' job to: {processor_server_base_url}"
-        raise_http_exception(logger, status.HTTP_500_INTERNAL_SERVER_ERROR, message)
-    job_output = response.json()
-    return job_output
-
-
-async def get_workflow_content(logger: Logger, workflow_id: str, workflow: Union[UploadFile, None]) -> str:
+async def get_workflow_content(logger: Logger, workflow_id: str, workflow: Union[UploadFile, str, None]) -> str:
     if not workflow and not workflow_id:
         message = "Either 'workflow' must be uploaded as a file or 'workflow_id' must be provided. Both are missing."
         raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
@@ -178,6 +135,9 @@ async def get_workflow_content(logger: Logger, workflow_id: str, workflow: Union
     except ValueError as error:
         message = f"Workflow with id '{workflow_id}' not found"
         raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message, error)
+    if isinstance(workflow, str):
+        with open(workflow) as wf_file:
+            return wf_file.read()
     return await generate_workflow_content(workflow)
 
 
@@ -193,14 +153,14 @@ def parse_workflow_tasks(logger: Logger, workflow_content: str) -> List[Processo
         tasks_list = workflow_content.splitlines()
         return [ProcessorTask.parse(task_str) for task_str in tasks_list if task_str.strip()]
     except ValueError as error:
-        message = f"Failed parsing processing tasks from a workflow."
+        message = "Failed parsing processing tasks from a workflow."
         raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message, error)
 
 
 def raise_http_exception(logger: Logger, status_code: int, message: str, error: Exception = None) -> None:
     if error:
         message = f"{message} {error}"
-    logger.exception(f"{message}")
+    logger.exception(message)
     raise HTTPException(status_code=status_code, detail=message)
 
 
@@ -214,7 +174,7 @@ def validate_job_input(logger: Logger, processor_name: str, ocrd_tool: dict, job
         )
         raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
     if not ocrd_tool:
-        message = f"Failed parsing processing tasks from a workflow."
+        message = "Failed parsing processing tasks from a workflow."
         raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message)
     try:
         report = ParameterValidator(ocrd_tool).validate(dict(job_input.parameters))
@@ -249,10 +209,10 @@ def validate_first_task_input_file_groups_existence(logger: Logger, mets_path: s
         raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message)
 
 
-def kill_mets_server_zombies(minutes_ago : Optional[int], dry_run : Optional[bool]) -> List[int]:
-    if minutes_ago == None:
+def kill_mets_server_zombies(minutes_ago: Optional[int], dry_run: Optional[bool]) -> List[int]:
+    if minutes_ago is None:
         minutes_ago = 90
-    if dry_run == None:
+    if dry_run is None:
         dry_run = False
 
     now = time()
@@ -271,7 +231,8 @@ def kill_mets_server_zombies(minutes_ago : Optional[int], dry_run : Optional[boo
         if re.match(cmdline_pat, cmdline):
             pid = int(procdir.name)
             ret.append(pid)
-            print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, so killing (cmdline="{cmdline})', file=sys.stderr)
+            print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, '
+                  f'so killing (cmdline="{cmdline})', file=sys.stderr)
             if dry_run:
                 print(f'[dry_run is active] kill {pid}')
             else:
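
The tidied-up kill_mets_server_zombies keeps its behaviour: None arguments fall back to 90 minutes and a real (non-dry) run, and the function returns the PIDs it matched. A hedged usage sketch, assuming the helper remains importable from ocrd_network.server_utils:

from ocrd_network.server_utils import kill_mets_server_zombies

# dry_run=True only reports which stale METS Server processes would be killed
stale_pids = kill_mets_server_zombies(minutes_ago=None, dry_run=True)
print(stale_pids)  # e.g. an empty list when no zombie METS Servers are found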
ocrd_network/utils.py CHANGED
@@ -172,5 +172,6 @@ def stop_mets_server(logger: Logger, mets_server_url: str, ws_dir_path: str) ->
     else:
         ValueError(f"Unexpected protocol type: {protocol}")
 
+
 def get_uds_path(ws_dir_path: str) -> Path:
     return Path(config.OCRD_NETWORK_SOCKETS_ROOT_DIR, f"{safe_filename(ws_dir_path)}.sock")
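
For reference, get_uds_path (the context lines above) derives the Unix domain socket path for a workspace from the configured sockets root. A small sketch of calling it; the exact result depends on OCRD_NETWORK_SOCKETS_ROOT_DIR and on how safe_filename normalizes the workspace path:

from ocrd_network.utils import get_uds_path

sock = get_uds_path("/data/my-workspace")  # hypothetical workspace directory
# -> a Path under config.OCRD_NETWORK_SOCKETS_ROOT_DIR named "<safe_filename(ws_dir_path)>.sock"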