ocrd 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/network.py +2 -0
- ocrd/cli/resmgr.py +24 -61
- ocrd/cli/workspace.py +2 -2
- ocrd/processor/base.py +15 -18
- ocrd/resource_manager.py +199 -116
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/METADATA +2 -2
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/RECORD +25 -23
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/WHEEL +1 -1
- ocrd_models/constants.py +16 -0
- ocrd_network/__init__.py +1 -0
- ocrd_network/cli/__init__.py +3 -1
- ocrd_network/cli/resmgr_server.py +23 -0
- ocrd_network/constants.py +3 -0
- ocrd_network/logging_utils.py +5 -0
- ocrd_network/resource_manager_server.py +178 -0
- ocrd_network/runtime_data/hosts.py +47 -56
- ocrd_network/runtime_data/network_agents.py +26 -3
- ocrd_utils/__init__.py +2 -0
- ocrd_utils/constants.py +5 -0
- ocrd_utils/os.py +130 -52
- ocrd_validators/ocrd_tool.schema.yml +7 -4
- ocrd_validators/workspace_validator.py +45 -10
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/LICENSE +0 -0
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.1.1.dist-info → ocrd-3.2.0.dist-info}/top_level.txt +0 -0
ocrd_models/constants.py
CHANGED
|
@@ -28,6 +28,8 @@ __all__ = [
|
|
|
28
28
|
'TAG_PAGE_TEXTEQUIV',
|
|
29
29
|
'TAG_PAGE_TEXTREGION',
|
|
30
30
|
'METS_PAGE_DIV_ATTRIBUTE',
|
|
31
|
+
'PAGE_REGION_TYPES',
|
|
32
|
+
'PAGE_ALTIMG_FEATURES',
|
|
31
33
|
]
|
|
32
34
|
|
|
33
35
|
|
|
@@ -72,6 +74,20 @@ PAGE_REGION_TYPES = [
|
|
|
72
74
|
'Separator', 'Table', 'Text', 'Unknown'
|
|
73
75
|
]
|
|
74
76
|
|
|
77
|
+
# Known feature names for PAGE alternative images.
# NOTE(review): presumably these are the tokens allowed in the comma-separated
# AlternativeImage/@comments attribute - confirm against the OCR-D PAGE spec.
PAGE_ALTIMG_FEATURES = [
    'binarized',
    'grayscale_normalized',
    'despeckled',
    'cropped',
    'deskewed',
    'rotated-90',
    'rotated-180',
    'rotated-270',
    'dewarped',
    'clipped',
]
|
|
89
|
+
|
|
90
|
+
|
|
75
91
|
class METS_PAGE_DIV_ATTRIBUTE(Enum):
|
|
76
92
|
ID = auto()
|
|
77
93
|
ORDER = auto()
|
ocrd_network/__init__.py
CHANGED
|
@@ -4,4 +4,5 @@ from .processing_server import ProcessingServer
|
|
|
4
4
|
from .processing_worker import ProcessingWorker
|
|
5
5
|
from .processor_server import ProcessorServer
|
|
6
6
|
from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType
|
|
7
|
+
from .resource_manager_server import ResourceManagerServer
|
|
7
8
|
from .server_cache import CacheLockedPages, CacheProcessingRequests
|
ocrd_network/cli/__init__.py
CHANGED
|
@@ -2,10 +2,12 @@ from .client import client_cli
|
|
|
2
2
|
from .processing_server import processing_server_cli
|
|
3
3
|
from .processing_worker import processing_worker_cli
|
|
4
4
|
from .processor_server import processor_server_cli
|
|
5
|
+
from .resmgr_server import resource_manager_server_cli
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
'client_cli',
|
|
8
9
|
'processing_server_cli',
|
|
9
10
|
'processing_worker_cli',
|
|
10
|
-
'processor_server_cli'
|
|
11
|
+
'processor_server_cli',
|
|
12
|
+
'resource_manager_server_cli'
|
|
11
13
|
]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from ocrd_network import ResourceManagerServer, ServerAddressParamType
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@click.command('resmgr-server')
@click.option('-a', '--address',
              help='The URL of the OCR-D resource manager server, format: host:port',
              type=ServerAddressParamType(),
              required=True)
def resource_manager_server_cli(address: str):
    """
    Start standalone REST API OCR-D Resource Manager Server
    """
    try:
        # The click parameter type has already validated the address,
        # so a plain split into its two components is sufficient here.
        hostname, port_number = address.split(':')
        ResourceManagerServer(host=hostname, port=int(port_number)).start()
    except Exception as error:
        raise Exception("OCR-D Resource Manager Server has failed with error") from error
|
ocrd_network/constants.py
CHANGED
|
@@ -10,6 +10,8 @@ OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
|
|
|
10
10
|
# Used as a placeholder to lock all pages when no page_id is specified
|
|
11
11
|
SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"
|
|
12
12
|
|
|
13
|
+
# TODO: Make this more configurable
|
|
14
|
+
RESOURCE_MANAGER_SERVER_PORT = 45555
|
|
13
15
|
|
|
14
16
|
class AgentType(str, Enum):
|
|
15
17
|
PROCESSING_WORKER = "worker"
|
|
@@ -49,6 +51,7 @@ class NetworkLoggingDirs(str, Enum):
|
|
|
49
51
|
PROCESSING_SERVERS = "processing_servers"
|
|
50
52
|
PROCESSING_WORKERS = "processing_workers"
|
|
51
53
|
PROCESSOR_SERVERS = "processor_servers"
|
|
54
|
+
RESOURCE_MANAGER_SERVERS = "resource_manager_servers"
|
|
52
55
|
|
|
53
56
|
|
|
54
57
|
class ServerApiTags(str, Enum):
|
ocrd_network/logging_utils.py
CHANGED
|
@@ -50,3 +50,8 @@ def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Pa
|
|
|
50
50
|
def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path:
    """Return the log file path for one Processor Server instance.

    The file name is built from the processor-server agent type, the process
    id and the processor name; it is placed below the root logging directory
    obtained for ``NetworkLoggingDirs.PROCESSOR_SERVERS``.
    """
    file_name: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log"
    servers_log_dir = get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS)
    return Path(servers_log_dir, file_name)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_resource_manager_server_logging_file_path(pid: int) -> Path:
    """Return the log file path for a Resource Manager Server process.

    The file is named ``resource_manager_server.<pid>.log`` and is placed below
    the root logging directory obtained for
    ``NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS``.
    """
    file_name: str = f"resource_manager_server.{pid}.log"
    resmgr_log_dir = get_root_logging_dir(NetworkLoggingDirs.RESOURCE_MANAGER_SERVERS)
    return Path(resmgr_log_dir, file_name)
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from os import getpid
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import requests
|
|
5
|
+
from shutil import which
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uvicorn import run as uvicorn_run
|
|
8
|
+
from fastapi import APIRouter, FastAPI, HTTPException, status
|
|
9
|
+
|
|
10
|
+
from ocrd import OcrdResourceManager
|
|
11
|
+
from ocrd_utils import directory_size, getLogger, get_moduledir, get_ocrd_tool_json, initLogging
|
|
12
|
+
from .logging_utils import configure_file_handler_with_formatter, get_resource_manager_server_logging_file_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ResourceManagerServer(FastAPI):
    """FastAPI application exposing ``OcrdResourceManager`` operations over HTTP.

    Routes registered by :meth:`add_api_routes` (all ``GET``):

    * ``/`` - short information message with the current time
    * ``/list_available`` - result of ``OcrdResourceManager.list_available``
    * ``/list_installed`` - result of ``OcrdResourceManager.list_installed``
    * ``/download`` - download resources and report what happened
    """

    def __init__(self, host: str, port: int) -> None:
        """Set up the application, its logger, the resource manager and the routes.

        Args:
            host: host name or address the server will bind to in :meth:`start`.
            port: port the server will bind to in :meth:`start`.
        """
        self.title = "OCR-D Resource Manager Server"
        super().__init__(
            title=self.title,
            on_startup=[self.on_startup],
            on_shutdown=[self.on_shutdown],
            description=self.title
        )
        initLogging()
        self.log = getLogger("ocrd_network.resource_manager_server")
        log_file = get_resource_manager_server_logging_file_path(pid=getpid())
        configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

        # Shared manager used by the two listing endpoints
        self.resmgr_instance = OcrdResourceManager()

        self.hostname = host
        self.port = port

        self.add_api_routes()

    def start(self):
        """Run the application with uvicorn on the configured host and port."""
        uvicorn_run(self, host=self.hostname, port=int(self.port))

    async def on_startup(self):
        """Startup hook: log that the server is starting."""
        self.log.info(f"Starting {self.title}")

    async def on_shutdown(self) -> None:
        """Shutdown hook: currently nothing to clean up."""
        pass

    def add_api_routes(self):
        """Register all endpoints of the server on a router and include it."""
        base_router = APIRouter()
        base_router.add_api_route(
            path="/",
            endpoint=self.home_page,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="Get information about the OCR-D Resource Manager Server"
        )
        base_router.add_api_route(
            path="/list_available",
            endpoint=self.list_available_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        base_router.add_api_route(
            path="/list_installed",
            endpoint=self.list_installed_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        base_router.add_api_route(
            path="/download",
            endpoint=self.download_resource,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        self.include_router(base_router)

    async def home_page(self):
        """Return a short information message together with the current time."""
        message = f"The home page of the {self.title}"
        json_message = {
            "message": message,
            "time": datetime.now().strftime("%Y-%m-%d %H:%M")
        }
        return json_message

    async def list_available_resources(
        self,
        executable: Any = None,
        dynamic: bool = True,
        name: Any = None,
        database: Any = None,
        url: Any = None
    ):
        """Return ``{"result": ...}`` with the output of ``list_available``.

        All query parameters are forwarded positionally, in the order given,
        to ``OcrdResourceManager.list_available``.
        """
        result = self.resmgr_instance.list_available(executable, dynamic, name, database, url)
        json_message = {
            "result": result
        }
        return json_message

    async def list_installed_resources(self, executable: Any = None):
        """Return ``{"result": ...}`` with the output of ``list_installed``.

        Args:
            executable: optional executable name forwarded to
                ``OcrdResourceManager.list_installed``.
        """
        # Query the *installed* resources; the previous code called
        # list_available() here, making this endpoint a duplicate of /list_available.
        result = self.resmgr_instance.list_installed(executable)
        json_message = {
            "result": result
        }
        return json_message

    async def download_resource(
        self,
        executable: str,
        name: Any = None,
        location: Any = None,
        any_url: str = '',
        no_dynamic: bool = False,
        resource_type: str = 'file',
        path_in_archive: str = '.',
        allow_uninstalled: bool = True,
        overwrite: bool = True
    ):
        """Download resources for an executable and report the outcome.

        Args:
            executable: executable name, or ``'*'`` for all executables.
            name: resource name, or ``'*'`` for all; required unless
                ``executable`` is ``'*'``.
            location: requested install location; when empty or not part of an
                executable's ``resource_locations``, the first entry of that
                list is used instead.
            any_url: URL used when the resource is not found in the registry.
            no_dynamic: passed (negated) as ``dynamic`` to ``list_available``.
            resource_type: resource type forwarded to ``handle_resource``.
            path_in_archive: path inside an archive, forwarded to ``handle_resource``.
            allow_uninstalled: when false, refuse to download for an executable
                that ``shutil.which`` cannot find.
            overwrite: forwarded to ``handle_resource``.

        Returns:
            ``{"result": [...]}`` with one human-readable message per event.

        Raises:
            HTTPException: 422 when ``name`` is missing for a concrete
                executable, or the executable is not installed and
                ``allow_uninstalled`` is false.
        """
        # NOTE(review): a fresh manager is created per request instead of reusing
        # self.resmgr_instance - kept as in the original, confirm whether intentional.
        resmgr = OcrdResourceManager()
        response = []
        if executable != '*' and not name:
            message = f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required"
            self.log.error(message)
            raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
        elif executable == '*':
            executable = None
        if name == '*':
            name = None
        if executable and not which(executable):
            if not allow_uninstalled:
                message = (f"Executable '{executable}' is not installed. To download resources anyway, "
                           f"use the -a/--allow-uninstalled flag")
                self.log.error(message)
                raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
            else:
                message = f"Executable '{executable}' is not installed, but downloading resources anyway."
                self.log.info(message)
                response.append(message)
        reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
        if not any(r[1] for r in reslist):
            message = f"No resources {name} found in registry for executable {executable}"
            self.log.info(message)
            response.append(message)
            if executable and name:
                # Nothing registered: fall back to an ad-hoc entry built from the request
                reslist = [(executable, [{
                    'url': any_url or '???',
                    'name': name,
                    'type': resource_type,
                    'path_in_archive': path_in_archive}]
                )]
        for this_executable, this_reslist in reslist:
            resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
            # Resolve the location per executable so that a default chosen for one
            # executable does not leak into the next iteration.
            this_location = location
            if not this_location:
                this_location = resource_locations[0]
            elif this_location not in resource_locations:
                response.append(
                    f"The selected --location {location} is not in the {this_executable}'s resource search path, "
                    f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
                # Actually apply the fallback announced in the message above
                this_location = resource_locations[0]
            res_dest_dir = resmgr.build_resource_dest_dir(location=this_location, executable=this_executable)
            for res_dict in this_reslist:
                try:
                    fpath = resmgr.handle_resource(
                        res_dict=res_dict,
                        executable=this_executable,
                        dest_dir=res_dest_dir,
                        any_url=any_url,
                        overwrite=overwrite,
                        resource_type=resource_type,
                        path_in_archive=path_in_archive
                    )
                    if not fpath:
                        continue
                except FileExistsError as exc:
                    # Report the conflict but still tell the client how to use the resource
                    response.append(str(exc))
                usage = res_dict.get('parameter_usage', 'as-is')
                response.append(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
        json_message = {"result": response}
        return json_message
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from logging import Logger
|
|
2
|
-
from time import sleep
|
|
3
2
|
from typing import Dict, List, Union
|
|
4
3
|
|
|
4
|
+
from ..constants import RESOURCE_MANAGER_SERVER_PORT
|
|
5
5
|
from .connection_clients import create_docker_client, create_ssh_client
|
|
6
|
-
from .network_agents import
|
|
6
|
+
from .network_agents import (
|
|
7
|
+
AgentType, DataProcessingWorker, DataProcessorServer, DeployType, deploy_agent_native_get_pid_hack)
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class DataHost:
|
|
@@ -11,6 +12,8 @@ class DataHost:
|
|
|
11
12
|
self, host: str, username: str, password: str, keypath: str, workers: List[Dict], servers: List[Dict]
|
|
12
13
|
) -> None:
|
|
13
14
|
self.host = host
|
|
15
|
+
self.resource_manager_port = RESOURCE_MANAGER_SERVER_PORT
|
|
16
|
+
self.resource_manager_pid = None
|
|
14
17
|
self.username = username
|
|
15
18
|
self.password = password
|
|
16
19
|
self.keypath = keypath
|
|
@@ -24,22 +27,18 @@ class DataHost:
|
|
|
24
27
|
self.ssh_client = None
|
|
25
28
|
self.docker_client = None
|
|
26
29
|
|
|
27
|
-
# Time to wait between deploying agents
|
|
28
|
-
self.wait_between_agent_deploys: float = 0.3
|
|
29
|
-
|
|
30
30
|
# Lists of network agents based on their agent and deployment type
|
|
31
|
-
self.network_agents_worker_native = []
|
|
32
|
-
self.network_agents_worker_docker = []
|
|
33
|
-
self.network_agents_server_native = []
|
|
34
|
-
self.network_agents_server_docker = []
|
|
31
|
+
self.network_agents_worker_native: List[DataProcessingWorker] = []
|
|
32
|
+
self.network_agents_worker_docker: List[DataProcessingWorker] = []
|
|
33
|
+
self.network_agents_server_native: List[DataProcessorServer] = []
|
|
34
|
+
self.network_agents_server_docker: List[DataProcessorServer] = []
|
|
35
35
|
|
|
36
36
|
if not workers:
|
|
37
37
|
workers = []
|
|
38
38
|
if not servers:
|
|
39
39
|
servers = []
|
|
40
40
|
|
|
41
|
-
self.
|
|
42
|
-
self.__parse_network_agents_servers(processor_servers=servers)
|
|
41
|
+
self.__parse_network_agents(processing_workers=workers, processor_servers=servers)
|
|
43
42
|
|
|
44
43
|
# Used for caching deployed Processor Servers' ports on the current host
|
|
45
44
|
# Key: processor_name, Value: list of ports
|
|
@@ -51,7 +50,7 @@ class DataHost:
|
|
|
51
50
|
return
|
|
52
51
|
self.processor_servers_ports[processor_name] = self.processor_servers_ports[processor_name].append(port)
|
|
53
52
|
|
|
54
|
-
def __append_network_agent_to_lists(self, agent_data:
|
|
53
|
+
def __append_network_agent_to_lists(self, agent_data: Union[DataProcessingWorker, DataProcessorServer]) -> None:
|
|
55
54
|
if agent_data.deploy_type != DeployType.DOCKER and agent_data.deploy_type != DeployType.NATIVE:
|
|
56
55
|
raise ValueError(f"Network agent deploy type is unknown: {agent_data.deploy_type}")
|
|
57
56
|
if agent_data.agent_type != AgentType.PROCESSING_WORKER and agent_data.agent_type != AgentType.PROCESSOR_SERVER:
|
|
@@ -61,24 +60,16 @@ class DataHost:
|
|
|
61
60
|
self.needs_ssh_connector = True
|
|
62
61
|
if agent_data.agent_type == AgentType.PROCESSING_WORKER:
|
|
63
62
|
self.network_agents_worker_native.append(agent_data)
|
|
64
|
-
|
|
63
|
+
elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
|
|
65
64
|
self.network_agents_server_native.append(agent_data)
|
|
66
|
-
|
|
65
|
+
elif agent_data.deploy_type == DeployType.DOCKER:
|
|
67
66
|
self.needs_docker_connector = True
|
|
68
67
|
if agent_data.agent_type == AgentType.PROCESSING_WORKER:
|
|
69
68
|
self.network_agents_worker_docker.append(agent_data)
|
|
70
|
-
|
|
69
|
+
elif agent_data.agent_type == AgentType.PROCESSOR_SERVER:
|
|
71
70
|
self.network_agents_server_docker.append(agent_data)
|
|
72
71
|
|
|
73
|
-
def
|
|
74
|
-
for server in processor_servers:
|
|
75
|
-
server_data = DataProcessorServer(
|
|
76
|
-
processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
|
|
77
|
-
port=int(server["port"]), init_by_config=True, pid=None
|
|
78
|
-
)
|
|
79
|
-
self.__append_network_agent_to_lists(agent_data=server_data)
|
|
80
|
-
|
|
81
|
-
def __parse_network_agents_workers(self, processing_workers: List[Dict]):
|
|
72
|
+
def __parse_network_agents(self, processing_workers: List[Dict], processor_servers: List[Dict]):
|
|
82
73
|
for worker in processing_workers:
|
|
83
74
|
worker_data = DataProcessingWorker(
|
|
84
75
|
processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host,
|
|
@@ -86,6 +77,12 @@ class DataHost:
|
|
|
86
77
|
)
|
|
87
78
|
for _ in range(int(worker["number_of_instance"])):
|
|
88
79
|
self.__append_network_agent_to_lists(agent_data=worker_data)
|
|
80
|
+
for server in processor_servers:
|
|
81
|
+
server_data = DataProcessorServer(
|
|
82
|
+
processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host,
|
|
83
|
+
port=int(server["port"]), init_by_config=True, pid=None
|
|
84
|
+
)
|
|
85
|
+
self.__append_network_agent_to_lists(agent_data=server_data)
|
|
89
86
|
|
|
90
87
|
def create_connection_client(self, client_type: str):
|
|
91
88
|
if client_type not in ["docker", "ssh"]:
|
|
@@ -97,52 +94,38 @@ class DataHost:
|
|
|
97
94
|
self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath)
|
|
98
95
|
return self.docker_client
|
|
99
96
|
|
|
100
|
-
def
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
name = agent_data.processor_name
|
|
107
|
-
agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}, host: {self.host}"
|
|
108
|
-
logger.info(f"Deploying {agent_info}")
|
|
109
|
-
|
|
110
|
-
connection_client = None
|
|
111
|
-
if deploy_type == DeployType.NATIVE:
|
|
112
|
-
assert self.ssh_client, f"SSH client connection missing."
|
|
113
|
-
connection_client = self.ssh_client
|
|
114
|
-
if deploy_type == DeployType.DOCKER:
|
|
115
|
-
assert self.docker_client, f"Docker client connection missing."
|
|
116
|
-
connection_client = self.docker_client
|
|
117
|
-
|
|
118
|
-
if agent_type == AgentType.PROCESSING_WORKER:
|
|
119
|
-
agent_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
|
|
120
|
-
if agent_type == AgentType.PROCESSOR_SERVER:
|
|
121
|
-
agent_data.deploy_network_agent(logger, connection_client, mongodb_url)
|
|
122
|
-
|
|
123
|
-
sleep(self.wait_between_agent_deploys)
|
|
97
|
+
def __deploy_network_agent_resource_manager_server(self, logger: Logger):
    """Start the OCR-D Resource Manager Server on this host and remember its pid.

    The server is started natively through the SSH connection with
    ``ocrd network resmgr-server --address <host>:<port> &``.

    Args:
        logger: logger used for progress messages.
    """
    address = f"{self.host}:{self.resource_manager_port}"
    logger.info(f"Deploying resource manager server on host: {address}")
    if not self.ssh_client:
        # The resource manager is always deployed natively, but the caller only
        # creates an SSH connector when native agents are configured.
        logger.debug("Creating missing ssh connector before deploying the resource manager server")
        self.ssh_client = self.create_connection_client(client_type="ssh")
    start_cmd = f"ocrd network resmgr-server --address {address} &"
    pid = deploy_agent_native_get_pid_hack(logger, self.ssh_client, start_cmd)
    logger.info(f"Deployed: OCR-D Resource Manager Server [{pid}]: {address}")
    self.resource_manager_pid = pid
|
|
124
103
|
|
|
125
|
-
def
|
|
104
|
+
def __deploy_network_agents_processing_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
|
|
126
105
|
logger.info(f"Deploying processing workers on host: {self.host}")
|
|
127
106
|
amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker)
|
|
128
107
|
if not amount_workers:
|
|
129
108
|
logger.info(f"No processing workers found to be deployed")
|
|
130
109
|
for data_worker in self.network_agents_worker_native:
|
|
131
|
-
|
|
110
|
+
data_worker.deploy_network_agent(logger, self.ssh_client, mongodb_url, rabbitmq_url)
|
|
111
|
+
logger.info(f"Deployed: {data_worker}")
|
|
132
112
|
for data_worker in self.network_agents_worker_docker:
|
|
133
|
-
|
|
113
|
+
data_worker.deploy_network_agent(logger, self.docker_client, mongodb_url, rabbitmq_url)
|
|
114
|
+
logger.info(f"Deployed: {data_worker}")
|
|
134
115
|
|
|
135
|
-
def
|
|
116
|
+
def __deploy_network_agents_processor_servers(self, logger: Logger, mongodb_url: str):
|
|
136
117
|
logger.info(f"Deploying processor servers on host: {self.host}")
|
|
137
118
|
amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker)
|
|
138
119
|
if not amount_servers:
|
|
139
120
|
logger.info(f"No processor servers found to be deployed")
|
|
140
121
|
for data_server in self.network_agents_server_native:
|
|
141
|
-
|
|
122
|
+
data_server.deploy_network_agent(logger, self.ssh_client, mongodb_url)
|
|
142
123
|
self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
|
|
124
|
+
logger.info(f"Deployed: {data_server}")
|
|
143
125
|
for data_server in self.network_agents_server_docker:
|
|
144
|
-
|
|
126
|
+
data_server.deploy_network_agent(logger, self.docker_client, mongodb_url)
|
|
145
127
|
self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port)
|
|
128
|
+
logger.info(f"Deployed: {data_server}")
|
|
146
129
|
|
|
147
130
|
def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
|
|
148
131
|
if self.needs_ssh_connector and not self.ssh_client:
|
|
@@ -151,8 +134,9 @@ class DataHost:
|
|
|
151
134
|
if self.needs_docker_connector:
|
|
152
135
|
logger.debug("Creating missing docker connector before deploying")
|
|
153
136
|
self.docker_client = self.create_connection_client(client_type="docker")
|
|
154
|
-
self.
|
|
155
|
-
self.
|
|
137
|
+
self.__deploy_network_agent_resource_manager_server(logger)
|
|
138
|
+
self.__deploy_network_agents_processing_workers(logger, mongodb_url, rabbitmq_url)
|
|
139
|
+
self.__deploy_network_agents_processor_servers(logger, mongodb_url)
|
|
156
140
|
if self.ssh_client:
|
|
157
141
|
self.ssh_client.close()
|
|
158
142
|
self.ssh_client = None
|
|
@@ -160,6 +144,12 @@ class DataHost:
|
|
|
160
144
|
self.docker_client.close()
|
|
161
145
|
self.docker_client = None
|
|
162
146
|
|
|
147
|
+
def __stop_network_agent_resource_manager_server(self, logger: Logger):
|
|
148
|
+
logger.info(f"Stopping OCR-D Resource Manager Server [{self.resource_manager_pid}]: "
|
|
149
|
+
f"{self.host}:{self.resource_manager_port}")
|
|
150
|
+
assert self.ssh_client, f"SSH client connection missing"
|
|
151
|
+
self.ssh_client.exec_command(f"kill {self.resource_manager_pid}")
|
|
152
|
+
|
|
163
153
|
def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployType, agent_type: AgentType, pid: str):
|
|
164
154
|
agent_info = f"network agent: {agent_type}, deploy: {deploy_type}, name: {name}"
|
|
165
155
|
if not pid:
|
|
@@ -205,6 +195,7 @@ class DataHost:
|
|
|
205
195
|
if self.needs_docker_connector and not self.docker_client:
|
|
206
196
|
logger.debug("Creating missing docker connector before stopping")
|
|
207
197
|
self.docker_client = self.create_connection_client(client_type="docker")
|
|
198
|
+
self.__stop_network_agent_resource_manager_server(logger=logger)
|
|
208
199
|
self.__stop_network_agents_workers(logger=logger)
|
|
209
200
|
self.__stop_network_agents_servers(logger=logger)
|
|
210
201
|
if self.ssh_client:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from logging import Logger
|
|
2
|
+
from time import sleep
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
4
5
|
from re import search as re_search
|
|
@@ -24,7 +25,8 @@ def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str)
|
|
|
24
25
|
output = stdout.read().decode("utf-8")
|
|
25
26
|
stdout.close()
|
|
26
27
|
stdin.close()
|
|
27
|
-
|
|
28
|
+
pid = re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
|
|
29
|
+
return pid
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
# TODO: Implement the actual method that is missing
|
|
@@ -51,6 +53,12 @@ class DataNetworkAgent:
|
|
|
51
53
|
# The id is assigned when the agent is deployed
|
|
52
54
|
self.pid = pid
|
|
53
55
|
|
|
56
|
+
# Time to wait between deploying agents
|
|
57
|
+
self.wait_between_agent_deploys: float = 0.3
|
|
58
|
+
|
|
59
|
+
def __str__(self):
|
|
60
|
+
return f"{self.pid} {self.deploy_type} {self.agent_type} {self.processor_name} on host: {self.host}"
|
|
61
|
+
|
|
54
62
|
def _start_native_instance(self, logger: Logger, ssh_client, start_cmd: str):
|
|
55
63
|
if self.deploy_type != DeployType.NATIVE:
|
|
56
64
|
raise RuntimeError(f"Mismatch of deploy type when starting network agent: {self.processor_name}")
|
|
@@ -76,12 +84,18 @@ class DataProcessingWorker(DataNetworkAgent):
|
|
|
76
84
|
def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str):
    """Start this processing worker and return its pid.

    Args:
        logger: logger handed to the instance starter.
        connector_client: SSH client for native deployment, docker client for
            docker deployment; must be truthy.
        database_url: value for the worker's ``--database`` option.
        queue_url: value for the worker's ``--queue`` option.

    Raises:
        RuntimeError: for docker deployment (no start command is defined yet)
            and for an unknown deploy type.
    """
    if self.deploy_type == DeployType.NATIVE:
        native_cmd = f"{self.processor_name} {self.agent_type} --database {database_url} --queue {queue_url} &"
        assert connector_client, "SSH client connection missing."
        self.pid = self._start_native_instance(logger, connector_client, native_cmd)
    elif self.deploy_type == DeployType.DOCKER:
        # TODO: add real command to start processing worker in docker here
        docker_cmd = ""
        assert connector_client, "Docker client connection missing."
        if not docker_cmd:
            raise RuntimeError("Missing start command for the Processing Worker in docker mode")
        self.pid = self._start_docker_instance(logger, connector_client, docker_cmd)
    else:
        raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
    # Pause briefly after a successful start before the next agent is deployed
    sleep(self.wait_between_agent_deploys)
    return self.pid
|
|
87
101
|
|
|
@@ -96,15 +110,24 @@ class DataProcessorServer(DataNetworkAgent):
|
|
|
96
110
|
)
|
|
97
111
|
self.port = port
|
|
98
112
|
|
|
113
|
+
def __str__(self):
|
|
114
|
+
return f"{super().__str__}:{self.port}"
|
|
115
|
+
|
|
99
116
|
def deploy_network_agent(self, logger: Logger, connector_client, database_url: str):
    """Start this processor server and return its pid.

    Args:
        logger: logger handed to the instance starter.
        connector_client: SSH client for native deployment, docker client for
            docker deployment; must be truthy.
        database_url: value for the server's ``--database`` option.

    Raises:
        RuntimeError: for docker deployment (no start command is defined yet)
            and for an unknown deploy type.
    """
    agent_address = f"{self.host}:{self.port}"
    if self.deploy_type == DeployType.NATIVE:
        native_cmd = f"{self.processor_name} {self.agent_type} --address {agent_address} --database {database_url} &"
        assert connector_client, "SSH client connection missing."
        self.pid = self._start_native_instance(logger, connector_client, native_cmd)
    elif self.deploy_type == DeployType.DOCKER:
        # TODO: add real command to start processor server in docker here
        docker_cmd = ""
        assert connector_client, "Docker client connection missing."
        if not docker_cmd:
            raise RuntimeError("Missing start command for the Processor Server in docker mode")
        self.pid = self._start_docker_instance(logger, connector_client, docker_cmd)
    else:
        raise RuntimeError(f"Unknown deploy type of {self.__dict__}")
    # Pause briefly after a successful start before the next agent is deployed
    sleep(self.wait_between_agent_deploys)
    return self.pid
|
ocrd_utils/__init__.py
CHANGED
|
@@ -118,6 +118,7 @@ from .constants import (
|
|
|
118
118
|
REGEX_PREFIX,
|
|
119
119
|
REGEX_FILE_ID,
|
|
120
120
|
RESOURCE_LOCATIONS,
|
|
121
|
+
RESOURCE_TYPES,
|
|
121
122
|
LOG_FORMAT,
|
|
122
123
|
LOG_TIMEFMT,
|
|
123
124
|
VERSION,
|
|
@@ -184,6 +185,7 @@ from .os import (
|
|
|
184
185
|
get_processor_resource_types,
|
|
185
186
|
get_ocrd_tool_json,
|
|
186
187
|
get_moduledir,
|
|
188
|
+
get_env_locations,
|
|
187
189
|
guess_media_type,
|
|
188
190
|
list_all_resources,
|
|
189
191
|
is_file_in_directory,
|
ocrd_utils/constants.py
CHANGED
|
@@ -5,6 +5,7 @@ from .introspect import dist_version
|
|
|
5
5
|
from re import compile as regex_compile
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
|
+
'DEFAULT_METS_BASENAME',
|
|
8
9
|
'EXT_TO_MIME',
|
|
9
10
|
'LOG_FORMAT',
|
|
10
11
|
'LOG_TIMEFMT',
|
|
@@ -14,7 +15,9 @@ __all__ = [
|
|
|
14
15
|
'PIL_TO_MIME',
|
|
15
16
|
'REGEX_PREFIX',
|
|
16
17
|
'REGEX_FILE_ID',
|
|
18
|
+
'RESOURCES_DIR_SYSTEM',
|
|
17
19
|
'RESOURCE_LOCATIONS',
|
|
20
|
+
'RESOURCE_TYPES',
|
|
18
21
|
'VERSION',
|
|
19
22
|
]
|
|
20
23
|
|
|
@@ -108,6 +111,8 @@ LOG_FORMAT = r'%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s'
|
|
|
108
111
|
LOG_TIMEFMT = r'%H:%M:%S'
|
|
109
112
|
|
|
110
113
|
RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
|
|
114
|
+
RESOURCE_TYPES = ['file', 'directory', 'archive']
|
|
115
|
+
RESOURCES_DIR_SYSTEM = '/usr/local/share/ocrd-resources'
|
|
111
116
|
|
|
112
117
|
DEFAULT_METS_BASENAME = 'mets.xml'
|
|
113
118
|
|