ocrd 3.6.0__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +2 -4
- ocrd/cli/bashlib.py +6 -117
- ocrd/cli/network.py +2 -0
- ocrd/cli/resmgr.py +29 -65
- ocrd/constants.py +0 -2
- ocrd/mets_server.py +5 -5
- ocrd/processor/base.py +6 -16
- ocrd/processor/builtin/dummy/ocrd-tool.json +25 -0
- ocrd/processor/builtin/merge_processor.py +131 -0
- ocrd/processor/builtin/param_command_header2unordered.json +7 -0
- ocrd/processor/builtin/param_command_heading2unordered.json +7 -0
- ocrd/processor/builtin/param_command_lines2orientation.json +6 -0
- ocrd/processor/builtin/param_command_page-update-version.json +5 -0
- ocrd/processor/builtin/param_command_transkribus-to-prima.json +8 -0
- ocrd/processor/builtin/shell_processor.py +128 -0
- ocrd/resource_manager.py +213 -124
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/METADATA +23 -10
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/RECORD +40 -34
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/entry_points.txt +2 -0
- ocrd_models/ocrd_agent.py +3 -3
- ocrd_network/__init__.py +1 -0
- ocrd_network/cli/__init__.py +2 -0
- ocrd_network/cli/resmgr_server.py +23 -0
- ocrd_network/constants.py +3 -0
- ocrd_network/logging_utils.py +5 -0
- ocrd_network/models/job.py +29 -28
- ocrd_network/models/messages.py +3 -2
- ocrd_network/models/workspace.py +4 -4
- ocrd_network/resource_manager_server.py +182 -0
- ocrd_network/runtime_data/connection_clients.py +1 -1
- ocrd_network/runtime_data/hosts.py +43 -16
- ocrd_network/runtime_data/network_agents.py +15 -1
- ocrd_utils/__init__.py +5 -1
- ocrd_utils/constants.py +5 -0
- ocrd_utils/logging.py +3 -0
- ocrd_utils/os.py +142 -62
- ocrd_validators/ocrd_tool.schema.yml +7 -4
- ocrd/cli/log.py +0 -56
- ocrd/lib.bash +0 -310
- ocrd/resource_list.yml +0 -61
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/LICENSE +0 -0
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/WHEEL +0 -0
- {ocrd-3.6.0.dist-info → ocrd-3.8.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from os import getpid
|
|
3
|
+
from shutil import which
|
|
4
|
+
from typing import Any
|
|
5
|
+
from uvicorn import run as uvicorn_run
|
|
6
|
+
from fastapi import APIRouter, FastAPI, HTTPException, status
|
|
7
|
+
|
|
8
|
+
from ocrd import OcrdResourceManager
|
|
9
|
+
from ocrd_utils import getLogger, get_ocrd_tool_json, initLogging
|
|
10
|
+
from .logging_utils import configure_file_handler_with_formatter, get_resource_manager_server_logging_file_path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ResourceManagerServer(FastAPI):
    """FastAPI application that exposes an ``OcrdResourceManager`` over HTTP.

    Registered routes (all ``GET``):

    * ``/`` -- short status message with the current server time
    * ``/list_available`` -- result of ``OcrdResourceManager.list_available``
    * ``/list_installed`` -- result of ``OcrdResourceManager.list_installed``
    * ``/download`` -- download resources for one executable
    """

    def __init__(self, host: str, port: int) -> None:
        """Create the application, configure logging and register the routes.

        Args:
            host: Host passed to uvicorn by :meth:`start`.
            port: Port passed to uvicorn by :meth:`start` (converted with ``int``).
        """
        self.title = "OCR-D Resource Manager Server"
        super().__init__(
            title=self.title,
            on_startup=[self.on_startup],
            on_shutdown=[self.on_shutdown],
            description=self.title
        )
        initLogging()
        self.log = getLogger("ocrd_network.resource_manager_server")
        # The log file path is derived from the PID of the current process.
        log_file = get_resource_manager_server_logging_file_path(pid=getpid())
        configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

        # Shared manager instance used by the two listing endpoints.
        self.resmgr_instance = OcrdResourceManager()

        self.hostname = host
        self.port = port

        self.add_api_routes()

    def start(self):
        """Serve this application with uvicorn on the configured host and port."""
        uvicorn_run(self, host=self.hostname, port=int(self.port))

    async def on_startup(self):
        """Startup hook: log that the server is starting."""
        self.log.info(f"Starting {self.title}")

    async def on_shutdown(self) -> None:
        """Shutdown hook: currently nothing to clean up."""

    def add_api_routes(self):
        """Register all HTTP endpoints of the server on a single router."""
        base_router = APIRouter()
        base_router.add_api_route(
            path="/",
            endpoint=self.home_page,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary="Get information about the OCR-D Resource Manager Server"
        )
        base_router.add_api_route(
            path="/list_available",
            endpoint=self.list_available_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        base_router.add_api_route(
            path="/list_installed",
            endpoint=self.list_installed_resources,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        base_router.add_api_route(
            path="/download",
            endpoint=self.download_resource,
            methods=["GET"],
            status_code=status.HTTP_200_OK,
            summary=""
        )
        self.include_router(base_router)

    def _reject_wildcard_executable(self, executable: Any) -> None:
        """Log and raise HTTP 422 when *executable* is the wildcard ``'*'``.

        Raises:
            HTTPException: with status 422 if ``executable == '*'``.
        """
        if executable == '*':
            message = "'*' is not an acceptable executable name! Try with a specific executable."
            self.log.error(message)
            raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)

    async def home_page(self):
        """Return a greeting message and the current local time (minute precision)."""
        message = f"The home page of the {self.title}"
        json_message = {
            "message": message,
            "time": datetime.now().strftime("%Y-%m-%d %H:%M")
        }
        return json_message

    async def list_available_resources(
        self,
        executable: Any = "ocrd-dummy",
        dynamic: bool = True,
        name: Any = None,
        database: Any = None,
        url: Any = None
    ):
        """Return ``{"result": ...}`` from ``OcrdResourceManager.list_available``.

        All query parameters are forwarded positionally, in the order
        ``executable, dynamic, name, database, url``.

        Raises:
            HTTPException: with status 422 if ``executable`` is ``'*'``.
        """
        self._reject_wildcard_executable(executable)
        result = self.resmgr_instance.list_available(executable, dynamic, name, database, url)
        json_message = {
            "result": result
        }
        return json_message

    async def list_installed_resources(self, executable: Any = None):
        """Return ``{"result": ...}`` from ``OcrdResourceManager.list_installed``.

        Raises:
            HTTPException: with status 422 if ``executable`` is ``'*'``.
        """
        self._reject_wildcard_executable(executable)
        # Query the *installed* resources; the available-resources listing has
        # its own endpoint (``/list_available``).
        result = self.resmgr_instance.list_installed(executable)
        json_message = {
            "result": result
        }
        return json_message

    async def download_resource(
        self,
        executable: str = "ocrd-dummy",
        name: Any = None,
        location: Any = None,
        any_url: str = '',
        no_dynamic: bool = False,
        resource_type: str = 'file',
        path_in_archive: str = '.',
        allow_uninstalled: bool = True,
        overwrite: bool = True
    ):
        """Download resources for *executable* and report what happened.

        The resources are looked up with ``OcrdResourceManager.list_available``.
        If nothing is found but both ``executable`` and ``name`` are given, a
        single ad-hoc resource entry is built from ``any_url``, ``name``,
        ``resource_type`` and ``path_in_archive``.  Each resource is then passed
        to ``OcrdResourceManager.handle_resource``.

        Returns:
            ``{"result": [...]}`` where the list holds the human-readable
            messages collected while processing the request.

        Raises:
            HTTPException: with status 422 if ``executable`` is ``'*'``, or if
                the executable is not found on ``PATH`` and
                ``allow_uninstalled`` is false.
        """
        # Validate before constructing a manager for this request.
        self._reject_wildcard_executable(executable)
        resmgr = OcrdResourceManager()
        response = []
        if name == '*':
            name = None
        if executable and not which(executable):
            if not allow_uninstalled:
                message = (f"Executable '{executable}' is not installed. To download resources anyway, "
                           f"use the -a/--allow-uninstalled flag")
                self.log.error(message)
                raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
            message = f"Executable '{executable}' is not installed, but downloading resources anyway."
            self.log.info(message)
            response.append(message)
        reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
        if not any(r[1] for r in reslist):
            message = f"No resources {name} found in registry for executable {executable}"
            self.log.info(message)
            response.append(message)
            if executable and name:
                # Fall back to an ad-hoc entry described by the request itself.
                reslist = [(executable, [{
                    'url': any_url or '???',
                    'name': name,
                    'type': resource_type,
                    'path_in_archive': path_in_archive}]
                )]
        for this_executable, this_reslist in reslist:
            resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
            # Resolve the target location per executable, without overwriting
            # the request-level ``location``, so that one executable's default
            # does not leak into the next one.
            if not location:
                this_location = resource_locations[0]
            elif location not in resource_locations:
                message = (
                    f"The selected --location {location} is not in the {this_executable}'s resource search path, "
                    f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
                self.log.warning(message)
                response.append(message)
                # Actually fall back to the location announced in the message.
                this_location = resource_locations[0]
            else:
                this_location = location
            res_dest_dir = resmgr.build_resource_dest_dir(location=this_location, executable=this_executable)
            for res_dict in this_reslist:
                try:
                    fpath = resmgr.handle_resource(
                        res_dict=res_dict,
                        executable=this_executable,
                        dest_dir=res_dest_dir,
                        any_url=any_url,
                        overwrite=overwrite,
                        resource_type=resource_type,
                        path_in_archive=path_in_archive
                    )
                except FileExistsError as exc:
                    # Report the conflict but still tell the caller how to use
                    # the already existing resource.
                    response.append(str(exc))
                else:
                    if not fpath:
                        continue
                usage = res_dict.get('parameter_usage', 'as-is')
                response.append(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
        json_message = {"result": response}
        return json_message
|
|
@@ -36,7 +36,7 @@ class CustomDockerClient(DockerClient):
|
|
|
36
36
|
raise ValueError("Both 'password' and 'keypath' provided - one must be provided")
|
|
37
37
|
if ("password" not in kwargs) and ("keypath" not in kwargs):
|
|
38
38
|
raise ValueError("Missing 'password' or 'keypath' - one must be provided")
|
|
39
|
-
self.api = APIClient(base_url=f"ssh://{host}", use_ssh_client=True, version="
|
|
39
|
+
self.api = APIClient(base_url=f"ssh://{host}", use_ssh_client=True, version="auto")
|
|
40
40
|
self.api.mount(
|
|
41
41
|
prefix="http+docker://ssh", adapter=self.CustomSshHttpAdapter(base_url=f"ssh://{user}@{host}:22", **kwargs)
|
|
42
42
|
)
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
from logging import Logger
|
|
2
|
-
from
|
|
3
|
-
from typing import Dict, List
|
|
2
|
+
from typing import Dict, List, Optional
|
|
4
3
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
4
|
+
from docker import APIClient
|
|
5
|
+
from paramiko import SSHClient
|
|
6
|
+
|
|
7
|
+
from ..constants import RESOURCE_MANAGER_SERVER_PORT
|
|
8
|
+
from .connection_clients import CustomDockerClient, create_docker_client, create_ssh_client
|
|
9
|
+
from .network_agents import (
|
|
10
|
+
DataProcessingWorker, DeployType, deploy_agent_native_get_pid_hack)
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
class DataHost:
|
|
@@ -11,6 +15,8 @@ class DataHost:
|
|
|
11
15
|
self, host: str, username: str, password: str, keypath: str, workers: List[Dict], servers: List[Dict]
|
|
12
16
|
) -> None:
|
|
13
17
|
self.host = host
|
|
18
|
+
self.resource_manager_port = RESOURCE_MANAGER_SERVER_PORT
|
|
19
|
+
self.resource_manager_pid = None
|
|
14
20
|
self.username = username
|
|
15
21
|
self.password = password
|
|
16
22
|
self.keypath = keypath
|
|
@@ -22,14 +28,11 @@ class DataHost:
|
|
|
22
28
|
|
|
23
29
|
# Connection clients, ssh for native deployment, docker for docker deployment
|
|
24
30
|
self.ssh_client = None
|
|
25
|
-
self.docker_client = None
|
|
26
|
-
|
|
27
|
-
# Time to wait between deploying single workers
|
|
28
|
-
self.wait_between_deploys: float = 0.3
|
|
31
|
+
self.docker_client: Optional[CustomDockerClient] = None
|
|
29
32
|
|
|
30
|
-
# Lists of
|
|
31
|
-
self.workers_native = []
|
|
32
|
-
self.workers_docker = []
|
|
33
|
+
# Lists of network agents based on their agent and deployment type
|
|
34
|
+
self.workers_native: List[DataProcessingWorker] = []
|
|
35
|
+
self.workers_docker: List[DataProcessingWorker] = []
|
|
33
36
|
|
|
34
37
|
if not workers:
|
|
35
38
|
workers = []
|
|
@@ -68,6 +71,13 @@ class DataHost:
|
|
|
68
71
|
self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath)
|
|
69
72
|
return self.docker_client
|
|
70
73
|
|
|
74
|
+
def __deploy_network_agent_resource_manager_server(self, logger: Logger):
    """Start the OCR-D Resource Manager Server for this host and store its PID.

    The start command is handed to ``deploy_agent_native_get_pid_hack``
    together with ``self.ssh_client``; the returned PID is kept in
    ``self.resource_manager_pid``.
    """
    # host:port pair used both in the log messages and as --address value
    address = f"{self.host}:{self.resource_manager_port}"
    logger.info(f"Deploying resource manager server on host: {address}")
    launch_cmd = f"ocrd network resmgr-server --address {address} &"
    server_pid = deploy_agent_native_get_pid_hack(logger, self.ssh_client, launch_cmd)
    logger.info(f"Deployed: OCR-D Resource Manager Server [{server_pid}]: {address}")
    self.resource_manager_pid = server_pid
|
|
80
|
+
|
|
71
81
|
def __deploy_single_worker(
|
|
72
82
|
self, logger: Logger, worker_data: DataProcessingWorker,
|
|
73
83
|
mongodb_url: str, rabbitmq_url: str
|
|
@@ -86,7 +96,6 @@ class DataHost:
|
|
|
86
96
|
connection_client = self.docker_client
|
|
87
97
|
|
|
88
98
|
worker_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url)
|
|
89
|
-
sleep(self.wait_between_deploys)
|
|
90
99
|
|
|
91
100
|
def __deploy_all_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str):
|
|
92
101
|
logger.info(f"Deploying processing workers on host: {self.host}")
|
|
@@ -95,17 +104,24 @@ class DataHost:
|
|
|
95
104
|
logger.info("No processing workers found to be deployed")
|
|
96
105
|
for data_worker in self.workers_native:
|
|
97
106
|
self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url)
|
|
107
|
+
logger.info(f"Deployed: {data_worker}")
|
|
98
108
|
for data_worker in self.workers_docker:
|
|
99
109
|
self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url)
|
|
110
|
+
logger.info(f"Deployed: {data_worker}")
|
|
100
111
|
|
|
101
112
|
def deploy_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None:
|
|
102
113
|
if self.needs_ssh_connector and not self.ssh_client:
|
|
103
114
|
logger.debug("Creating missing ssh connector before deploying")
|
|
104
|
-
|
|
115
|
+
client = self.create_connection_client(client_type="ssh")
|
|
116
|
+
assert isinstance(client, SSHClient)
|
|
117
|
+
self.ssh_client = client
|
|
105
118
|
if self.needs_docker_connector:
|
|
106
119
|
logger.debug("Creating missing docker connector before deploying")
|
|
107
|
-
|
|
120
|
+
client = self.create_connection_client(client_type="docker")
|
|
121
|
+
assert isinstance(client, CustomDockerClient)
|
|
122
|
+
self.docker_client = client
|
|
108
123
|
|
|
124
|
+
self.__deploy_network_agent_resource_manager_server(logger)
|
|
109
125
|
self.__deploy_all_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url)
|
|
110
126
|
|
|
111
127
|
if self.ssh_client:
|
|
@@ -115,6 +131,12 @@ class DataHost:
|
|
|
115
131
|
self.docker_client.close()
|
|
116
132
|
self.docker_client = None
|
|
117
133
|
|
|
134
|
+
def __stop_network_agent_resource_manager_server(self, logger: Logger):
    """Stop the OCR-D Resource Manager Server by sending ``kill <pid>`` over SSH.

    Nothing is executed when no PID has been recorded in
    ``self.resource_manager_pid`` (e.g. the server was never deployed);
    a warning is logged instead.

    Raises:
        AssertionError: if a PID is recorded but ``self.ssh_client`` is missing.
    """
    address = f"{self.host}:{self.resource_manager_port}"
    if not self.resource_manager_pid:
        # Without a PID the remote command would be the meaningless "kill None".
        logger.warning(f"No pid recorded for OCR-D Resource Manager Server: {address}, nothing to stop")
        return
    logger.info(f"Stopping OCR-D Resource Manager Server [{self.resource_manager_pid}]: "
                f"{address}")
    assert self.ssh_client, "SSH client connection missing"
    self.ssh_client.exec_command(f"kill {self.resource_manager_pid}")
|
|
139
|
+
|
|
118
140
|
def __stop_worker(self, logger: Logger, name: str, deploy_type: DeployType, pid: str):
|
|
119
141
|
worker_info = f"Processing Worker: deploy: {deploy_type}, name: {name}"
|
|
120
142
|
if not pid:
|
|
@@ -132,10 +154,15 @@ class DataHost:
|
|
|
132
154
|
def stop_workers(self, logger: Logger):
|
|
133
155
|
if self.needs_ssh_connector and not self.ssh_client:
|
|
134
156
|
logger.debug("Creating missing ssh connector before stopping")
|
|
135
|
-
|
|
157
|
+
client = self.create_connection_client(client_type="ssh")
|
|
158
|
+
assert isinstance(client, SSHClient)
|
|
159
|
+
self.ssh_client = client
|
|
136
160
|
if self.needs_docker_connector and not self.docker_client:
|
|
137
161
|
logger.debug("Creating missing docker connector before stopping")
|
|
138
|
-
|
|
162
|
+
client = self.create_connection_client(client_type="docker")
|
|
163
|
+
assert isinstance(client, CustomDockerClient)
|
|
164
|
+
self.docker_client = client
|
|
165
|
+
self.__stop_network_agent_resource_manager_server(logger=logger)
|
|
139
166
|
|
|
140
167
|
logger.info(f"Stopping processing workers on host: {self.host}")
|
|
141
168
|
amount_workers = len(self.workers_native) + len(self.workers_docker)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from logging import Logger
|
|
2
|
+
from time import sleep
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
4
5
|
from re import search as re_search
|
|
@@ -25,7 +26,8 @@ def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str)
|
|
|
25
26
|
output = stdout.read().decode("utf-8")
|
|
26
27
|
stdout.close()
|
|
27
28
|
stdin.close()
|
|
28
|
-
|
|
29
|
+
pid = re_search(r"xyz([0-9]+)xyz", output).group(1) # type: ignore
|
|
30
|
+
return pid
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
# TODO: Implement the actual method that is missing
|
|
@@ -51,6 +53,12 @@ class DataNetworkAgent:
|
|
|
51
53
|
# The id is assigned when the agent is deployed
|
|
52
54
|
self.pid = pid
|
|
53
55
|
|
|
56
|
+
# Time to wait between deploying agents
|
|
57
|
+
self.wait_between_agent_deploys: float = 0.3
|
|
58
|
+
|
|
59
|
+
def __str__(self):
    """Return ``"<pid> <deploy_type> <processor_name> on host: <host>"``."""
    # Same formatting protocol as an f-string replacement field: format(value)
    leading = " ".join(format(part) for part in (self.pid, self.deploy_type, self.processor_name))
    return leading + " on host: " + format(self.host)
|
|
61
|
+
|
|
54
62
|
def _start_native_instance(self, logger: Logger, ssh_client, start_cmd: str):
|
|
55
63
|
if self.deploy_type != DeployType.NATIVE:
|
|
56
64
|
raise RuntimeError(f"Mismatch of deploy type when starting network agent: {self.processor_name}")
|
|
@@ -76,11 +84,17 @@ class DataProcessingWorker(DataNetworkAgent):
|
|
|
76
84
|
def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str):
    """Start this processing worker with the given client and return its PID.

    For ``DeployType.NATIVE`` the worker command is started through
    ``_start_native_instance``; for ``DeployType.DOCKER`` through
    ``_start_docker_instance``.  After a start the method sleeps for
    ``self.wait_between_agent_deploys`` seconds.

    Raises:
        AssertionError: if ``connector_client`` is falsy.
        RuntimeError: for an unknown deploy type, or in docker mode as long
            as no start command is defined.
    """
    if self.deploy_type not in (DeployType.NATIVE, DeployType.DOCKER):
        raise RuntimeError(f"Unknown deploy type of {self.__dict__}")

    if self.deploy_type == DeployType.NATIVE:
        launch_cmd = f"{self.processor_name} --database {database_url} --queue {queue_url} &"
        assert connector_client, "SSH client connection missing."
        launcher = self._start_native_instance
    else:
        # TODO: add real command to start processing worker in docker here
        launch_cmd = ""
        assert connector_client, "Docker client connection missing."
        if not launch_cmd:
            raise RuntimeError("Missing start command for the Processing Worker in docker mode")
        launcher = self._start_docker_instance

    self.pid = launcher(logger, connector_client, launch_cmd)
    # Pause between agent deployments
    sleep(self.wait_between_agent_deploys)
    return self.pid
|
ocrd_utils/__init__.py
CHANGED
|
@@ -70,7 +70,8 @@ Utility functions and constants usable in various circumstances.
|
|
|
70
70
|
|
|
71
71
|
filesystem-related utilities
|
|
72
72
|
|
|
73
|
-
* :py:func:`
|
|
73
|
+
* :py:func:`is_git_url`,
|
|
74
|
+
:py:func:`is_string`,
|
|
74
75
|
:py:func:`membername`,
|
|
75
76
|
:py:func:`concat_padded`,
|
|
76
77
|
:py:func:`nth_url_segment`,
|
|
@@ -118,6 +119,7 @@ from .constants import (
|
|
|
118
119
|
REGEX_PREFIX,
|
|
119
120
|
REGEX_FILE_ID,
|
|
120
121
|
RESOURCE_LOCATIONS,
|
|
122
|
+
RESOURCE_TYPES,
|
|
121
123
|
LOG_FORMAT,
|
|
122
124
|
LOG_TIMEFMT,
|
|
123
125
|
VERSION,
|
|
@@ -184,9 +186,11 @@ from .os import (
|
|
|
184
186
|
get_processor_resource_types,
|
|
185
187
|
get_ocrd_tool_json,
|
|
186
188
|
get_moduledir,
|
|
189
|
+
get_env_locations,
|
|
187
190
|
guess_media_type,
|
|
188
191
|
list_all_resources,
|
|
189
192
|
is_file_in_directory,
|
|
193
|
+
is_git_url,
|
|
190
194
|
list_resource_candidates,
|
|
191
195
|
atomic_write,
|
|
192
196
|
pushd_popd,
|
ocrd_utils/constants.py
CHANGED
|
@@ -5,6 +5,7 @@ from .introspect import dist_version
|
|
|
5
5
|
from re import compile as regex_compile
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
|
+
'DEFAULT_METS_BASENAME',
|
|
8
9
|
'EXT_TO_MIME',
|
|
9
10
|
'LOG_FORMAT',
|
|
10
11
|
'LOG_TIMEFMT',
|
|
@@ -14,7 +15,9 @@ __all__ = [
|
|
|
14
15
|
'PIL_TO_MIME',
|
|
15
16
|
'REGEX_PREFIX',
|
|
16
17
|
'REGEX_FILE_ID',
|
|
18
|
+
'RESOURCES_DIR_SYSTEM',
|
|
17
19
|
'RESOURCE_LOCATIONS',
|
|
20
|
+
'RESOURCE_TYPES',
|
|
18
21
|
'VERSION',
|
|
19
22
|
]
|
|
20
23
|
|
|
@@ -108,6 +111,8 @@ LOG_FORMAT = r'%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s'
|
|
|
108
111
|
LOG_TIMEFMT = r'%H:%M:%S'
|
|
109
112
|
|
|
110
113
|
RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
|
|
114
|
+
RESOURCE_TYPES = ['file', 'directory', 'archive']
|
|
115
|
+
RESOURCES_DIR_SYSTEM = '/usr/local/share/ocrd-resources'
|
|
111
116
|
|
|
112
117
|
DEFAULT_METS_BASENAME = 'mets.xml'
|
|
113
118
|
|
ocrd_utils/logging.py
CHANGED
|
@@ -75,6 +75,9 @@ _ocrdLevel2pythonLevel = {
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
def tf_disable_interactive_logs():
|
|
78
|
+
"""
|
|
79
|
+
Disable the interactive logging of tf/keras and set the log level to error or higher
|
|
80
|
+
"""
|
|
78
81
|
try:
|
|
79
82
|
from os import environ # pylint: disable=import-outside-toplevel
|
|
80
83
|
# This env variable must be set before importing from Keras
|