ocrd 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/network.py +0 -2
- ocrd/cli/resmgr.py +61 -24
- ocrd/processor/base.py +15 -5
- ocrd/resource_manager.py +116 -199
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/METADATA +2 -2
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/RECORD +20 -22
- ocrd_network/__init__.py +0 -1
- ocrd_network/cli/__init__.py +1 -3
- ocrd_network/constants.py +0 -3
- ocrd_network/logging_utils.py +0 -5
- ocrd_network/runtime_data/hosts.py +56 -47
- ocrd_network/runtime_data/network_agents.py +3 -26
- ocrd_utils/__init__.py +0 -2
- ocrd_utils/constants.py +0 -5
- ocrd_utils/os.py +52 -130
- ocrd_validators/ocrd_tool.schema.yml +4 -7
- ocrd_network/cli/resmgr_server.py +0 -23
- ocrd_network/resource_manager_server.py +0 -178
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/LICENSE +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/WHEEL +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.0.dist-info}/top_level.txt +0 -0
ocrd_utils/os.py
CHANGED
|
@@ -8,7 +8,6 @@ __all__ = [
|
|
|
8
8
|
'get_ocrd_tool_json',
|
|
9
9
|
'get_moduledir',
|
|
10
10
|
'get_processor_resource_types',
|
|
11
|
-
'get_env_locations',
|
|
12
11
|
'guess_media_type',
|
|
13
12
|
'pushd_popd',
|
|
14
13
|
'unzip_file_to_dir',
|
|
@@ -16,30 +15,28 @@ __all__ = [
|
|
|
16
15
|
'redirect_stderr_and_stdout_to_file',
|
|
17
16
|
]
|
|
18
17
|
|
|
19
|
-
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
20
18
|
from tempfile import TemporaryDirectory, gettempdir
|
|
21
19
|
from functools import lru_cache
|
|
22
20
|
from contextlib import contextmanager, redirect_stderr, redirect_stdout
|
|
23
21
|
from shutil import which
|
|
24
22
|
from json import loads
|
|
25
23
|
from json.decoder import JSONDecodeError
|
|
26
|
-
from os import getcwd, chdir, stat, chmod, umask, environ
|
|
24
|
+
from os import getcwd, chdir, stat, chmod, umask, environ
|
|
27
25
|
from pathlib import Path
|
|
28
26
|
from os.path import abspath as abspath_, join
|
|
29
27
|
from zipfile import ZipFile
|
|
30
28
|
from subprocess import run, PIPE
|
|
31
29
|
from mimetypes import guess_type as mimetypes_guess
|
|
32
30
|
from filetype import guess as filetype_guess
|
|
33
|
-
from fnmatch import filter as apply_glob
|
|
34
31
|
|
|
35
32
|
from atomicwrites import atomic_write as atomic_write_, AtomicWriter
|
|
36
33
|
|
|
37
|
-
from .constants import EXT_TO_MIME
|
|
34
|
+
from .constants import EXT_TO_MIME
|
|
38
35
|
from .config import config
|
|
39
36
|
from .logging import getLogger
|
|
40
37
|
from .introspect import resource_string
|
|
41
38
|
|
|
42
|
-
def abspath(url
|
|
39
|
+
def abspath(url):
|
|
43
40
|
"""
|
|
44
41
|
Get a full path to a file or file URL
|
|
45
42
|
|
|
@@ -50,7 +47,7 @@ def abspath(url : str) -> str:
|
|
|
50
47
|
return abspath_(url)
|
|
51
48
|
|
|
52
49
|
@contextmanager
|
|
53
|
-
def pushd_popd(newcwd
|
|
50
|
+
def pushd_popd(newcwd=None, tempdir=False):
|
|
54
51
|
if newcwd and tempdir:
|
|
55
52
|
raise Exception("pushd_popd can accept either newcwd or tempdir, not both")
|
|
56
53
|
try:
|
|
@@ -70,7 +67,7 @@ def pushd_popd(newcwd : Union[str, PathLike] = None, tempdir : bool = False) ->
|
|
|
70
67
|
finally:
|
|
71
68
|
chdir(oldcwd)
|
|
72
69
|
|
|
73
|
-
def unzip_file_to_dir(path_to_zip
|
|
70
|
+
def unzip_file_to_dir(path_to_zip, output_directory):
|
|
74
71
|
"""
|
|
75
72
|
Extract a ZIP archive to a directory
|
|
76
73
|
"""
|
|
@@ -78,7 +75,7 @@ def unzip_file_to_dir(path_to_zip : Union[str, PathLike], output_directory : str
|
|
|
78
75
|
z.extractall(output_directory)
|
|
79
76
|
|
|
80
77
|
@lru_cache()
|
|
81
|
-
def get_ocrd_tool_json(executable
|
|
78
|
+
def get_ocrd_tool_json(executable):
|
|
82
79
|
"""
|
|
83
80
|
Get the ``ocrd-tool`` description of ``executable``.
|
|
84
81
|
"""
|
|
@@ -93,11 +90,11 @@ def get_ocrd_tool_json(executable : str) -> Dict[str, Any]:
|
|
|
93
90
|
except (JSONDecodeError, OSError) as e:
|
|
94
91
|
getLogger('ocrd.utils.get_ocrd_tool_json').error(f'{executable} --dump-json produced invalid JSON: {e}')
|
|
95
92
|
if 'resource_locations' not in ocrd_tool:
|
|
96
|
-
ocrd_tool['resource_locations'] =
|
|
93
|
+
ocrd_tool['resource_locations'] = ['data', 'cwd', 'system', 'module']
|
|
97
94
|
return ocrd_tool
|
|
98
95
|
|
|
99
96
|
@lru_cache()
|
|
100
|
-
def get_moduledir(executable
|
|
97
|
+
def get_moduledir(executable):
|
|
101
98
|
moduledir = None
|
|
102
99
|
try:
|
|
103
100
|
ocrd_all_moduledir = loads(resource_string('ocrd', 'ocrd-all-module-dir.json'))
|
|
@@ -109,80 +106,57 @@ def get_moduledir(executable : str) -> str:
|
|
|
109
106
|
getLogger('ocrd.utils.get_moduledir').error(f'{executable} --dump-module-dir failed: {e}')
|
|
110
107
|
return moduledir
|
|
111
108
|
|
|
112
|
-
def
|
|
113
|
-
processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
|
|
114
|
-
if processor_path_var in environ:
|
|
115
|
-
return environ[processor_path_var].split(':')
|
|
116
|
-
return []
|
|
117
|
-
|
|
118
|
-
def list_resource_candidates(executable : str, fname : str, cwd : Optional[str] = None, moduled : Optional[str] = None, xdg_data_home : Optional[str] = None) -> List[str]:
|
|
109
|
+
def list_resource_candidates(executable, fname, cwd=getcwd(), moduled=None, xdg_data_home=None):
|
|
119
110
|
"""
|
|
120
111
|
Generate candidates for processor resources according to
|
|
121
112
|
https://ocr-d.de/en/spec/ocrd_tool#file-parameters
|
|
122
113
|
"""
|
|
123
|
-
if cwd is None:
|
|
124
|
-
cwd = getcwd()
|
|
125
114
|
candidates = []
|
|
126
115
|
candidates.append(join(cwd, fname))
|
|
127
|
-
xdg_data_home = xdg_data_home
|
|
128
|
-
|
|
129
|
-
|
|
116
|
+
xdg_data_home = config.XDG_DATA_HOME if not xdg_data_home else xdg_data_home
|
|
117
|
+
processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
|
|
118
|
+
if processor_path_var in environ:
|
|
119
|
+
candidates += [join(x, fname) for x in environ[processor_path_var].split(':')]
|
|
130
120
|
candidates.append(join(xdg_data_home, 'ocrd-resources', executable, fname))
|
|
131
|
-
candidates.append(join(
|
|
121
|
+
candidates.append(join('/usr/local/share/ocrd-resources', executable, fname))
|
|
132
122
|
if moduled:
|
|
133
123
|
candidates.append(join(moduled, fname))
|
|
134
124
|
return candidates
|
|
135
125
|
|
|
136
|
-
def list_all_resources(executable
|
|
126
|
+
def list_all_resources(executable, moduled=None, xdg_data_home=None):
|
|
137
127
|
"""
|
|
138
128
|
List all processor resources in the filesystem according to
|
|
139
|
-
https://ocr-d.de/en/spec/ocrd_tool#
|
|
129
|
+
https://ocr-d.de/en/spec/ocrd_tool#file-parameters
|
|
140
130
|
"""
|
|
141
|
-
|
|
142
|
-
if ocrd_tool is None:
|
|
143
|
-
ocrd_tool = get_ocrd_tool_json(executable)
|
|
144
|
-
# processor we're looking for might not be installed, hence the fallbacks
|
|
145
|
-
try:
|
|
146
|
-
mimetypes = get_processor_resource_types(executable, ocrd_tool=ocrd_tool)
|
|
147
|
-
except KeyError:
|
|
148
|
-
mimetypes = ['*/*']
|
|
131
|
+
candidates = []
|
|
149
132
|
try:
|
|
150
|
-
resource_locations =
|
|
151
|
-
except
|
|
133
|
+
resource_locations = get_ocrd_tool_json(executable)['resource_locations']
|
|
134
|
+
except FileNotFoundError:
|
|
135
|
+
# processor we're looking for resource_locations of is not installed.
|
|
152
136
|
# Assume the default
|
|
153
|
-
resource_locations =
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
# (e.g. dirname without dot means we falsely match files without suffix)
|
|
157
|
-
resource_suffixes = [Path(res['name']).suffix
|
|
158
|
-
for res in ocrd_tool['resources']]
|
|
159
|
-
except KeyError:
|
|
160
|
-
resource_suffixes = []
|
|
161
|
-
logger = getLogger('ocrd.utils.list_all_resources')
|
|
162
|
-
candidates = []
|
|
163
|
-
# cwd would list too many false positives:
|
|
137
|
+
resource_locations = ['data', 'cwd', 'system', 'module']
|
|
138
|
+
xdg_data_home = config.XDG_DATA_HOME if not xdg_data_home else xdg_data_home
|
|
139
|
+
# XXX cwd would list too many false positives
|
|
164
140
|
# if 'cwd' in resource_locations:
|
|
165
|
-
#
|
|
166
|
-
#
|
|
167
|
-
#
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
candidates += processor_path.iterdir()
|
|
141
|
+
# cwd_candidate = join(getcwd(), 'ocrd-resources', executable)
|
|
142
|
+
# if Path(cwd_candidate).exists():
|
|
143
|
+
# candidates.append(cwd_candidate)
|
|
144
|
+
processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
|
|
145
|
+
if processor_path_var in environ:
|
|
146
|
+
for processor_path in environ[processor_path_var].split(':'):
|
|
147
|
+
if Path(processor_path).is_dir():
|
|
148
|
+
candidates += Path(processor_path).iterdir()
|
|
174
149
|
if 'data' in resource_locations:
|
|
175
150
|
datadir = Path(xdg_data_home, 'ocrd-resources', executable)
|
|
176
151
|
if datadir.is_dir():
|
|
177
152
|
candidates += datadir.iterdir()
|
|
178
153
|
if 'system' in resource_locations:
|
|
179
|
-
systemdir = Path(
|
|
154
|
+
systemdir = Path('/usr/local/share/ocrd-resources', executable)
|
|
180
155
|
if systemdir.is_dir():
|
|
181
156
|
candidates += systemdir.iterdir()
|
|
182
157
|
if 'module' in resource_locations and moduled:
|
|
183
158
|
# recurse fully
|
|
184
|
-
|
|
185
|
-
for resource in moduled.iterdir():
|
|
159
|
+
for resource in itertree(Path(moduled)):
|
|
186
160
|
if resource.is_dir():
|
|
187
161
|
continue
|
|
188
162
|
if any(resource.match(pattern) for pattern in
|
|
@@ -190,66 +164,17 @@ def list_all_resources(executable : str, ocrd_tool : Optional[Dict[str, Any]] =
|
|
|
190
164
|
# code and data; `is_resource()` only singles out
|
|
191
165
|
# files over directories; but we want data files only
|
|
192
166
|
# todo: more code and cache exclusion patterns!
|
|
193
|
-
['*.py', '*.py[cod]', '*~', '
|
|
194
|
-
'__pycache__/*', '*.egg-info/*', '*.egg',
|
|
195
|
-
'copyright.txt', 'LICENSE*', 'README.md', 'MANIFEST',
|
|
196
|
-
'TAGS', '.DS_Store',
|
|
197
|
-
# C extensions
|
|
198
|
-
'*.so',
|
|
199
|
-
# translations
|
|
200
|
-
'*.mo', '*.pot',
|
|
201
|
-
'*.log', '*.orig', '*.BAK',
|
|
202
|
-
'.git/*',
|
|
203
|
-
# our stuff
|
|
204
|
-
'ocrd-tool.json',
|
|
167
|
+
['*.py', '*.py[cod]', '*~', 'ocrd-tool.json',
|
|
205
168
|
'environment.pickle', 'resource_list.yml', 'lib.bash']):
|
|
206
|
-
logger.debug("ignoring module candidate '%s'", resource)
|
|
207
169
|
continue
|
|
208
170
|
candidates.append(resource)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
if path.is_dir():
|
|
215
|
-
if not 'text/directory' in mimetypes:
|
|
216
|
-
logger.debug("ignoring directory candidate '%s'", path)
|
|
217
|
-
return False
|
|
218
|
-
if path.name in ['.git']:
|
|
219
|
-
logger.debug("ignoring directory candidate '%s'", path)
|
|
220
|
-
return False
|
|
221
|
-
return True
|
|
222
|
-
if not path.is_file():
|
|
223
|
-
logger.warning("ignoring non-file, non-directory candidate '%s'", path)
|
|
224
|
-
return False
|
|
225
|
-
res_mimetype = guess_media_type(path, fallback='')
|
|
226
|
-
if res_mimetype == 'application/json':
|
|
227
|
-
# always accept, regardless of configured mimetypes:
|
|
228
|
-
# needed for distributing or sharing parameter preset files
|
|
229
|
-
return True
|
|
230
|
-
if ['text/directory'] == mimetypes:
|
|
231
|
-
logger.debug("ignoring non-directory candidate '%s'", path)
|
|
232
|
-
return False
|
|
233
|
-
if 'application/octet-stream' in mimetypes:
|
|
234
|
-
# catch-all type - do not enforce anything
|
|
235
|
-
return True
|
|
236
|
-
if path.suffix in resource_suffixes:
|
|
237
|
-
return True
|
|
238
|
-
if any(path.suffix == MIME_TO_EXT.get(mime, None)
|
|
239
|
-
for mime in mimetypes):
|
|
240
|
-
return True
|
|
241
|
-
if not res_mimetype:
|
|
242
|
-
logger.warning("cannot determine content type of candidate '%s'", path)
|
|
243
|
-
return True
|
|
244
|
-
if any(apply_glob([res_mimetype], mime)
|
|
245
|
-
for mime in mimetypes):
|
|
246
|
-
return True
|
|
247
|
-
logger.debug("ignoring %s candidate '%s'", res_mimetype, path)
|
|
248
|
-
return False
|
|
249
|
-
candidates = sorted(filter(valid_resource_type, candidates))
|
|
250
|
-
return map(str, candidates)
|
|
171
|
+
# recurse once
|
|
172
|
+
for parent in candidates:
|
|
173
|
+
if parent.is_dir() and parent.name != '.git':
|
|
174
|
+
candidates += parent.iterdir()
|
|
175
|
+
return sorted([str(x) for x in candidates])
|
|
251
176
|
|
|
252
|
-
def get_processor_resource_types(executable
|
|
177
|
+
def get_processor_resource_types(executable, ocrd_tool=None):
|
|
253
178
|
"""
|
|
254
179
|
Determine what type of resource parameters a processor needs.
|
|
255
180
|
|
|
@@ -261,16 +186,13 @@ def get_processor_resource_types(executable : str, ocrd_tool : Optional[Dict[str
|
|
|
261
186
|
if not which(executable):
|
|
262
187
|
return ['*/*']
|
|
263
188
|
ocrd_tool = get_ocrd_tool_json(executable)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
if not len(mime_types):
|
|
269
|
-
# None of the parameters for this processor are resources
|
|
270
|
-
# (or the parameters' resource types are not properly declared,)
|
|
271
|
-
# so output both directories and files
|
|
189
|
+
if not next((True for p in ocrd_tool.get('parameters', {}).values() if 'content-type' in p), False):
|
|
190
|
+
# None of the parameters for this processor are resources (or not
|
|
191
|
+
# the resource parameters are not properly declared, so output both
|
|
192
|
+
# directories and files
|
|
272
193
|
return ['*/*']
|
|
273
|
-
return
|
|
194
|
+
return [p['content-type'] for p in ocrd_tool['parameters'].values()
|
|
195
|
+
if 'content-type' in p]
|
|
274
196
|
|
|
275
197
|
# ht @pabs3
|
|
276
198
|
# https://github.com/untitaker/python-atomicwrites/issues/42
|
|
@@ -289,12 +211,12 @@ class AtomicWriterPerms(AtomicWriter):
|
|
|
289
211
|
return f
|
|
290
212
|
|
|
291
213
|
@contextmanager
|
|
292
|
-
def atomic_write(fpath
|
|
214
|
+
def atomic_write(fpath):
|
|
293
215
|
with atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True) as f:
|
|
294
216
|
yield f
|
|
295
217
|
|
|
296
218
|
|
|
297
|
-
def is_file_in_directory(directory
|
|
219
|
+
def is_file_in_directory(directory, file):
|
|
298
220
|
"""
|
|
299
221
|
Return True if ``file`` is in ``directory`` (by checking that all components of ``directory`` are in ``file.parts``)
|
|
300
222
|
"""
|
|
@@ -302,7 +224,7 @@ def is_file_in_directory(directory : Union[str, PathLike], file : Union[str, Pat
|
|
|
302
224
|
file = Path(file)
|
|
303
225
|
return list(file.parts)[:len(directory.parts)] == list(directory.parts)
|
|
304
226
|
|
|
305
|
-
def itertree(path
|
|
227
|
+
def itertree(path):
|
|
306
228
|
"""
|
|
307
229
|
Generate a list of paths by recursively enumerating ``path``
|
|
308
230
|
"""
|
|
@@ -313,14 +235,14 @@ def itertree(path : Union[str, PathLike]) -> PathLike:
|
|
|
313
235
|
yield from itertree(subpath)
|
|
314
236
|
yield path
|
|
315
237
|
|
|
316
|
-
def directory_size(path
|
|
238
|
+
def directory_size(path):
|
|
317
239
|
"""
|
|
318
240
|
Calculates size of all files in directory ``path``
|
|
319
241
|
"""
|
|
320
242
|
path = Path(path)
|
|
321
243
|
return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file())
|
|
322
244
|
|
|
323
|
-
def guess_media_type(input_file : str, fallback :
|
|
245
|
+
def guess_media_type(input_file : str, fallback : str = None, application_xml : str = 'application/xml'):
|
|
324
246
|
"""
|
|
325
247
|
Guess the media type of a file path
|
|
326
248
|
"""
|
|
@@ -332,7 +254,7 @@ def guess_media_type(input_file : str, fallback : Optional[str] = None, applicat
|
|
|
332
254
|
if mimetype is None:
|
|
333
255
|
mimetype = EXT_TO_MIME.get(''.join(Path(input_file).suffixes), fallback)
|
|
334
256
|
if mimetype is None:
|
|
335
|
-
raise ValueError("Could not determine MIME type of input_file
|
|
257
|
+
raise ValueError("Could not determine MIME type of input_file must")
|
|
336
258
|
if mimetype == 'application/xml':
|
|
337
259
|
mimetype = application_xml
|
|
338
260
|
return mimetype
|
|
@@ -142,21 +142,18 @@ properties:
|
|
|
142
142
|
description: List the allowed values if a fixed list.
|
|
143
143
|
content-type:
|
|
144
144
|
type: string
|
|
145
|
+
default: 'application/octet-stream'
|
|
145
146
|
description: >
|
|
146
|
-
If parameter is reference to file (type=string format=uri):
|
|
147
147
|
The media type of resources this processor expects for
|
|
148
148
|
this parameter. Most processors use files for resources
|
|
149
149
|
(e.g. `*.traineddata` for `ocrd-tesserocr-recognize`)
|
|
150
150
|
while others use directories of files (e.g. `default` for
|
|
151
|
-
`ocrd-eynollah-segment`).
|
|
152
|
-
|
|
151
|
+
`ocrd-eynollah-segment`). If a parameter requires
|
|
152
|
+
directories, it must set `content-type` to
|
|
153
153
|
`text/directory`.
|
|
154
154
|
cacheable:
|
|
155
155
|
type: boolean
|
|
156
|
-
description:
|
|
157
|
-
If parameter is reference to file (type=string format=uri):
|
|
158
|
-
Whether the file should be cached, e.g. because it is large
|
|
159
|
-
and won't change.
|
|
156
|
+
description: "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change."
|
|
160
157
|
default: false
|
|
161
158
|
description:
|
|
162
159
|
description: Concise description of what the tool does
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import click
|
|
2
|
-
from ocrd_network import ResourceManagerServer, ServerAddressParamType
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
@click.command('resmgr-server')
|
|
6
|
-
@click.option('-a', '--address',
|
|
7
|
-
help='The URL of the OCR-D resource manager server, format: host:port',
|
|
8
|
-
type=ServerAddressParamType(),
|
|
9
|
-
required=True)
|
|
10
|
-
def resource_manager_server_cli(address: str):
|
|
11
|
-
"""
|
|
12
|
-
Start standalone REST API OCR-D Resource Manager Server
|
|
13
|
-
"""
|
|
14
|
-
try:
|
|
15
|
-
# Note, the address is already validated with the type field
|
|
16
|
-
host, port = address.split(':')
|
|
17
|
-
resource_manager_server = ResourceManagerServer(
|
|
18
|
-
host = host,
|
|
19
|
-
port = int(port)
|
|
20
|
-
)
|
|
21
|
-
resource_manager_server.start()
|
|
22
|
-
except Exception as e:
|
|
23
|
-
raise Exception("OCR-D Resource Manager Server has failed with error") from e
|
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from os import getpid
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import requests
|
|
5
|
-
from shutil import which
|
|
6
|
-
from typing import Any
|
|
7
|
-
from uvicorn import run as uvicorn_run
|
|
8
|
-
from fastapi import APIRouter, FastAPI, HTTPException, status
|
|
9
|
-
|
|
10
|
-
from ocrd import OcrdResourceManager
|
|
11
|
-
from ocrd_utils import directory_size, getLogger, get_moduledir, get_ocrd_tool_json, initLogging
|
|
12
|
-
from .logging_utils import configure_file_handler_with_formatter, get_resource_manager_server_logging_file_path
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class ResourceManagerServer(FastAPI):
|
|
16
|
-
def __init__(self, host: str, port: int) -> None:
|
|
17
|
-
self.title = f"OCR-D Resource Manager Server"
|
|
18
|
-
super().__init__(
|
|
19
|
-
title=self.title,
|
|
20
|
-
on_startup=[self.on_startup],
|
|
21
|
-
on_shutdown=[self.on_shutdown],
|
|
22
|
-
description=self.title
|
|
23
|
-
)
|
|
24
|
-
initLogging()
|
|
25
|
-
self.log = getLogger("ocrd_network.resource_manager_server")
|
|
26
|
-
log_file = get_resource_manager_server_logging_file_path(pid=getpid())
|
|
27
|
-
configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")
|
|
28
|
-
|
|
29
|
-
self.resmgr_instance = OcrdResourceManager()
|
|
30
|
-
|
|
31
|
-
self.hostname = host
|
|
32
|
-
self.port = port
|
|
33
|
-
|
|
34
|
-
self.add_api_routes()
|
|
35
|
-
|
|
36
|
-
def start(self):
|
|
37
|
-
uvicorn_run(self, host=self.hostname, port=int(self.port))
|
|
38
|
-
|
|
39
|
-
async def on_startup(self):
|
|
40
|
-
self.log.info(f"Starting {self.title}")
|
|
41
|
-
pass
|
|
42
|
-
|
|
43
|
-
async def on_shutdown(self) -> None:
|
|
44
|
-
pass
|
|
45
|
-
|
|
46
|
-
def add_api_routes(self):
|
|
47
|
-
base_router = APIRouter()
|
|
48
|
-
base_router.add_api_route(
|
|
49
|
-
path="/",
|
|
50
|
-
endpoint=self.home_page,
|
|
51
|
-
methods=["GET"],
|
|
52
|
-
status_code=status.HTTP_200_OK,
|
|
53
|
-
summary="Get information about the OCR-D Resource Manager Server"
|
|
54
|
-
)
|
|
55
|
-
base_router.add_api_route(
|
|
56
|
-
path="/list_available",
|
|
57
|
-
endpoint=self.list_available_resources,
|
|
58
|
-
methods=["GET"],
|
|
59
|
-
status_code=status.HTTP_200_OK,
|
|
60
|
-
summary=""
|
|
61
|
-
)
|
|
62
|
-
base_router.add_api_route(
|
|
63
|
-
path="/list_installed",
|
|
64
|
-
endpoint=self.list_installed_resources,
|
|
65
|
-
methods=["GET"],
|
|
66
|
-
status_code=status.HTTP_200_OK,
|
|
67
|
-
summary=""
|
|
68
|
-
)
|
|
69
|
-
base_router.add_api_route(
|
|
70
|
-
path="/download",
|
|
71
|
-
endpoint=self.download_resource,
|
|
72
|
-
methods=["GET"],
|
|
73
|
-
status_code=status.HTTP_200_OK,
|
|
74
|
-
summary=""
|
|
75
|
-
)
|
|
76
|
-
self.include_router(base_router)
|
|
77
|
-
|
|
78
|
-
async def home_page(self):
|
|
79
|
-
message = f"The home page of the {self.title}"
|
|
80
|
-
json_message = {
|
|
81
|
-
"message": message,
|
|
82
|
-
"time": datetime.now().strftime("%Y-%m-%d %H:%M")
|
|
83
|
-
}
|
|
84
|
-
return json_message
|
|
85
|
-
|
|
86
|
-
async def list_available_resources(
|
|
87
|
-
self,
|
|
88
|
-
executable: Any = None,
|
|
89
|
-
dynamic: bool = True,
|
|
90
|
-
name: Any = None,
|
|
91
|
-
database: Any = None,
|
|
92
|
-
url: Any = None
|
|
93
|
-
):
|
|
94
|
-
result = self.resmgr_instance.list_available(executable, dynamic, name, database, url)
|
|
95
|
-
json_message = {
|
|
96
|
-
"result": result
|
|
97
|
-
}
|
|
98
|
-
return json_message
|
|
99
|
-
|
|
100
|
-
async def list_installed_resources(self, executable: Any = None):
|
|
101
|
-
result = self.resmgr_instance.list_available(executable)
|
|
102
|
-
json_message = {
|
|
103
|
-
"result": result
|
|
104
|
-
}
|
|
105
|
-
return json_message
|
|
106
|
-
|
|
107
|
-
async def download_resource(
|
|
108
|
-
self,
|
|
109
|
-
executable: str,
|
|
110
|
-
name: Any = None,
|
|
111
|
-
location: Any = None,
|
|
112
|
-
any_url: str = '',
|
|
113
|
-
no_dynamic: bool = False,
|
|
114
|
-
resource_type: str = 'file',
|
|
115
|
-
path_in_archive: str = '.',
|
|
116
|
-
allow_uninstalled: bool = True,
|
|
117
|
-
overwrite: bool = True
|
|
118
|
-
):
|
|
119
|
-
resmgr = OcrdResourceManager()
|
|
120
|
-
response = []
|
|
121
|
-
if executable != '*' and not name:
|
|
122
|
-
message = f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required"
|
|
123
|
-
self.log.error(message)
|
|
124
|
-
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
|
|
125
|
-
elif executable == '*':
|
|
126
|
-
executable = None
|
|
127
|
-
if name == '*':
|
|
128
|
-
name = None
|
|
129
|
-
if executable and not which(executable):
|
|
130
|
-
if not allow_uninstalled:
|
|
131
|
-
message = (f"Executable '{executable}' is not installed. To download resources anyway, "
|
|
132
|
-
f"use the -a/--allow-uninstalled flag")
|
|
133
|
-
self.log.error(message)
|
|
134
|
-
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)
|
|
135
|
-
else:
|
|
136
|
-
message = f"Executable '{executable}' is not installed, but downloading resources anyway."
|
|
137
|
-
self.log.info(message)
|
|
138
|
-
response.append(message)
|
|
139
|
-
reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
|
|
140
|
-
if not any(r[1] for r in reslist):
|
|
141
|
-
message = f"No resources {name} found in registry for executable {executable}"
|
|
142
|
-
self.log.info(message)
|
|
143
|
-
response.append(message)
|
|
144
|
-
if executable and name:
|
|
145
|
-
reslist = [(executable, [{
|
|
146
|
-
'url': any_url or '???',
|
|
147
|
-
'name': name,
|
|
148
|
-
'type': resource_type,
|
|
149
|
-
'path_in_archive': path_in_archive}]
|
|
150
|
-
)]
|
|
151
|
-
for this_executable, this_reslist in reslist:
|
|
152
|
-
resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
|
|
153
|
-
if not location:
|
|
154
|
-
location = resource_locations[0]
|
|
155
|
-
elif location not in resource_locations:
|
|
156
|
-
response.append(
|
|
157
|
-
f"The selected --location {location} is not in the {this_executable}'s resource search path, "
|
|
158
|
-
f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
|
|
159
|
-
res_dest_dir = resmgr.build_resource_dest_dir(location=location, executable=this_executable)
|
|
160
|
-
for res_dict in this_reslist:
|
|
161
|
-
try:
|
|
162
|
-
fpath = resmgr.handle_resource(
|
|
163
|
-
res_dict=res_dict,
|
|
164
|
-
executable=this_executable,
|
|
165
|
-
dest_dir=res_dest_dir,
|
|
166
|
-
any_url=any_url,
|
|
167
|
-
overwrite=overwrite,
|
|
168
|
-
resource_type=resource_type,
|
|
169
|
-
path_in_archive=path_in_archive
|
|
170
|
-
)
|
|
171
|
-
if not fpath:
|
|
172
|
-
continue
|
|
173
|
-
except FileExistsError as exc:
|
|
174
|
-
response.append(str(exc))
|
|
175
|
-
usage = res_dict.get('parameter_usage', 'as-is')
|
|
176
|
-
response.append(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
|
|
177
|
-
json_message = { "result": response }
|
|
178
|
-
return json_message
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|