ocrd 3.2.0__py3-none-any.whl → 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/network.py +0 -2
- ocrd/cli/resmgr.py +61 -24
- ocrd/decorators/__init__.py +3 -3
- ocrd/processor/base.py +16 -6
- ocrd/processor/builtin/dummy/ocrd-tool.json +1 -0
- ocrd/processor/helpers.py +2 -2
- ocrd/resource_manager.py +116 -199
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/METADATA +2 -2
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/RECORD +23 -25
- ocrd_network/__init__.py +0 -1
- ocrd_network/cli/__init__.py +1 -3
- ocrd_network/constants.py +0 -3
- ocrd_network/logging_utils.py +0 -5
- ocrd_network/runtime_data/hosts.py +56 -47
- ocrd_network/runtime_data/network_agents.py +3 -26
- ocrd_utils/__init__.py +0 -2
- ocrd_utils/constants.py +0 -5
- ocrd_utils/os.py +52 -130
- ocrd_validators/ocrd_tool.schema.yml +4 -7
- ocrd_network/cli/resmgr_server.py +0 -23
- ocrd_network/resource_manager_server.py +0 -178
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/LICENSE +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/WHEEL +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/entry_points.txt +0 -0
- {ocrd-3.2.0.dist-info → ocrd-3.3.1.dist-info}/top_level.txt +0 -0
ocrd/cli/network.py
CHANGED
|
@@ -13,7 +13,6 @@ from ocrd_network.cli import (
|
|
|
13
13
|
processing_server_cli,
|
|
14
14
|
processing_worker_cli,
|
|
15
15
|
processor_server_cli,
|
|
16
|
-
resource_manager_server_cli
|
|
17
16
|
)
|
|
18
17
|
|
|
19
18
|
|
|
@@ -29,4 +28,3 @@ network_cli.add_command(client_cli)
|
|
|
29
28
|
network_cli.add_command(processing_server_cli)
|
|
30
29
|
network_cli.add_command(processing_worker_cli)
|
|
31
30
|
network_cli.add_command(processor_server_cli)
|
|
32
|
-
network_cli.add_command(resource_manager_server_cli)
|
ocrd/cli/resmgr.py
CHANGED
|
@@ -18,9 +18,9 @@ from ocrd_utils import (
|
|
|
18
18
|
getLogger,
|
|
19
19
|
get_moduledir,
|
|
20
20
|
get_ocrd_tool_json,
|
|
21
|
+
resource_filename,
|
|
21
22
|
initLogging,
|
|
22
23
|
RESOURCE_LOCATIONS,
|
|
23
|
-
RESOURCE_TYPES
|
|
24
24
|
)
|
|
25
25
|
from ocrd.constants import RESOURCE_USER_LIST_COMMENT
|
|
26
26
|
|
|
@@ -72,13 +72,13 @@ def list_installed(executable=None):
|
|
|
72
72
|
@click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
|
|
73
73
|
@click.option('-D', '--no-dynamic', default=False, is_flag=True,
|
|
74
74
|
help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
|
|
75
|
-
@click.option('-t', '--resource-type', type=click.Choice(
|
|
75
|
+
@click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
|
|
76
76
|
help='Type of resource',)
|
|
77
77
|
@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
|
|
78
78
|
@click.option('-a', '--allow-uninstalled', is_flag=True,
|
|
79
79
|
help="Allow installing resources for uninstalled processors",)
|
|
80
80
|
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
|
|
81
|
-
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
|
|
81
|
+
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
|
|
82
82
|
help="Where to store resources - defaults to first location in processor's 'resource_locations' "
|
|
83
83
|
"list or finally 'data'")
|
|
84
84
|
@click.argument('executable', required=True)
|
|
@@ -107,6 +107,8 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
|
|
|
107
107
|
executable = None
|
|
108
108
|
if name == '*':
|
|
109
109
|
name = None
|
|
110
|
+
is_url = (any_url.startswith('https://') or any_url.startswith('http://')) if any_url else False
|
|
111
|
+
is_filename = Path(any_url).exists() if any_url else False
|
|
110
112
|
if executable and not which(executable):
|
|
111
113
|
if not allow_uninstalled:
|
|
112
114
|
log.error(f"Executable '{executable}' is not installed. "
|
|
@@ -125,30 +127,65 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
|
|
|
125
127
|
'path_in_archive': path_in_archive}]
|
|
126
128
|
)]
|
|
127
129
|
for this_executable, this_reslist in reslist:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
130
|
+
for resdict in this_reslist:
|
|
131
|
+
if 'size' in resdict:
|
|
132
|
+
registered = "registered"
|
|
133
|
+
else:
|
|
134
|
+
registered = "unregistered"
|
|
135
|
+
if any_url:
|
|
136
|
+
resdict['url'] = any_url
|
|
137
|
+
if resdict['url'] == '???':
|
|
138
|
+
log.warning(f"Cannot download user resource {resdict['name']}")
|
|
139
|
+
continue
|
|
140
|
+
if resdict['url'].startswith('https://') or resdict['url'].startswith('http://'):
|
|
141
|
+
log.info(f"Downloading {registered} resource '{resdict['name']}' ({resdict['url']})")
|
|
142
|
+
if 'size' not in resdict:
|
|
143
|
+
with requests.head(resdict['url']) as r:
|
|
144
|
+
resdict['size'] = int(r.headers.get('content-length', 0))
|
|
145
|
+
else:
|
|
146
|
+
log.info(f"Copying {registered} resource '{resdict['name']}' ({resdict['url']})")
|
|
147
|
+
urlpath = Path(resdict['url'])
|
|
148
|
+
resdict['url'] = str(urlpath.resolve())
|
|
149
|
+
if Path(urlpath).is_dir():
|
|
150
|
+
resdict['size'] = directory_size(urlpath)
|
|
151
|
+
else:
|
|
152
|
+
resdict['size'] = urlpath.stat().st_size
|
|
153
|
+
if not location:
|
|
154
|
+
location = get_ocrd_tool_json(this_executable)['resource_locations'][0]
|
|
155
|
+
elif location not in get_ocrd_tool_json(this_executable)['resource_locations']:
|
|
156
|
+
log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
|
|
157
|
+
f"refusing to install to invalid location")
|
|
158
|
+
sys.exit(1)
|
|
159
|
+
if location != 'module':
|
|
160
|
+
basedir = resmgr.location_to_resource_dir(location)
|
|
161
|
+
else:
|
|
162
|
+
basedir = get_moduledir(this_executable)
|
|
163
|
+
if not basedir:
|
|
164
|
+
basedir = resmgr.location_to_resource_dir('data')
|
|
165
|
+
|
|
136
166
|
try:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
167
|
+
with click.progressbar(length=resdict['size']) as bar:
|
|
168
|
+
fpath = resmgr.download(
|
|
169
|
+
this_executable,
|
|
170
|
+
resdict['url'],
|
|
171
|
+
basedir,
|
|
172
|
+
name=resdict['name'],
|
|
173
|
+
resource_type=resdict.get('type', resource_type),
|
|
174
|
+
path_in_archive=resdict.get('path_in_archive', path_in_archive),
|
|
175
|
+
overwrite=overwrite,
|
|
176
|
+
no_subdir=location in ['cwd', 'module'],
|
|
177
|
+
progress_cb=lambda delta: bar.update(delta)
|
|
178
|
+
)
|
|
179
|
+
if registered == 'unregistered':
|
|
180
|
+
log.info(f"{this_executable} resource '{name}' ({any_url}) not a known resource, creating stub "
|
|
181
|
+
f"in {resmgr.user_list}'")
|
|
182
|
+
resmgr.add_to_user_database(this_executable, fpath, url=any_url)
|
|
183
|
+
resmgr.save_user_list()
|
|
184
|
+
log.info(f"Installed resource {resdict['url']} under {fpath}")
|
|
148
185
|
except FileExistsError as exc:
|
|
149
186
|
log.info(str(exc))
|
|
150
|
-
|
|
151
|
-
|
|
187
|
+
log.info(f"Use in parameters as "
|
|
188
|
+
f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
|
|
152
189
|
|
|
153
190
|
|
|
154
191
|
@resmgr_cli.command('migrate')
|
ocrd/decorators/__init__.py
CHANGED
|
@@ -107,10 +107,10 @@ def ocrd_cli_wrap_processor(
|
|
|
107
107
|
if 'parameter_override' in kwargs:
|
|
108
108
|
set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override'))
|
|
109
109
|
# Assert -I / -O
|
|
110
|
-
if not kwargs
|
|
110
|
+
if not kwargs.get('input_file_grp', None):
|
|
111
111
|
raise ValueError('-I/--input-file-grp is required')
|
|
112
|
-
if not kwargs
|
|
113
|
-
raise ValueError('-O/--output-file-grp is required')
|
|
112
|
+
if 'output_file_grp' not in kwargs:
|
|
113
|
+
raise ValueError('-O/--output-file-grp is required') # actually, it may be None
|
|
114
114
|
resolver = Resolver()
|
|
115
115
|
working_dir, mets, _, mets_server_url = \
|
|
116
116
|
resolver.resolve_mets_arguments(working_dir, mets, None, mets_server_url)
|
ocrd/processor/base.py
CHANGED
|
@@ -43,14 +43,15 @@ from .ocrd_page_result import OcrdPageResult
|
|
|
43
43
|
from ocrd_utils import (
|
|
44
44
|
VERSION as OCRD_VERSION,
|
|
45
45
|
MIMETYPE_PAGE,
|
|
46
|
+
MIME_TO_EXT,
|
|
46
47
|
config,
|
|
47
48
|
getLogger,
|
|
48
49
|
list_resource_candidates,
|
|
50
|
+
pushd_popd,
|
|
49
51
|
list_all_resources,
|
|
50
52
|
get_processor_resource_types,
|
|
51
53
|
resource_filename,
|
|
52
54
|
parse_json_file_with_comments,
|
|
53
|
-
pushd_popd,
|
|
54
55
|
make_file_id,
|
|
55
56
|
deprecation_warning
|
|
56
57
|
)
|
|
@@ -904,8 +905,9 @@ class Processor():
|
|
|
904
905
|
cwd = self.old_pwd
|
|
905
906
|
else:
|
|
906
907
|
cwd = getcwd()
|
|
907
|
-
ret =
|
|
908
|
-
|
|
908
|
+
ret = [cand for cand in list_resource_candidates(executable, val,
|
|
909
|
+
cwd=cwd, moduled=self.moduledir)
|
|
910
|
+
if exists(cand)]
|
|
909
911
|
if ret:
|
|
910
912
|
self._base_logger.debug("Resolved %s to absolute path %s" % (val, ret[0]))
|
|
911
913
|
return ret[0]
|
|
@@ -936,9 +938,17 @@ class Processor():
|
|
|
936
938
|
"""
|
|
937
939
|
List all resources found in the filesystem and matching content-type by filename suffix
|
|
938
940
|
"""
|
|
939
|
-
|
|
941
|
+
mimetypes = get_processor_resource_types(None, self.ocrd_tool)
|
|
942
|
+
for res in list_all_resources(self.ocrd_tool['executable'], moduled=self.moduledir):
|
|
940
943
|
res = Path(res)
|
|
941
|
-
|
|
944
|
+
if not '*/*' in mimetypes:
|
|
945
|
+
if res.is_dir() and not 'text/directory' in mimetypes:
|
|
946
|
+
continue
|
|
947
|
+
# if we do not know all MIME types, then keep the file, otherwise require suffix match
|
|
948
|
+
if res.is_file() and not any(res.suffix == MIME_TO_EXT.get(mime, res.suffix)
|
|
949
|
+
for mime in mimetypes):
|
|
950
|
+
continue
|
|
951
|
+
yield res
|
|
942
952
|
|
|
943
953
|
@property
|
|
944
954
|
def module(self):
|
|
@@ -1102,7 +1112,7 @@ class Processor():
|
|
|
1102
1112
|
if not ifiles[i]:
|
|
1103
1113
|
# could be from non-unique with on_error=skip or from true gap
|
|
1104
1114
|
self._base_logger.error(f'Found no file for page {page} in file group {ifg}')
|
|
1105
|
-
if config.OCRD_MISSING_INPUT == '
|
|
1115
|
+
if config.OCRD_MISSING_INPUT == 'ABORT':
|
|
1106
1116
|
raise MissingInputFile(ifg, page, mimetype)
|
|
1107
1117
|
if not any(ifiles):
|
|
1108
1118
|
# must be from non-unique with on_error=skip
|
ocrd/processor/helpers.py
CHANGED
|
@@ -276,7 +276,7 @@ def get_processor(
|
|
|
276
276
|
# set current processing parameters
|
|
277
277
|
processor.workspace = workspace
|
|
278
278
|
processor.page_id = page_id
|
|
279
|
-
processor.input_file_grp = input_file_grp
|
|
280
|
-
processor.output_file_grp = output_file_grp
|
|
279
|
+
processor.input_file_grp = input_file_grp or ''
|
|
280
|
+
processor.output_file_grp = output_file_grp or ''
|
|
281
281
|
return processor
|
|
282
282
|
raise ValueError("Processor class is not known")
|