ocrd 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/network.py CHANGED
@@ -13,7 +13,6 @@ from ocrd_network.cli import (
13
13
  processing_server_cli,
14
14
  processing_worker_cli,
15
15
  processor_server_cli,
16
- resource_manager_server_cli
17
16
  )
18
17
 
19
18
 
@@ -29,4 +28,3 @@ network_cli.add_command(client_cli)
29
28
  network_cli.add_command(processing_server_cli)
30
29
  network_cli.add_command(processing_worker_cli)
31
30
  network_cli.add_command(processor_server_cli)
32
- network_cli.add_command(resource_manager_server_cli)
ocrd/cli/resmgr.py CHANGED
@@ -18,9 +18,9 @@ from ocrd_utils import (
18
18
  getLogger,
19
19
  get_moduledir,
20
20
  get_ocrd_tool_json,
21
+ resource_filename,
21
22
  initLogging,
22
23
  RESOURCE_LOCATIONS,
23
- RESOURCE_TYPES
24
24
  )
25
25
  from ocrd.constants import RESOURCE_USER_LIST_COMMENT
26
26
 
@@ -72,13 +72,13 @@ def list_installed(executable=None):
72
72
  @click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
73
73
  @click.option('-D', '--no-dynamic', default=False, is_flag=True,
74
74
  help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
75
- @click.option('-t', '--resource-type', type=click.Choice(RESOURCE_TYPES), default='file',
75
+ @click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
76
76
  help='Type of resource',)
77
77
  @click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
78
78
  @click.option('-a', '--allow-uninstalled', is_flag=True,
79
79
  help="Allow installing resources for uninstalled processors",)
80
80
  @click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
81
- @click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS), default='data',
81
+ @click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
82
82
  help="Where to store resources - defaults to first location in processor's 'resource_locations' "
83
83
  "list or finally 'data'")
84
84
  @click.argument('executable', required=True)
@@ -107,6 +107,8 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
107
107
  executable = None
108
108
  if name == '*':
109
109
  name = None
110
+ is_url = (any_url.startswith('https://') or any_url.startswith('http://')) if any_url else False
111
+ is_filename = Path(any_url).exists() if any_url else False
110
112
  if executable and not which(executable):
111
113
  if not allow_uninstalled:
112
114
  log.error(f"Executable '{executable}' is not installed. "
@@ -125,30 +127,65 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
125
127
  'path_in_archive': path_in_archive}]
126
128
  )]
127
129
  for this_executable, this_reslist in reslist:
128
- resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
129
- if not location:
130
- location = resource_locations[0]
131
- elif location not in resource_locations:
132
- log.warning(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
133
- f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
134
- res_dest_dir = resmgr.build_resource_dest_dir(location=location, executable=this_executable)
135
- for res_dict in this_reslist:
130
+ for resdict in this_reslist:
131
+ if 'size' in resdict:
132
+ registered = "registered"
133
+ else:
134
+ registered = "unregistered"
135
+ if any_url:
136
+ resdict['url'] = any_url
137
+ if resdict['url'] == '???':
138
+ log.warning(f"Cannot download user resource {resdict['name']}")
139
+ continue
140
+ if resdict['url'].startswith('https://') or resdict['url'].startswith('http://'):
141
+ log.info(f"Downloading {registered} resource '{resdict['name']}' ({resdict['url']})")
142
+ if 'size' not in resdict:
143
+ with requests.head(resdict['url']) as r:
144
+ resdict['size'] = int(r.headers.get('content-length', 0))
145
+ else:
146
+ log.info(f"Copying {registered} resource '{resdict['name']}' ({resdict['url']})")
147
+ urlpath = Path(resdict['url'])
148
+ resdict['url'] = str(urlpath.resolve())
149
+ if Path(urlpath).is_dir():
150
+ resdict['size'] = directory_size(urlpath)
151
+ else:
152
+ resdict['size'] = urlpath.stat().st_size
153
+ if not location:
154
+ location = get_ocrd_tool_json(this_executable)['resource_locations'][0]
155
+ elif location not in get_ocrd_tool_json(this_executable)['resource_locations']:
156
+ log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
157
+ f"refusing to install to invalid location")
158
+ sys.exit(1)
159
+ if location != 'module':
160
+ basedir = resmgr.location_to_resource_dir(location)
161
+ else:
162
+ basedir = get_moduledir(this_executable)
163
+ if not basedir:
164
+ basedir = resmgr.location_to_resource_dir('data')
165
+
136
166
  try:
137
- fpath = resmgr.handle_resource(
138
- res_dict=res_dict,
139
- executable=this_executable,
140
- dest_dir=res_dest_dir,
141
- any_url=any_url,
142
- overwrite=overwrite,
143
- resource_type=resource_type,
144
- path_in_archive=path_in_archive
145
- )
146
- if not fpath:
147
- continue
167
+ with click.progressbar(length=resdict['size']) as bar:
168
+ fpath = resmgr.download(
169
+ this_executable,
170
+ resdict['url'],
171
+ basedir,
172
+ name=resdict['name'],
173
+ resource_type=resdict.get('type', resource_type),
174
+ path_in_archive=resdict.get('path_in_archive', path_in_archive),
175
+ overwrite=overwrite,
176
+ no_subdir=location in ['cwd', 'module'],
177
+ progress_cb=lambda delta: bar.update(delta)
178
+ )
179
+ if registered == 'unregistered':
180
+ log.info(f"{this_executable} resource '{name}' ({any_url}) not a known resource, creating stub "
181
+ f"in {resmgr.user_list}'")
182
+ resmgr.add_to_user_database(this_executable, fpath, url=any_url)
183
+ resmgr.save_user_list()
184
+ log.info(f"Installed resource {resdict['url']} under {fpath}")
148
185
  except FileExistsError as exc:
149
186
  log.info(str(exc))
150
- usage = res_dict.get('parameter_usage', 'as-is')
151
- log.info(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
187
+ log.info(f"Use in parameters as "
188
+ f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
152
189
 
153
190
 
154
191
  @resmgr_cli.command('migrate')
ocrd/processor/base.py CHANGED
@@ -43,14 +43,15 @@ from .ocrd_page_result import OcrdPageResult
43
43
  from ocrd_utils import (
44
44
  VERSION as OCRD_VERSION,
45
45
  MIMETYPE_PAGE,
46
+ MIME_TO_EXT,
46
47
  config,
47
48
  getLogger,
48
49
  list_resource_candidates,
50
+ pushd_popd,
49
51
  list_all_resources,
50
52
  get_processor_resource_types,
51
53
  resource_filename,
52
54
  parse_json_file_with_comments,
53
- pushd_popd,
54
55
  make_file_id,
55
56
  deprecation_warning
56
57
  )
@@ -904,8 +905,9 @@ class Processor():
904
905
  cwd = self.old_pwd
905
906
  else:
906
907
  cwd = getcwd()
907
- ret = list(filter(exists, list_resource_candidates(executable, val,
908
- cwd=cwd, moduled=self.moduledir)))
908
+ ret = [cand for cand in list_resource_candidates(executable, val,
909
+ cwd=cwd, moduled=self.moduledir)
910
+ if exists(cand)]
909
911
  if ret:
910
912
  self._base_logger.debug("Resolved %s to absolute path %s" % (val, ret[0]))
911
913
  return ret[0]
@@ -936,9 +938,17 @@ class Processor():
936
938
  """
937
939
  List all resources found in the filesystem and matching content-type by filename suffix
938
940
  """
939
- for res in list_all_resources(self.executable, ocrd_tool=self.ocrd_tool, moduled=self.moduledir):
941
+ mimetypes = get_processor_resource_types(None, self.ocrd_tool)
942
+ for res in list_all_resources(self.ocrd_tool['executable'], moduled=self.moduledir):
940
943
  res = Path(res)
941
- yield res.name
944
+ if not '*/*' in mimetypes:
945
+ if res.is_dir() and not 'text/directory' in mimetypes:
946
+ continue
947
+ # if we do not know all MIME types, then keep the file, otherwise require suffix match
948
+ if res.is_file() and not any(res.suffix == MIME_TO_EXT.get(mime, res.suffix)
949
+ for mime in mimetypes):
950
+ continue
951
+ yield res
942
952
 
943
953
  @property
944
954
  def module(self):