ocrd 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/resource_manager.py CHANGED
@@ -1,3 +1,4 @@
1
+ from logging import Logger
1
2
  from pathlib import Path
2
3
  from os.path import join
3
4
  from os import environ, listdir, getcwd, unlink
@@ -5,6 +6,7 @@ from shutil import copytree, rmtree, copy
5
6
  from fnmatch import filter as apply_glob
6
7
  from datetime import datetime
7
8
  from tarfile import open as open_tarfile
9
+ from typing import Dict, Optional
8
10
  from urllib.parse import urlparse, unquote
9
11
  from zipfile import ZipFile
10
12
 
@@ -29,6 +31,7 @@ yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:timestamp'
29
31
 
30
32
  from ocrd_validators import OcrdResourceListValidator
31
33
  from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config
34
+ from ocrd_utils.constants import RESOURCES_DIR_SYSTEM, RESOURCE_TYPES, MIME_TO_EXT
32
35
  from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json
33
36
  from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
34
37
 
@@ -47,6 +50,10 @@ class OcrdResourceManager:
47
50
  self._userdir = userdir
48
51
  self.user_list = Path(self.xdg_config_home, 'ocrd', 'resources.yml')
49
52
 
53
+ self.log.info(f"OcrdResourceManager data home path: {self.xdg_data_home}")
54
+ self.log.info(f"OcrdResourceManager config home path: {self.xdg_config_home}")
55
+ self.log.info(f"OcrdResourceManager user list path: {self.user_list}")
56
+
50
57
  if not skip_init:
51
58
  self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
52
59
  if not self.user_list.exists():
@@ -55,6 +62,12 @@ class OcrdResourceManager:
55
62
  self.save_user_list()
56
63
  self.load_resource_list(self.user_list)
57
64
 
65
+ def __repr__(self):
66
+ return f"user_list={str(self.user_list)} " + \
67
+ f"exists={self.user_list.exists()} " + \
68
+ f"database: {len(self.database)} executables " + \
69
+ f"{sum(map(len, self.database.values()))} resources"
70
+
58
71
  @property
59
72
  def userdir(self):
60
73
  if not self._userdir:
@@ -69,19 +82,22 @@ class OcrdResourceManager:
69
82
 
70
83
  @property
71
84
  def xdg_config_home(self):
72
- if self._xdg_config_home:
73
- return self._xdg_config_home
74
- return config.XDG_CONFIG_HOME
85
+ if not self._xdg_config_home:
86
+ self._xdg_config_home = config.XDG_CONFIG_HOME
87
+ return self._xdg_config_home
75
88
 
76
89
  def save_user_list(self, database=None):
77
90
  if not database:
78
91
  database = self.database
92
+ self.log.info(f"Saving resources to path: {self.user_list}")
93
+ self._dedup_database()
79
94
  with open(self.user_list, 'w', encoding='utf-8') as f:
80
95
  f.write(RESOURCE_USER_LIST_COMMENT)
81
96
  f.write('\n')
82
97
  f.write(safe_dump(database))
83
98
 
84
- def load_resource_list(self, list_filename, database=None):
99
+ def load_resource_list(self, list_filename: Path, database=None):
100
+ self.log.info(f"Loading resources from path: {list_filename}")
85
101
  if not database:
86
102
  database = self.database
87
103
  if list_filename.is_file():
@@ -98,30 +114,36 @@ class OcrdResourceManager:
98
114
  database[executable] = list_loaded[executable] + database[executable]
99
115
  return database
100
116
 
101
- def list_available(self, executable=None, dynamic=True, name=None, database=None, url=None):
117
+ def _search_executables(self, executable: Optional[str]):
118
+ skip_executables = ["ocrd-cis-data", "ocrd-import", "ocrd-make"]
119
+ for exec_dir in environ['PATH'].split(':'):
120
+ self.log.debug(f"Searching for executables inside path: {exec_dir}")
121
+ for exec_path in Path(exec_dir).glob(f'{executable}'):
122
+ if not exec_path.name.startswith('ocrd-'):
123
+ self.log.warning(f"OCR-D processor executable '{exec_path}' has no 'ocrd-' prefix")
124
+ if exec_path.name in skip_executables:
125
+ self.log.debug(f"Not an OCR-D processor CLI, skipping '{exec_path}'")
126
+ continue
127
+ self.log.debug(f"Inspecting '{exec_path} --dump-json' for resources")
128
+ ocrd_tool = get_ocrd_tool_json(exec_path)
129
+ for res_dict in ocrd_tool.get('resources', ()):
130
+ if exec_path.name not in self.database:
131
+ self.database[exec_path.name] = []
132
+ self.database[exec_path.name].insert(0, res_dict)
133
+
134
+ def list_available(
135
+ self, executable: str = None, dynamic: bool = True, name: str = None, database: Dict = None, url: str = None
136
+ ):
102
137
  """
103
138
  List models available for download by processor
104
139
  """
105
140
  if not database:
106
141
  database = self.database
107
142
  if not executable:
108
- return database.items()
143
+ return list(database.items())
109
144
  if dynamic:
110
- skip_executables = ["ocrd-cis-data", "ocrd-import", "ocrd-make"]
111
- for exec_dir in environ['PATH'].split(':'):
112
- for exec_path in Path(exec_dir).glob(f'{executable}'):
113
- if not exec_path.name.startswith('ocrd-'):
114
- self.log.warning(f"OCR-D processor executable '{exec_path}' has no 'ocrd-' prefix")
115
- if exec_path.name in skip_executables:
116
- self.log.debug(f"Not an OCR-D processor CLI, skipping '{exec_path}'")
117
- continue
118
- self.log.debug(f"Inspecting '{exec_path} --dump-json' for resources")
119
- ocrd_tool = get_ocrd_tool_json(exec_path)
120
- for resdict in ocrd_tool.get('resources', ()):
121
- if exec_path.name not in database:
122
- database[exec_path.name] = []
123
- database[exec_path.name].insert(0, resdict)
124
- database = self._dedup_database(database)
145
+ self._search_executables(executable)
146
+ self.save_user_list()
125
147
  found = False
126
148
  ret = []
127
149
  for k in database:
@@ -139,7 +161,7 @@ class OcrdResourceManager:
139
161
  ret = [(executable, [])]
140
162
  return ret
141
163
 
142
- def list_installed(self, executable=None):
164
+ def list_installed(self, executable: str = None):
143
165
  """
144
166
  List installed resources, matching with registry by ``name``
145
167
  """
@@ -150,28 +172,24 @@ class OcrdResourceManager:
150
172
  # resources we know about
151
173
  all_executables = list(self.database.keys())
152
174
  # resources in the file system
153
- parent_dirs = [join(x, 'ocrd-resources') for x in [self.xdg_data_home, '/usr/local/share']]
175
+ parent_dirs = [f"{join(self.xdg_data_home, 'ocrd-resources')}", RESOURCES_DIR_SYSTEM]
154
176
  for parent_dir in parent_dirs:
155
177
  if Path(parent_dir).exists():
156
178
  all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
157
179
  for this_executable in set(all_executables):
158
180
  reslist = []
159
- mimetypes = get_processor_resource_types(this_executable)
160
181
  moduledir = get_moduledir(this_executable)
161
- for res_filename in list_all_resources(this_executable, moduled=moduledir, xdg_data_home=self.xdg_data_home):
162
- res_filename = Path(res_filename)
163
- if not '*/*' in mimetypes:
164
- if res_filename.is_dir() and not 'text/directory' in mimetypes:
165
- continue
166
- if res_filename.is_file() and ['text/directory'] == mimetypes:
167
- continue
182
+ resdict_list = self.list_available(executable=this_executable)[0][1]
183
+ for res_filename in list_all_resources(this_executable,
184
+ moduled=moduledir,
185
+ xdg_data_home=self.xdg_data_home):
186
+ res_filename = Path(res_filename).resolve()
168
187
  res_name = res_filename.name
169
188
  res_type = 'file' if res_filename.is_file() else 'directory'
170
189
  res_size = res_filename.stat().st_size if res_filename.is_file() else directory_size(res_filename)
171
- resdict_list = [x for x in self.database.get(this_executable, []) if x['name'] == res_name]
172
- if resdict_list:
173
- resdict = resdict_list[0]
174
- elif str(res_filename.parent) == moduledir:
190
+ if resdict := next((res for res in resdict_list if res['name'] == res_name), False):
191
+ pass
192
+ elif str(res_filename.parent).startswith(moduledir):
175
193
  resdict = {
176
194
  'name': res_name,
177
195
  'url': str(res_filename),
@@ -181,28 +199,28 @@ class OcrdResourceManager:
181
199
  }
182
200
  else:
183
201
  resdict = self.add_to_user_database(this_executable, res_filename, resource_type=res_type)
184
- resdict['path'] = str(res_filename)
202
+ # resdict['path'] = str(res_filename)
185
203
  reslist.append(resdict)
186
204
  ret.append((this_executable, reslist))
205
+ self.save_user_list()
187
206
  return ret
188
207
 
189
208
  def add_to_user_database(self, executable, res_filename, url=None, resource_type='file'):
190
209
  """
191
210
  Add a stub entry to the user resource.yml
192
211
  """
193
- res_name = Path(res_filename).name
194
- self.log.info(f"{executable} resource '{res_name}' ({str(res_filename)}) not a known resource, "
195
- f"creating stub in {self.user_list}'")
212
+ res_name = res_filename.name
196
213
  if Path(res_filename).is_dir():
197
214
  res_size = directory_size(res_filename)
198
215
  else:
199
216
  res_size = Path(res_filename).stat().st_size
200
- with open(self.user_list, 'r', encoding='utf-8') as f:
201
- user_database = safe_load(f) or {}
217
+ user_database = self.load_resource_list(self.user_list)
202
218
  if executable not in user_database:
203
219
  user_database[executable] = []
204
220
  resources_found = self.list_available(executable=executable, name=res_name, database=user_database)[0][1]
205
221
  if not resources_found:
222
+ self.log.info(f"{executable} resource '{res_name}' ({str(res_filename)}) not a known resource, "
223
+ f"creating stub in {self.user_list}'")
206
224
  resdict = {
207
225
  'name': res_name,
208
226
  'url': url if url else '???',
@@ -222,20 +240,45 @@ class OcrdResourceManager:
222
240
  def default_resource_dir(self):
223
241
  return self.location_to_resource_dir('data')
224
242
 
225
- def location_to_resource_dir(self, location):
226
- return '/usr/local/share/ocrd-resources' if location == 'system' else \
227
- join(self.xdg_data_home, 'ocrd-resources') if location == 'data' else \
228
- getcwd()
243
+ def location_to_resource_dir(self, location: str) -> str:
244
+ if location == 'data':
245
+ return join(self.xdg_data_home, 'ocrd-resources')
246
+ if location == 'system':
247
+ return RESOURCES_DIR_SYSTEM
248
+ return getcwd()
229
249
 
230
- def resource_dir_to_location(self, resource_path):
250
+ def resource_dir_to_location(self, resource_path: Path) -> str:
231
251
  resource_path = str(resource_path)
232
- return 'system' if resource_path.startswith('/usr/local/share/ocrd-resources') else \
233
- 'data' if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')) else \
234
- 'cwd' if resource_path.startswith(getcwd()) else \
235
- resource_path
252
+ if resource_path.startswith(RESOURCES_DIR_SYSTEM):
253
+ return 'system'
254
+ if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')):
255
+ return 'data'
256
+ if resource_path.startswith(getcwd()):
257
+ return 'cwd'
258
+ return resource_path
259
+
260
+ def build_resource_dest_dir(self, location: str, executable: str) -> Path:
261
+ if location == 'module':
262
+ base_dir = get_moduledir(executable)
263
+ if not base_dir:
264
+ base_dir = self.location_to_resource_dir('data')
265
+ else:
266
+ base_dir = self.location_to_resource_dir(location)
267
+ no_subdir = location in ['cwd', 'module']
268
+ dest_dir = Path(base_dir) if no_subdir else Path(base_dir, executable)
269
+ return dest_dir
236
270
 
237
271
  @staticmethod
238
- def parameter_usage(name, usage='as-is'):
272
+ def remove_resource(log: Logger, resource_path: Path):
273
+ if resource_path.is_dir():
274
+ log.info(f"Removing existing target resource directory {resource_path}")
275
+ rmtree(str(resource_path))
276
+ else:
277
+ log.info(f"Removing existing target resource file {resource_path}")
278
+ unlink(str(resource_path))
279
+
280
+ @staticmethod
281
+ def parameter_usage(name: str, usage: str = 'as-is') -> str:
239
282
  if usage == 'as-is':
240
283
  return name
241
284
  elif usage == 'without-extension':
@@ -243,8 +286,7 @@ class OcrdResourceManager:
243
286
  raise ValueError(f"No such usage '{usage}'")
244
287
 
245
288
  @staticmethod
246
- def _download_impl(url, filename, progress_cb=None, size=None):
247
- log = getLogger('ocrd.resource_manager._download_impl')
289
+ def _download_impl(log: Logger, url: str, filename):
248
290
  log.info(f"Downloading {url} to {filename}")
249
291
  try:
250
292
  gdrive_file_id, is_gdrive_download_link = gparse_url(url, warning=False)
@@ -256,13 +298,11 @@ class OcrdResourceManager:
256
298
  if "Content-Disposition" not in r.headers:
257
299
  url = get_url_from_gdrive_confirmation(r.text)
258
300
  except RuntimeError as e:
259
- log.warning("Cannot unwrap Google Drive URL: %s", e)
301
+ log.warning(f"Cannot unwrap Google Drive URL: {e}")
260
302
  with open(filename, 'wb') as f:
261
303
  with requests.get(url, stream=True) as r:
262
304
  r.raise_for_status()
263
305
  for data in r.iter_content(chunk_size=4096):
264
- if progress_cb:
265
- progress_cb(len(data))
266
306
  f.write(data)
267
307
  except Exception as e:
268
308
  rmtree(filename, ignore_errors=True)
@@ -270,22 +310,18 @@ class OcrdResourceManager:
270
310
  raise e
271
311
 
272
312
  @staticmethod
273
- def _copy_file(src, dst, progress_cb=None):
274
- log = getLogger('ocrd.resource_manager._copy_file')
313
+ def _copy_file(log: Logger, src, dst):
275
314
  log.info(f"Copying file {src} to {dst}")
276
315
  with open(dst, 'wb') as f_out, open(src, 'rb') as f_in:
277
316
  while True:
278
317
  chunk = f_in.read(4096)
279
318
  if chunk:
280
319
  f_out.write(chunk)
281
- if progress_cb:
282
- progress_cb(len(chunk))
283
320
  else:
284
321
  break
285
322
 
286
323
  @staticmethod
287
- def _copy_dir(src, dst, progress_cb=None):
288
- log = getLogger('ocrd.resource_manager._copy_dir')
324
+ def _copy_dir(log: Logger, src, dst):
289
325
  log.info(f"Copying dir recursively from {src} to {dst}")
290
326
  if not Path(src).is_dir():
291
327
  raise ValueError(f"The source is not a directory: {src}")
@@ -293,76 +329,123 @@ class OcrdResourceManager:
293
329
  for child in Path(src).rglob('*'):
294
330
  child_dst = Path(dst) / child.relative_to(src)
295
331
  if Path(child).is_dir():
296
- OcrdResourceManager._copy_dir(child, child_dst, progress_cb)
332
+ OcrdResourceManager._copy_dir(log, child, child_dst)
297
333
  else:
298
- OcrdResourceManager._copy_file(child, child_dst, progress_cb)
334
+ OcrdResourceManager._copy_file(log, child, child_dst)
299
335
 
300
336
  @staticmethod
301
- def _copy_impl(src_filename, filename, progress_cb=None):
302
- log = getLogger('ocrd.resource_manager._copy_impl')
337
+ def _copy_impl(log: Logger, src_filename, filename):
303
338
  log.info(f"Copying {src_filename} to {filename}")
304
339
  if Path(src_filename).is_dir():
305
- OcrdResourceManager._copy_dir(src_filename, filename, progress_cb)
340
+ OcrdResourceManager._copy_dir(log, src_filename, filename)
306
341
  else:
307
- OcrdResourceManager._copy_file(src_filename, filename, progress_cb)
342
+ OcrdResourceManager._copy_file(log, src_filename, filename)
343
+
344
+ @staticmethod
345
+ def _extract_archive(log: Logger, tempdir: Path, path_in_archive: str, fpath: Path, archive_fname: str):
346
+ Path('out').mkdir()
347
+ with pushd_popd('out'):
348
+ mimetype = guess_media_type(f'../{archive_fname}', fallback='application/octet-stream')
349
+ log.info(f"Extracting {mimetype} archive to {tempdir}/out")
350
+ if mimetype == 'application/zip':
351
+ with ZipFile(f'../{archive_fname}', 'r') as zipf:
352
+ zipf.extractall()
353
+ elif mimetype in ('application/gzip', 'application/x-xz'):
354
+ with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
355
+ tar.extractall()
356
+ else:
357
+ raise RuntimeError(f"Unable to handle extraction of {mimetype} archive")
358
+ log.info(f"Copying '{path_in_archive}' from archive to {fpath}")
359
+ if Path(path_in_archive).is_dir():
360
+ copytree(path_in_archive, str(fpath))
361
+ else:
362
+ copy(path_in_archive, str(fpath))
363
+
364
+ def copy_resource(
365
+ self, log: Logger, url: str, fpath: Path, resource_type: str = 'file', path_in_archive: str = '.'
366
+ ) -> Path:
367
+ """
368
+ Copy a local resource to another destination
369
+ """
370
+ if resource_type == 'archive':
371
+ archive_fname = 'download.tar.xx'
372
+ with pushd_popd(tempdir=True) as tempdir:
373
+ self._copy_impl(log, url, archive_fname)
374
+ self._extract_archive(log, tempdir, path_in_archive, fpath, archive_fname)
375
+ else:
376
+ self._copy_impl(log, url, fpath)
377
+ return fpath
378
+
379
+ def download_resource(
380
+ self, log: Logger, url: str, fpath: Path, resource_type: str = 'file', path_in_archive: str = '.'
381
+ ) -> Path:
382
+ """
383
+ Download a resource by URL to a destination directory
384
+ """
385
+ if resource_type == 'archive':
386
+ archive_fname = 'download.tar.xx'
387
+ with pushd_popd(tempdir=True) as tempdir:
388
+ self._download_impl(log, url, archive_fname)
389
+ self._extract_archive(log, tempdir, path_in_archive, fpath, archive_fname)
390
+ else:
391
+ self._download_impl(log, url, fpath)
392
+ return fpath
308
393
 
309
394
  # TODO Proper caching (make head request for size, If-Modified etc)
310
- def download(
311
- self, executable, url, basedir, overwrite=False, no_subdir=False, name=None, resource_type='file',
312
- path_in_archive='.', progress_cb=None,
313
- ):
395
+ def handle_resource(
396
+ self, res_dict: Dict, executable: str, dest_dir: Path, any_url: str, overwrite: bool = False,
397
+ resource_type: str = 'file', path_in_archive: str = '.'
398
+ ) -> Optional[Path]:
314
399
  """
315
- Download a resource by URL
400
+ Download or Copy a resource by URL to a destination directory
316
401
  """
317
- log = getLogger('ocrd.resource_manager.download')
318
- destdir = Path(basedir) if no_subdir else Path(basedir, executable)
319
- if not name:
320
- url_parsed = urlparse(url)
321
- name = Path(unquote(url_parsed.path)).name
322
- fpath = Path(destdir, name)
323
- is_url = url.startswith('https://') or url.startswith('http://')
402
+ log = getLogger('ocrd.resource_manager.handle_resource')
403
+ registered = "registered" if "size" in res_dict else "unregistered"
404
+ resource_type = res_dict.get('type', resource_type)
405
+ resource_name = res_dict.get('name', None)
406
+ path_in_archive = res_dict.get('path_in_archive', path_in_archive)
407
+
408
+ if resource_type not in RESOURCE_TYPES:
409
+ raise ValueError(f"Unknown resource type: {resource_type}, must be one of: {RESOURCE_TYPES}")
410
+ if any_url:
411
+ res_dict['url'] = any_url
412
+ if not resource_name:
413
+ url_parsed = urlparse(res_dict['url'])
414
+ resource_name = Path(unquote(url_parsed.path)).name
415
+ if resource_type == 'archive' and path_in_archive != '.':
416
+ resource_name = Path(path_in_archive).name
417
+ if res_dict['url'] == '???':
418
+ log.warning(f"Skipping user resource {resource_name} since download url is: {res_dict['url']}")
419
+ return None
420
+
421
+ fpath = Path(dest_dir, resource_name)
324
422
  if fpath.exists():
325
423
  if not overwrite:
326
424
  fpath_type = 'Directory' if fpath.is_dir() else 'File'
327
425
  log.warning(f"{fpath_type} {fpath} already exists but --overwrite is not set, skipping the download")
328
426
  # raise FileExistsError(f"{fpath_type} {fpath} already exists but --overwrite is not set")
329
427
  return fpath
330
- if fpath.is_dir():
331
- log.info(f"Removing existing target directory {fpath}")
332
- rmtree(str(fpath))
333
- else:
334
- log.info(f"Removing existing target file {fpath}")
335
- unlink(str(fpath))
336
- destdir.mkdir(parents=True, exist_ok=True)
337
- if resource_type in ('file', 'directory'):
338
- if is_url:
339
- self._download_impl(url, fpath, progress_cb)
340
- else:
341
- self._copy_impl(url, fpath, progress_cb)
342
- elif resource_type == 'archive':
343
- archive_fname = 'download.tar.xx'
344
- with pushd_popd(tempdir=True) as tempdir:
345
- if is_url:
346
- self._download_impl(url, archive_fname, progress_cb)
347
- else:
348
- self._copy_impl(url, archive_fname, progress_cb)
349
- Path('out').mkdir()
350
- with pushd_popd('out'):
351
- mimetype = guess_media_type(f'../{archive_fname}', fallback='application/octet-stream')
352
- log.info(f"Extracting {mimetype} archive to {tempdir}/out")
353
- if mimetype == 'application/zip':
354
- with ZipFile(f'../{archive_fname}', 'r') as zipf:
355
- zipf.extractall()
356
- elif mimetype in ('application/gzip', 'application/x-xz'):
357
- with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
358
- tar.extractall()
359
- else:
360
- raise RuntimeError(f"Unable to handle extraction of {mimetype} archive {url}")
361
- log.info(f"Copying '{path_in_archive}' from archive to {fpath}")
362
- if Path(path_in_archive).is_dir():
363
- copytree(path_in_archive, str(fpath))
364
- else:
365
- copy(path_in_archive, str(fpath))
428
+ self.remove_resource(log, resource_path=fpath)
429
+ dest_dir.mkdir(parents=True, exist_ok=True)
430
+
431
+ # TODO @mehmedGIT: Consider properly handling cases for invalid URLs.
432
+ if res_dict['url'].startswith('https://') or res_dict['url'].startswith('http://'):
433
+ log.info(f"Downloading {registered} resource '{resource_name}' ({res_dict['url']})")
434
+ if 'size' not in res_dict:
435
+ with requests.head(res_dict['url']) as r:
436
+ res_dict['size'] = int(r.headers.get('content-length', 0))
437
+ fpath = self.download_resource(log, res_dict['url'], fpath, resource_type, path_in_archive)
438
+ else:
439
+ log.info(f"Copying {registered} resource '{resource_name}' ({res_dict['url']})")
440
+ urlpath = Path(res_dict['url'])
441
+ res_dict['url'] = str(urlpath.resolve())
442
+ res_dict['size'] = directory_size(urlpath) if Path(urlpath).is_dir() else urlpath.stat().st_size
443
+ fpath = self.copy_resource(log, res_dict['url'], fpath, resource_type, path_in_archive)
444
+
445
+ if registered == 'unregistered':
446
+ self.add_to_user_database(executable, fpath, url=res_dict['url'])
447
+ self.save_user_list()
448
+ log.info(f"Installed resource {res_dict['url']} under {fpath}")
366
449
  return fpath
367
450
 
368
451
  def _dedup_database(self, database=None, dedup_key='name'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.1.1
3
+ Version: 3.2.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -16,7 +16,7 @@ Requires-Dist: beanie~=1.7
16
16
  Requires-Dist: click>=7
17
17
  Requires-Dist: cryptography<43.0.0
18
18
  Requires-Dist: Deprecated==1.2.0
19
- Requires-Dist: docker
19
+ Requires-Dist: docker>=7.1.0
20
20
  Requires-Dist: elementpath
21
21
  Requires-Dist: fastapi>=0.78.0
22
22
  Requires-Dist: filetype
@@ -5,7 +5,7 @@ ocrd/mets_server.py,sha256=EL6CMtVjmVPs9--3Vn-JyhU2VDMNw-6QN_XWRKFp6uk,22451
5
5
  ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
6
  ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
- ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
8
+ ocrd/resource_manager.py,sha256=CLwzUnk_uVOauyAPd0KhMjqBHmKNVluvS5QsK5O0m1A,20447
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
10
  ocrd/workspace.py,sha256=-j3X83K0f4vtd5jwfu6_R53RJ2R8gt1HYpyrv8YP2bg,65661
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
@@ -13,12 +13,12 @@ ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,1197
13
13
  ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
14
14
  ocrd/cli/bashlib.py,sha256=ypFBM3-IULz_IEBx0Y04eGt9VbQWwEWm4ujm9g_hPWY,6009
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
- ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
16
+ ocrd/cli/network.py,sha256=LNibNBtHGvtXuEwzFz1bLAfkroLWyMPsadTaNVwzUko,687
17
17
  ocrd/cli/ocrd_tool.py,sha256=EyD5VdLm2WTzQnR-hZKpn-D4-dsWr2PIE5IoY1O3mfE,7357
18
18
  ocrd/cli/process.py,sha256=8KD0i7LT01H9u5CC1vktYMEVpS67da_rp_09_EOECmw,1233
19
- ocrd/cli/resmgr.py,sha256=mk8KZweC_7ENAFnC6FvFf7Zv_W1wqJTmk0EMd9XSvf4,10132
19
+ ocrd/cli/resmgr.py,sha256=h9cSbp3_4sHkBqhWU7cThoUxcIqgrtD81st2nxZdnaw,8024
20
20
  ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
- ocrd/cli/workspace.py,sha256=KTbSzIUrba5WoYETvM9ElRZVsDUHCGVvjoFgBGZS2nU,40468
21
+ ocrd/cli/workspace.py,sha256=bsp6YXEgwABIUFbSENmxV1c4oxRwc2L-BpeDPlYfhHE,40501
22
22
  ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
23
  ocrd/decorators/__init__.py,sha256=PyXX7vxdWkRHixas9dWUtyO3YLczcly8ZEpfZDSMVp8,7639
24
24
  ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
@@ -26,7 +26,7 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=GcfVrgCvfHbrxngwl1VzcaZ5z7QV2e1Cn7CIjBYdcHc,59480
29
+ ocrd/processor/base.py,sha256=tKN-xyPehFpwV3ztyLcOcuA_fWTNdRKu5E8V_jKOO2g,59400
30
30
  ocrd/processor/helpers.py,sha256=gIc6PdvOS1sR0UkYlrdZopImAXxXglDBNpgNZGWHO7Y,10987
31
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -36,7 +36,7 @@ ocrd/processor/builtin/dummy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
36
36
  ocrd/processor/builtin/dummy/ocrd-tool.json,sha256=VoI37paWiUyMkTN5Qqau8R1Clmw24-HcZu4wjy1Br9Y,2311
37
37
  ocrd_modelfactory/__init__.py,sha256=NyJT1uSvmeEwibRFOkh0AEoVnYfP0mzxU--pP23B-TQ,4404
38
38
  ocrd_models/__init__.py,sha256=A0aj0mOraNb-xfiUueACdoaqISnp0qH-F49nTJg2vCs,380
39
- ocrd_models/constants.py,sha256=fI6Qz4OPOm6UBLQ_P2dlpjcwB0XFJZ7AgxxKqgc75X0,2724
39
+ ocrd_models/constants.py,sha256=kvvAAro_1YOTRWwFgbrGEFeDZ8_u0S624Y3icNNk4Oo,2987
40
40
  ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,1203
41
41
  ocrd_models/ocrd_agent.py,sha256=E9OtDhz9UfKb6ou2qvsuCL9NlO1V6zMb0s8nVq8dVos,5609
42
42
  ocrd_models/ocrd_exif.py,sha256=wRSprHxCy9LCXw41Fi9kp-CbFc5NFX9ZFIFNszB41qk,4585
@@ -48,26 +48,28 @@ ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,
48
48
  ocrd_models/report.py,sha256=luZxvzAAQyGYOlRNSJQUIUIANG81iGmBW5ag-uXxKCA,2026
49
49
  ocrd_models/utils.py,sha256=0_WHf5NEn1WC8MKJc6X_RK8gW-70Z09_mslkKOj7uF8,2369
50
50
  ocrd_models/xpath_functions.py,sha256=AwR8tHf56-mmIksnw_GeOQ760sxNHqK92T7z9OfsEEs,1437
51
- ocrd_network/__init__.py,sha256=gMejC614J5PPGgXDKBiQS0jt-Jx8qOrLbWH7zt8x8Gs,374
51
+ ocrd_network/__init__.py,sha256=tLSOf5iTNSWjOWep_xY1sU2YZJu7uv9-3qLMuLrwyoU,433
52
52
  ocrd_network/client.py,sha256=rzqtamZ8krRRy-QTO-AeWH8Lr3HhRiQe2R1-Lovd40g,3020
53
53
  ocrd_network/client_utils.py,sha256=VVZMNBgGznh41exZ78S48X3DDwHcWTuOq-LNdxjRvak,5002
54
- ocrd_network/constants.py,sha256=IeNtcU6dqonDE3Zw83_61auhS8X2b8wsjAxYg1zvK-M,1902
54
+ ocrd_network/constants.py,sha256=fxbC4QyeQp38KDRWjqnbfYjt3nUx6xT_mkmDt6PylyY,2033
55
55
  ocrd_network/database.py,sha256=fcft7vdRDoR7vmPL1xNYTIeOg5DwRPcggwYDYxLy5ik,10706
56
- ocrd_network/logging_utils.py,sha256=bO9TQqBXw9CIZEKp8tHXbeE2NuJWMiaQDHzS05b4ajo,2153
56
+ ocrd_network/logging_utils.py,sha256=mNYaLmKIf2Wg1gCU_FbicJC54mPhYPEVinmB7rHEdUs,2374
57
57
  ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
58
58
  ocrd_network/process_helpers.py,sha256=KpkInXsa5bgrxvTOouyMJ0NgJhaz0J9Gjs5sZHBcH64,2373
59
59
  ocrd_network/processing_server.py,sha256=qBiYk4wgTLqhHvbmDWu_F626BfSfyvkoCD-i0ZwsBSE,42109
60
60
  ocrd_network/processing_worker.py,sha256=fhIvmDQAYOkHYtUs5IB8Jk2lOKUTIBk3DskAsFloijA,12591
61
61
  ocrd_network/processor_server.py,sha256=2CD9TlinXk6x1jFjP5VWOXgJe8lAQdxc9zjZuVy3EOw,9931
62
+ ocrd_network/resource_manager_server.py,sha256=dFFWsp1O7H3RzdjbQC_fn1SVV1QIjkVbg2l98MMh-HI,6852
62
63
  ocrd_network/server_cache.py,sha256=LpvJ-_Lbaeo4M3t8rZDdm9DAErZr8lDlma6pYc0m7aQ,13149
63
64
  ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,12222
64
65
  ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
65
66
  ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
66
- ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
67
+ ocrd_network/cli/__init__.py,sha256=YLt1LAcWrl1YfA_UsLYwWUGYsX84wCd1AZwgsUqjHC8,396
67
68
  ocrd_network/cli/client.py,sha256=gFEXjz-d074CpvimqaM4kJRbJVNYRAOK-jsUl2EAUVs,8424
68
69
  ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
69
70
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
70
71
  ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
72
+ ocrd_network/cli/resmgr_server.py,sha256=sc0VX_RehTbg8Qp7ht_DvVqsrdL5b9Zw3bBgWcAD13A,826
71
73
  ocrd_network/models/__init__.py,sha256=AcpZrenygOudMi47Wfr1UCrpbghq2gP8aMAKodgEIFM,527
72
74
  ocrd_network/models/job.py,sha256=2-E1cKfdTC3Y5AUemCLz1a_t7BlT8BNF6teAC0f8J5o,4442
73
75
  ocrd_network/models/messages.py,sha256=XnyLMX77NchgmtKJRtqtBFsk_sCR4OGEuWm_d3uDkj8,657
@@ -85,18 +87,18 @@ ocrd_network/runtime_data/__init__.py,sha256=3jYkmT4mxMUcpbDaSw7Ld0KTedGEx_5vUQP
85
87
  ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
86
88
  ocrd_network/runtime_data/connection_clients.py,sha256=DZyAvkNyMaIddGJs56s2pMP_fK-XWAtICxk1cjvkWYM,4207
87
89
  ocrd_network/runtime_data/deployer.py,sha256=LkDUG0uJf_V4SteiOM3EWwhKtdANCjmAOEAJJDshN30,9111
88
- ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZSj_WE,12185
89
- ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
90
+ ocrd_network/runtime_data/hosts.py,sha256=QkluX5aMCliE8YoH0rJcc9pNprx2Mw5l8_eOq5NJ74s,12319
91
+ ocrd_network/runtime_data/network_agents.py,sha256=uJA6bB8O5XUvRc-wG4b4YKuArQOrQXKJMpMq1TC5bco,6289
90
92
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
91
- ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
93
+ ocrd_utils/__init__.py,sha256=mdLZyNz2W8t3ht8JMvbGaEfvQmYqXCWLBFcJXY6_fmA,6020
92
94
  ocrd_utils/config.py,sha256=1_8j4kpKK5gxhFKObbBEzmq4JazTtKEkGe14Ch_tpw0,11796
93
- ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
95
+ ocrd_utils/constants.py,sha256=6lqMLeJdkFBlvGVmGjcExWbRKzNU6QT0kADBb5BkcBc,3464
94
96
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
95
97
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
96
98
  ocrd_utils/introspect.py,sha256=gfBlmeEFuRmRUSgdSK0jOxRpYqDRXl2IAE6gv2MZ6as,1977
97
99
  ocrd_utils/logging.py,sha256=sHNfih9kBfvKsHdLqMK_ew9Pld1GsRyYlrZHIySujnw,7313
98
100
  ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
99
- ocrd_utils/os.py,sha256=acRRdDBI8L6BK0Mf773yKEzwdpZSFRBJEKB2crL4EjU,9865
101
+ ocrd_utils/os.py,sha256=EuxDV1mW6VRrtbUxAYb3G-teV9blICA-cXqbpzJjJfo,13564
100
102
  ocrd_utils/str.py,sha256=cRgqYILDGOAqWr0qrCrV52I3y4wvpwDVtnBGEUjXNS4,10116
101
103
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
102
104
  ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
@@ -106,7 +108,7 @@ ocrd_validators/message_processing.schema.yml,sha256=HL7o96-7ejslVMXcp16sbo5IjfU
106
108
  ocrd_validators/message_result.schema.yml,sha256=G6vt_JgIU7OGSaHj-2Jna6KWQ3bFWol5tnBArWEiVjM,681
107
109
  ocrd_validators/mets.xsd,sha256=0Wrs9bObn0n-yEEIWyguIcUUuuP6KMEjD4I_p1_UlwY,138290
108
110
  ocrd_validators/ocrd_network_message_validator.py,sha256=oafNWOjieBmTHFfYeCtyFFpW1gI0lDT6ycRr5Kvmfq0,561
109
- ocrd_validators/ocrd_tool.schema.yml,sha256=BQkRIRDbn9B8gFeVxz_EpNdleh_x2dCtIpJEC4HqFHw,10125
111
+ ocrd_validators/ocrd_tool.schema.yml,sha256=fDNr-QdEOBtYbz8aHmjdOUirPBKr3vfLUDtC88gu75U,10231
110
112
  ocrd_validators/ocrd_tool_validator.py,sha256=CvfUHC0vJsPnmFBNf4ndQMXX6pcERCjJMAWk2L-T_ww,747
111
113
  ocrd_validators/ocrd_zip_validator.py,sha256=udAImFFliJY3y1FcKwZ52rhpjYB2Iv491hWDxdet8w0,3685
112
114
  ocrd_validators/page.xsd,sha256=abQ8C3gRLPMFm8lH62aTCfvTIWI23TpgEDcaW9YCt7I,85770
@@ -115,14 +117,14 @@ ocrd_validators/parameter_validator.py,sha256=_5Y3IS24Sf_xHBkB3TE3jB9VTCbbjWO8bS
115
117
  ocrd_validators/processing_server_config.schema.yml,sha256=8NQbhSshm1exTvbdYiu694rZZ-Xe70_vQtsJ0nd7ZCM,5432
116
118
  ocrd_validators/processing_server_config_validator.py,sha256=lQ2-ZxsvbFki_SvE_N4_1ptBnBHcwOTJ5grtL2G9F8A,810
117
119
  ocrd_validators/resource_list_validator.py,sha256=cFMj0n_x-tjhuNUpjgEvPP8iPVm7lme9TWAaqATasV0,776
118
- ocrd_validators/workspace_validator.py,sha256=rEXIwjtNpt8HcTv94fKed3vVlA3U4z7Xmm1ZL1VHC84,17892
120
+ ocrd_validators/workspace_validator.py,sha256=JNPsRVPgQI0vsaxcs_c3qj22GagdZcgO3v9u3sbBbBI,20340
119
121
  ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,3180
120
122
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
121
123
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
122
124
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
123
- ocrd-3.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
- ocrd-3.1.1.dist-info/METADATA,sha256=athV-jzUTYFDjcbykQcSH6JXva7mNDVyHJgf3LCof8s,10442
125
- ocrd-3.1.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
126
- ocrd-3.1.1.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
- ocrd-3.1.1.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
- ocrd-3.1.1.dist-info/RECORD,,
125
+ ocrd-3.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
126
+ ocrd-3.2.0.dist-info/METADATA,sha256=mOJkMaF0CIVwItP-6wrDIdRzZidf11AMcU5J_HubyxQ,10449
127
+ ocrd-3.2.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
128
+ ocrd-3.2.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
129
+ ocrd-3.2.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
130
+ ocrd-3.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.3.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5