ocrd 3.2.0__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/resource_manager.py CHANGED
@@ -1,4 +1,3 @@
1
- from logging import Logger
2
1
  from pathlib import Path
3
2
  from os.path import join
4
3
  from os import environ, listdir, getcwd, unlink
@@ -6,7 +5,6 @@ from shutil import copytree, rmtree, copy
6
5
  from fnmatch import filter as apply_glob
7
6
  from datetime import datetime
8
7
  from tarfile import open as open_tarfile
9
- from typing import Dict, Optional
10
8
  from urllib.parse import urlparse, unquote
11
9
  from zipfile import ZipFile
12
10
 
@@ -31,7 +29,6 @@ yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:timestamp'
31
29
 
32
30
  from ocrd_validators import OcrdResourceListValidator
33
31
  from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config
34
- from ocrd_utils.constants import RESOURCES_DIR_SYSTEM, RESOURCE_TYPES, MIME_TO_EXT
35
32
  from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json
36
33
  from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
37
34
 
@@ -50,10 +47,6 @@ class OcrdResourceManager:
50
47
  self._userdir = userdir
51
48
  self.user_list = Path(self.xdg_config_home, 'ocrd', 'resources.yml')
52
49
 
53
- self.log.info(f"OcrdResourceManager data home path: {self.xdg_data_home}")
54
- self.log.info(f"OcrdResourceManager config home path: {self.xdg_config_home}")
55
- self.log.info(f"OcrdResourceManager user list path: {self.user_list}")
56
-
57
50
  if not skip_init:
58
51
  self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
59
52
  if not self.user_list.exists():
@@ -62,12 +55,6 @@ class OcrdResourceManager:
62
55
  self.save_user_list()
63
56
  self.load_resource_list(self.user_list)
64
57
 
65
- def __repr__(self):
66
- return f"user_list={str(self.user_list)} " + \
67
- f"exists={self.user_list.exists()} " + \
68
- f"database: {len(self.database)} executables " + \
69
- f"{sum(map(len, self.database.values()))} resources"
70
-
71
58
  @property
72
59
  def userdir(self):
73
60
  if not self._userdir:
@@ -82,22 +69,19 @@ class OcrdResourceManager:
82
69
 
83
70
  @property
84
71
  def xdg_config_home(self):
85
- if not self._xdg_config_home:
86
- self._xdg_config_home = config.XDG_CONFIG_HOME
87
- return self._xdg_config_home
72
+ if self._xdg_config_home:
73
+ return self._xdg_config_home
74
+ return config.XDG_CONFIG_HOME
88
75
 
89
76
  def save_user_list(self, database=None):
90
77
  if not database:
91
78
  database = self.database
92
- self.log.info(f"Saving resources to path: {self.user_list}")
93
- self._dedup_database()
94
79
  with open(self.user_list, 'w', encoding='utf-8') as f:
95
80
  f.write(RESOURCE_USER_LIST_COMMENT)
96
81
  f.write('\n')
97
82
  f.write(safe_dump(database))
98
83
 
99
- def load_resource_list(self, list_filename: Path, database=None):
100
- self.log.info(f"Loading resources from path: {list_filename}")
84
+ def load_resource_list(self, list_filename, database=None):
101
85
  if not database:
102
86
  database = self.database
103
87
  if list_filename.is_file():
@@ -114,36 +98,30 @@ class OcrdResourceManager:
114
98
  database[executable] = list_loaded[executable] + database[executable]
115
99
  return database
116
100
 
117
- def _search_executables(self, executable: Optional[str]):
118
- skip_executables = ["ocrd-cis-data", "ocrd-import", "ocrd-make"]
119
- for exec_dir in environ['PATH'].split(':'):
120
- self.log.debug(f"Searching for executables inside path: {exec_dir}")
121
- for exec_path in Path(exec_dir).glob(f'{executable}'):
122
- if not exec_path.name.startswith('ocrd-'):
123
- self.log.warning(f"OCR-D processor executable '{exec_path}' has no 'ocrd-' prefix")
124
- if exec_path.name in skip_executables:
125
- self.log.debug(f"Not an OCR-D processor CLI, skipping '{exec_path}'")
126
- continue
127
- self.log.debug(f"Inspecting '{exec_path} --dump-json' for resources")
128
- ocrd_tool = get_ocrd_tool_json(exec_path)
129
- for res_dict in ocrd_tool.get('resources', ()):
130
- if exec_path.name not in self.database:
131
- self.database[exec_path.name] = []
132
- self.database[exec_path.name].insert(0, res_dict)
133
-
134
- def list_available(
135
- self, executable: str = None, dynamic: bool = True, name: str = None, database: Dict = None, url: str = None
136
- ):
101
+ def list_available(self, executable=None, dynamic=True, name=None, database=None, url=None):
137
102
  """
138
103
  List models available for download by processor
139
104
  """
140
105
  if not database:
141
106
  database = self.database
142
107
  if not executable:
143
- return list(database.items())
108
+ return database.items()
144
109
  if dynamic:
145
- self._search_executables(executable)
146
- self.save_user_list()
110
+ skip_executables = ["ocrd-cis-data", "ocrd-import", "ocrd-make"]
111
+ for exec_dir in environ['PATH'].split(':'):
112
+ for exec_path in Path(exec_dir).glob(f'{executable}'):
113
+ if not exec_path.name.startswith('ocrd-'):
114
+ self.log.warning(f"OCR-D processor executable '{exec_path}' has no 'ocrd-' prefix")
115
+ if exec_path.name in skip_executables:
116
+ self.log.debug(f"Not an OCR-D processor CLI, skipping '{exec_path}'")
117
+ continue
118
+ self.log.debug(f"Inspecting '{exec_path} --dump-json' for resources")
119
+ ocrd_tool = get_ocrd_tool_json(exec_path)
120
+ for resdict in ocrd_tool.get('resources', ()):
121
+ if exec_path.name not in database:
122
+ database[exec_path.name] = []
123
+ database[exec_path.name].insert(0, resdict)
124
+ database = self._dedup_database(database)
147
125
  found = False
148
126
  ret = []
149
127
  for k in database:
@@ -161,7 +139,7 @@ class OcrdResourceManager:
161
139
  ret = [(executable, [])]
162
140
  return ret
163
141
 
164
- def list_installed(self, executable: str = None):
142
+ def list_installed(self, executable=None):
165
143
  """
166
144
  List installed resources, matching with registry by ``name``
167
145
  """
@@ -172,24 +150,28 @@ class OcrdResourceManager:
172
150
  # resources we know about
173
151
  all_executables = list(self.database.keys())
174
152
  # resources in the file system
175
- parent_dirs = [f"{join(self.xdg_data_home, 'ocrd-resources')}", RESOURCES_DIR_SYSTEM]
153
+ parent_dirs = [join(x, 'ocrd-resources') for x in [self.xdg_data_home, '/usr/local/share']]
176
154
  for parent_dir in parent_dirs:
177
155
  if Path(parent_dir).exists():
178
156
  all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
179
157
  for this_executable in set(all_executables):
180
158
  reslist = []
159
+ mimetypes = get_processor_resource_types(this_executable)
181
160
  moduledir = get_moduledir(this_executable)
182
- resdict_list = self.list_available(executable=this_executable)[0][1]
183
- for res_filename in list_all_resources(this_executable,
184
- moduled=moduledir,
185
- xdg_data_home=self.xdg_data_home):
186
- res_filename = Path(res_filename).resolve()
161
+ for res_filename in list_all_resources(this_executable, moduled=moduledir, xdg_data_home=self.xdg_data_home):
162
+ res_filename = Path(res_filename)
163
+ if not '*/*' in mimetypes:
164
+ if res_filename.is_dir() and not 'text/directory' in mimetypes:
165
+ continue
166
+ if res_filename.is_file() and ['text/directory'] == mimetypes:
167
+ continue
187
168
  res_name = res_filename.name
188
169
  res_type = 'file' if res_filename.is_file() else 'directory'
189
170
  res_size = res_filename.stat().st_size if res_filename.is_file() else directory_size(res_filename)
190
- if resdict := next((res for res in resdict_list if res['name'] == res_name), False):
191
- pass
192
- elif str(res_filename.parent).startswith(moduledir):
171
+ resdict_list = [x for x in self.database.get(this_executable, []) if x['name'] == res_name]
172
+ if resdict_list:
173
+ resdict = resdict_list[0]
174
+ elif str(res_filename.parent) == moduledir:
193
175
  resdict = {
194
176
  'name': res_name,
195
177
  'url': str(res_filename),
@@ -199,28 +181,28 @@ class OcrdResourceManager:
199
181
  }
200
182
  else:
201
183
  resdict = self.add_to_user_database(this_executable, res_filename, resource_type=res_type)
202
- # resdict['path'] = str(res_filename)
184
+ resdict['path'] = str(res_filename)
203
185
  reslist.append(resdict)
204
186
  ret.append((this_executable, reslist))
205
- self.save_user_list()
206
187
  return ret
207
188
 
208
189
  def add_to_user_database(self, executable, res_filename, url=None, resource_type='file'):
209
190
  """
210
191
  Add a stub entry to the user resource.yml
211
192
  """
212
- res_name = res_filename.name
193
+ res_name = Path(res_filename).name
194
+ self.log.info(f"{executable} resource '{res_name}' ({str(res_filename)}) not a known resource, "
195
+ f"creating stub in {self.user_list}'")
213
196
  if Path(res_filename).is_dir():
214
197
  res_size = directory_size(res_filename)
215
198
  else:
216
199
  res_size = Path(res_filename).stat().st_size
217
- user_database = self.load_resource_list(self.user_list)
200
+ with open(self.user_list, 'r', encoding='utf-8') as f:
201
+ user_database = safe_load(f) or {}
218
202
  if executable not in user_database:
219
203
  user_database[executable] = []
220
204
  resources_found = self.list_available(executable=executable, name=res_name, database=user_database)[0][1]
221
205
  if not resources_found:
222
- self.log.info(f"{executable} resource '{res_name}' ({str(res_filename)}) not a known resource, "
223
- f"creating stub in {self.user_list}'")
224
206
  resdict = {
225
207
  'name': res_name,
226
208
  'url': url if url else '???',
@@ -240,45 +222,20 @@ class OcrdResourceManager:
240
222
  def default_resource_dir(self):
241
223
  return self.location_to_resource_dir('data')
242
224
 
243
- def location_to_resource_dir(self, location: str) -> str:
244
- if location == 'data':
245
- return join(self.xdg_data_home, 'ocrd-resources')
246
- if location == 'system':
247
- return RESOURCES_DIR_SYSTEM
248
- return getcwd()
225
+ def location_to_resource_dir(self, location):
226
+ return '/usr/local/share/ocrd-resources' if location == 'system' else \
227
+ join(self.xdg_data_home, 'ocrd-resources') if location == 'data' else \
228
+ getcwd()
249
229
 
250
- def resource_dir_to_location(self, resource_path: Path) -> str:
230
+ def resource_dir_to_location(self, resource_path):
251
231
  resource_path = str(resource_path)
252
- if resource_path.startswith(RESOURCES_DIR_SYSTEM):
253
- return 'system'
254
- if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')):
255
- return 'data'
256
- if resource_path.startswith(getcwd()):
257
- return 'cwd'
258
- return resource_path
259
-
260
- def build_resource_dest_dir(self, location: str, executable: str) -> Path:
261
- if location == 'module':
262
- base_dir = get_moduledir(executable)
263
- if not base_dir:
264
- base_dir = self.location_to_resource_dir('data')
265
- else:
266
- base_dir = self.location_to_resource_dir(location)
267
- no_subdir = location in ['cwd', 'module']
268
- dest_dir = Path(base_dir) if no_subdir else Path(base_dir, executable)
269
- return dest_dir
232
+ return 'system' if resource_path.startswith('/usr/local/share/ocrd-resources') else \
233
+ 'data' if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')) else \
234
+ 'cwd' if resource_path.startswith(getcwd()) else \
235
+ resource_path
270
236
 
271
237
  @staticmethod
272
- def remove_resource(log: Logger, resource_path: Path):
273
- if resource_path.is_dir():
274
- log.info(f"Removing existing target resource directory {resource_path}")
275
- rmtree(str(resource_path))
276
- else:
277
- log.info(f"Removing existing target resource file {resource_path}")
278
- unlink(str(resource_path))
279
-
280
- @staticmethod
281
- def parameter_usage(name: str, usage: str = 'as-is') -> str:
238
+ def parameter_usage(name, usage='as-is'):
282
239
  if usage == 'as-is':
283
240
  return name
284
241
  elif usage == 'without-extension':
@@ -286,7 +243,8 @@ class OcrdResourceManager:
286
243
  raise ValueError(f"No such usage '{usage}'")
287
244
 
288
245
  @staticmethod
289
- def _download_impl(log: Logger, url: str, filename):
246
+ def _download_impl(url, filename, progress_cb=None, size=None):
247
+ log = getLogger('ocrd.resource_manager._download_impl')
290
248
  log.info(f"Downloading {url} to {filename}")
291
249
  try:
292
250
  gdrive_file_id, is_gdrive_download_link = gparse_url(url, warning=False)
@@ -298,11 +256,13 @@ class OcrdResourceManager:
298
256
  if "Content-Disposition" not in r.headers:
299
257
  url = get_url_from_gdrive_confirmation(r.text)
300
258
  except RuntimeError as e:
301
- log.warning(f"Cannot unwrap Google Drive URL: {e}")
259
+ log.warning("Cannot unwrap Google Drive URL: %s", e)
302
260
  with open(filename, 'wb') as f:
303
261
  with requests.get(url, stream=True) as r:
304
262
  r.raise_for_status()
305
263
  for data in r.iter_content(chunk_size=4096):
264
+ if progress_cb:
265
+ progress_cb(len(data))
306
266
  f.write(data)
307
267
  except Exception as e:
308
268
  rmtree(filename, ignore_errors=True)
@@ -310,18 +270,22 @@ class OcrdResourceManager:
310
270
  raise e
311
271
 
312
272
  @staticmethod
313
- def _copy_file(log: Logger, src, dst):
273
+ def _copy_file(src, dst, progress_cb=None):
274
+ log = getLogger('ocrd.resource_manager._copy_file')
314
275
  log.info(f"Copying file {src} to {dst}")
315
276
  with open(dst, 'wb') as f_out, open(src, 'rb') as f_in:
316
277
  while True:
317
278
  chunk = f_in.read(4096)
318
279
  if chunk:
319
280
  f_out.write(chunk)
281
+ if progress_cb:
282
+ progress_cb(len(chunk))
320
283
  else:
321
284
  break
322
285
 
323
286
  @staticmethod
324
- def _copy_dir(log: Logger, src, dst):
287
+ def _copy_dir(src, dst, progress_cb=None):
288
+ log = getLogger('ocrd.resource_manager._copy_dir')
325
289
  log.info(f"Copying dir recursively from {src} to {dst}")
326
290
  if not Path(src).is_dir():
327
291
  raise ValueError(f"The source is not a directory: {src}")
@@ -329,123 +293,76 @@ class OcrdResourceManager:
329
293
  for child in Path(src).rglob('*'):
330
294
  child_dst = Path(dst) / child.relative_to(src)
331
295
  if Path(child).is_dir():
332
- OcrdResourceManager._copy_dir(log, child, child_dst)
296
+ OcrdResourceManager._copy_dir(child, child_dst, progress_cb)
333
297
  else:
334
- OcrdResourceManager._copy_file(log, child, child_dst)
298
+ OcrdResourceManager._copy_file(child, child_dst, progress_cb)
335
299
 
336
300
  @staticmethod
337
- def _copy_impl(log: Logger, src_filename, filename):
301
+ def _copy_impl(src_filename, filename, progress_cb=None):
302
+ log = getLogger('ocrd.resource_manager._copy_impl')
338
303
  log.info(f"Copying {src_filename} to {filename}")
339
304
  if Path(src_filename).is_dir():
340
- OcrdResourceManager._copy_dir(log, src_filename, filename)
305
+ OcrdResourceManager._copy_dir(src_filename, filename, progress_cb)
341
306
  else:
342
- OcrdResourceManager._copy_file(log, src_filename, filename)
343
-
344
- @staticmethod
345
- def _extract_archive(log: Logger, tempdir: Path, path_in_archive: str, fpath: Path, archive_fname: str):
346
- Path('out').mkdir()
347
- with pushd_popd('out'):
348
- mimetype = guess_media_type(f'../{archive_fname}', fallback='application/octet-stream')
349
- log.info(f"Extracting {mimetype} archive to {tempdir}/out")
350
- if mimetype == 'application/zip':
351
- with ZipFile(f'../{archive_fname}', 'r') as zipf:
352
- zipf.extractall()
353
- elif mimetype in ('application/gzip', 'application/x-xz'):
354
- with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
355
- tar.extractall()
356
- else:
357
- raise RuntimeError(f"Unable to handle extraction of {mimetype} archive")
358
- log.info(f"Copying '{path_in_archive}' from archive to {fpath}")
359
- if Path(path_in_archive).is_dir():
360
- copytree(path_in_archive, str(fpath))
361
- else:
362
- copy(path_in_archive, str(fpath))
363
-
364
- def copy_resource(
365
- self, log: Logger, url: str, fpath: Path, resource_type: str = 'file', path_in_archive: str = '.'
366
- ) -> Path:
367
- """
368
- Copy a local resource to another destination
369
- """
370
- if resource_type == 'archive':
371
- archive_fname = 'download.tar.xx'
372
- with pushd_popd(tempdir=True) as tempdir:
373
- self._copy_impl(log, url, archive_fname)
374
- self._extract_archive(log, tempdir, path_in_archive, fpath, archive_fname)
375
- else:
376
- self._copy_impl(log, url, fpath)
377
- return fpath
378
-
379
- def download_resource(
380
- self, log: Logger, url: str, fpath: Path, resource_type: str = 'file', path_in_archive: str = '.'
381
- ) -> Path:
382
- """
383
- Download a resource by URL to a destination directory
384
- """
385
- if resource_type == 'archive':
386
- archive_fname = 'download.tar.xx'
387
- with pushd_popd(tempdir=True) as tempdir:
388
- self._download_impl(log, url, archive_fname)
389
- self._extract_archive(log, tempdir, path_in_archive, fpath, archive_fname)
390
- else:
391
- self._download_impl(log, url, fpath)
392
- return fpath
307
+ OcrdResourceManager._copy_file(src_filename, filename, progress_cb)
393
308
 
394
309
  # TODO Proper caching (make head request for size, If-Modified etc)
395
- def handle_resource(
396
- self, res_dict: Dict, executable: str, dest_dir: Path, any_url: str, overwrite: bool = False,
397
- resource_type: str = 'file', path_in_archive: str = '.'
398
- ) -> Optional[Path]:
310
+ def download(
311
+ self, executable, url, basedir, overwrite=False, no_subdir=False, name=None, resource_type='file',
312
+ path_in_archive='.', progress_cb=None,
313
+ ):
399
314
  """
400
- Download or Copy a resource by URL to a destination directory
315
+ Download a resource by URL
401
316
  """
402
- log = getLogger('ocrd.resource_manager.handle_resource')
403
- registered = "registered" if "size" in res_dict else "unregistered"
404
- resource_type = res_dict.get('type', resource_type)
405
- resource_name = res_dict.get('name', None)
406
- path_in_archive = res_dict.get('path_in_archive', path_in_archive)
407
-
408
- if resource_type not in RESOURCE_TYPES:
409
- raise ValueError(f"Unknown resource type: {resource_type}, must be one of: {RESOURCE_TYPES}")
410
- if any_url:
411
- res_dict['url'] = any_url
412
- if not resource_name:
413
- url_parsed = urlparse(res_dict['url'])
414
- resource_name = Path(unquote(url_parsed.path)).name
415
- if resource_type == 'archive' and path_in_archive != '.':
416
- resource_name = Path(path_in_archive).name
417
- if res_dict['url'] == '???':
418
- log.warning(f"Skipping user resource {resource_name} since download url is: {res_dict['url']}")
419
- return None
420
-
421
- fpath = Path(dest_dir, resource_name)
317
+ log = getLogger('ocrd.resource_manager.download')
318
+ destdir = Path(basedir) if no_subdir else Path(basedir, executable)
319
+ if not name:
320
+ url_parsed = urlparse(url)
321
+ name = Path(unquote(url_parsed.path)).name
322
+ fpath = Path(destdir, name)
323
+ is_url = url.startswith('https://') or url.startswith('http://')
422
324
  if fpath.exists():
423
325
  if not overwrite:
424
326
  fpath_type = 'Directory' if fpath.is_dir() else 'File'
425
327
  log.warning(f"{fpath_type} {fpath} already exists but --overwrite is not set, skipping the download")
426
328
  # raise FileExistsError(f"{fpath_type} {fpath} already exists but --overwrite is not set")
427
329
  return fpath
428
- self.remove_resource(log, resource_path=fpath)
429
- dest_dir.mkdir(parents=True, exist_ok=True)
430
-
431
- # TODO @mehmedGIT: Consider properly handling cases for invalid URLs.
432
- if res_dict['url'].startswith('https://') or res_dict['url'].startswith('http://'):
433
- log.info(f"Downloading {registered} resource '{resource_name}' ({res_dict['url']})")
434
- if 'size' not in res_dict:
435
- with requests.head(res_dict['url']) as r:
436
- res_dict['size'] = int(r.headers.get('content-length', 0))
437
- fpath = self.download_resource(log, res_dict['url'], fpath, resource_type, path_in_archive)
438
- else:
439
- log.info(f"Copying {registered} resource '{resource_name}' ({res_dict['url']})")
440
- urlpath = Path(res_dict['url'])
441
- res_dict['url'] = str(urlpath.resolve())
442
- res_dict['size'] = directory_size(urlpath) if Path(urlpath).is_dir() else urlpath.stat().st_size
443
- fpath = self.copy_resource(log, res_dict['url'], fpath, resource_type, path_in_archive)
444
-
445
- if registered == 'unregistered':
446
- self.add_to_user_database(executable, fpath, url=res_dict['url'])
447
- self.save_user_list()
448
- log.info(f"Installed resource {res_dict['url']} under {fpath}")
330
+ if fpath.is_dir():
331
+ log.info(f"Removing existing target directory {fpath}")
332
+ rmtree(str(fpath))
333
+ else:
334
+ log.info(f"Removing existing target file {fpath}")
335
+ unlink(str(fpath))
336
+ destdir.mkdir(parents=True, exist_ok=True)
337
+ if resource_type in ('file', 'directory'):
338
+ if is_url:
339
+ self._download_impl(url, fpath, progress_cb)
340
+ else:
341
+ self._copy_impl(url, fpath, progress_cb)
342
+ elif resource_type == 'archive':
343
+ archive_fname = 'download.tar.xx'
344
+ with pushd_popd(tempdir=True) as tempdir:
345
+ if is_url:
346
+ self._download_impl(url, archive_fname, progress_cb)
347
+ else:
348
+ self._copy_impl(url, archive_fname, progress_cb)
349
+ Path('out').mkdir()
350
+ with pushd_popd('out'):
351
+ mimetype = guess_media_type(f'../{archive_fname}', fallback='application/octet-stream')
352
+ log.info(f"Extracting {mimetype} archive to {tempdir}/out")
353
+ if mimetype == 'application/zip':
354
+ with ZipFile(f'../{archive_fname}', 'r') as zipf:
355
+ zipf.extractall()
356
+ elif mimetype in ('application/gzip', 'application/x-xz'):
357
+ with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
358
+ tar.extractall()
359
+ else:
360
+ raise RuntimeError(f"Unable to handle extraction of {mimetype} archive {url}")
361
+ log.info(f"Copying '{path_in_archive}' from archive to {fpath}")
362
+ if Path(path_in_archive).is_dir():
363
+ copytree(path_in_archive, str(fpath))
364
+ else:
365
+ copy(path_in_archive, str(fpath))
449
366
  return fpath
450
367
 
451
368
  def _dedup_database(self, database=None, dedup_key='name'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -16,7 +16,7 @@ Requires-Dist: beanie~=1.7
16
16
  Requires-Dist: click>=7
17
17
  Requires-Dist: cryptography<43.0.0
18
18
  Requires-Dist: Deprecated==1.2.0
19
- Requires-Dist: docker>=7.1.0
19
+ Requires-Dist: docker
20
20
  Requires-Dist: elementpath
21
21
  Requires-Dist: fastapi>=0.78.0
22
22
  Requires-Dist: filetype
@@ -5,7 +5,7 @@ ocrd/mets_server.py,sha256=EL6CMtVjmVPs9--3Vn-JyhU2VDMNw-6QN_XWRKFp6uk,22451
5
5
  ocrd/ocrd-all-tool.json,sha256=EYXmMzP68p3KzL8nUZ16TCX2chQzKkAeISvuXqI_yIw,2094
6
6
  ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
- ocrd/resource_manager.py,sha256=CLwzUnk_uVOauyAPd0KhMjqBHmKNVluvS5QsK5O0m1A,20447
8
+ ocrd/resource_manager.py,sha256=kIWDoKxWH4IJE1gcoTcCRQjYjieCqiQclyuyF6Y9b8A,16813
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
10
  ocrd/workspace.py,sha256=-j3X83K0f4vtd5jwfu6_R53RJ2R8gt1HYpyrv8YP2bg,65661
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
@@ -13,10 +13,10 @@ ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,1197
13
13
  ocrd/cli/__init__.py,sha256=-BiwIakeCkWx0Jd2yX9_ahfdV4VKz_5yqGEJ_2zKakQ,2734
14
14
  ocrd/cli/bashlib.py,sha256=ypFBM3-IULz_IEBx0Y04eGt9VbQWwEWm4ujm9g_hPWY,6009
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
- ocrd/cli/network.py,sha256=LNibNBtHGvtXuEwzFz1bLAfkroLWyMPsadTaNVwzUko,687
16
+ ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
17
17
  ocrd/cli/ocrd_tool.py,sha256=EyD5VdLm2WTzQnR-hZKpn-D4-dsWr2PIE5IoY1O3mfE,7357
18
18
  ocrd/cli/process.py,sha256=8KD0i7LT01H9u5CC1vktYMEVpS67da_rp_09_EOECmw,1233
19
- ocrd/cli/resmgr.py,sha256=h9cSbp3_4sHkBqhWU7cThoUxcIqgrtD81st2nxZdnaw,8024
19
+ ocrd/cli/resmgr.py,sha256=mk8KZweC_7ENAFnC6FvFf7Zv_W1wqJTmk0EMd9XSvf4,10132
20
20
  ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
21
  ocrd/cli/workspace.py,sha256=bsp6YXEgwABIUFbSENmxV1c4oxRwc2L-BpeDPlYfhHE,40501
22
22
  ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
@@ -26,7 +26,7 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=lIvtE8re1VmpHm45u71ltE0QJS8nyd28HhLC7zGSvlo,2691
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=tKN-xyPehFpwV3ztyLcOcuA_fWTNdRKu5E8V_jKOO2g,59400
29
+ ocrd/processor/base.py,sha256=yN_sMfwm2B89wtr2ShNkEtcTjXNqnvAtjM4TbWTUNCk,59929
30
30
  ocrd/processor/helpers.py,sha256=gIc6PdvOS1sR0UkYlrdZopImAXxXglDBNpgNZGWHO7Y,10987
31
31
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,28 +48,26 @@ ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,
48
48
  ocrd_models/report.py,sha256=luZxvzAAQyGYOlRNSJQUIUIANG81iGmBW5ag-uXxKCA,2026
49
49
  ocrd_models/utils.py,sha256=0_WHf5NEn1WC8MKJc6X_RK8gW-70Z09_mslkKOj7uF8,2369
50
50
  ocrd_models/xpath_functions.py,sha256=AwR8tHf56-mmIksnw_GeOQ760sxNHqK92T7z9OfsEEs,1437
51
- ocrd_network/__init__.py,sha256=tLSOf5iTNSWjOWep_xY1sU2YZJu7uv9-3qLMuLrwyoU,433
51
+ ocrd_network/__init__.py,sha256=gMejC614J5PPGgXDKBiQS0jt-Jx8qOrLbWH7zt8x8Gs,374
52
52
  ocrd_network/client.py,sha256=rzqtamZ8krRRy-QTO-AeWH8Lr3HhRiQe2R1-Lovd40g,3020
53
53
  ocrd_network/client_utils.py,sha256=VVZMNBgGznh41exZ78S48X3DDwHcWTuOq-LNdxjRvak,5002
54
- ocrd_network/constants.py,sha256=fxbC4QyeQp38KDRWjqnbfYjt3nUx6xT_mkmDt6PylyY,2033
54
+ ocrd_network/constants.py,sha256=IeNtcU6dqonDE3Zw83_61auhS8X2b8wsjAxYg1zvK-M,1902
55
55
  ocrd_network/database.py,sha256=fcft7vdRDoR7vmPL1xNYTIeOg5DwRPcggwYDYxLy5ik,10706
56
- ocrd_network/logging_utils.py,sha256=mNYaLmKIf2Wg1gCU_FbicJC54mPhYPEVinmB7rHEdUs,2374
56
+ ocrd_network/logging_utils.py,sha256=bO9TQqBXw9CIZEKp8tHXbeE2NuJWMiaQDHzS05b4ajo,2153
57
57
  ocrd_network/param_validators.py,sha256=Jl1VwiPPKJ50k-xEHLdvW-1QDOkJHCiMz4k9Ipqm-Uc,1489
58
58
  ocrd_network/process_helpers.py,sha256=KpkInXsa5bgrxvTOouyMJ0NgJhaz0J9Gjs5sZHBcH64,2373
59
59
  ocrd_network/processing_server.py,sha256=qBiYk4wgTLqhHvbmDWu_F626BfSfyvkoCD-i0ZwsBSE,42109
60
60
  ocrd_network/processing_worker.py,sha256=fhIvmDQAYOkHYtUs5IB8Jk2lOKUTIBk3DskAsFloijA,12591
61
61
  ocrd_network/processor_server.py,sha256=2CD9TlinXk6x1jFjP5VWOXgJe8lAQdxc9zjZuVy3EOw,9931
62
- ocrd_network/resource_manager_server.py,sha256=dFFWsp1O7H3RzdjbQC_fn1SVV1QIjkVbg2l98MMh-HI,6852
63
62
  ocrd_network/server_cache.py,sha256=LpvJ-_Lbaeo4M3t8rZDdm9DAErZr8lDlma6pYc0m7aQ,13149
64
63
  ocrd_network/server_utils.py,sha256=Uge5F2VagPAEpcyU_Qf8AiecObIGXE0ilD8DaK7bTdE,12222
65
64
  ocrd_network/tcp_to_uds_mets_proxy.py,sha256=yRW-O6ihd31gf7xqQBIBb_ZQQgqisMyOdRI216ehq_A,3160
66
65
  ocrd_network/utils.py,sha256=XzPXeSPCVjWLQM540PCpxfJ5hqjJ85_OQBjnf9HlDtE,6759
67
- ocrd_network/cli/__init__.py,sha256=YLt1LAcWrl1YfA_UsLYwWUGYsX84wCd1AZwgsUqjHC8,396
66
+ ocrd_network/cli/__init__.py,sha256=F7YVqxw-9glz6-ghG0Kp5XXeV1-rL1emVSXLCWxdTF0,306
68
67
  ocrd_network/cli/client.py,sha256=gFEXjz-d074CpvimqaM4kJRbJVNYRAOK-jsUl2EAUVs,8424
69
68
  ocrd_network/cli/processing_server.py,sha256=rAci6RsHlZ0c87GuLdfdCQCiGNcDEu4NEEQiwKJqVUo,796
70
69
  ocrd_network/cli/processing_worker.py,sha256=ZuaCkbKV_WKJV7cGOjZ6RLrjjppymnwNCiznFMlclAg,1897
71
70
  ocrd_network/cli/processor_server.py,sha256=Vto7UekFo_g83aHqwDmhge9bhPzk0b7O-L46dSfIpJc,1259
72
- ocrd_network/cli/resmgr_server.py,sha256=sc0VX_RehTbg8Qp7ht_DvVqsrdL5b9Zw3bBgWcAD13A,826
73
71
  ocrd_network/models/__init__.py,sha256=AcpZrenygOudMi47Wfr1UCrpbghq2gP8aMAKodgEIFM,527
74
72
  ocrd_network/models/job.py,sha256=2-E1cKfdTC3Y5AUemCLz1a_t7BlT8BNF6teAC0f8J5o,4442
75
73
  ocrd_network/models/messages.py,sha256=XnyLMX77NchgmtKJRtqtBFsk_sCR4OGEuWm_d3uDkj8,657
@@ -87,18 +85,18 @@ ocrd_network/runtime_data/__init__.py,sha256=3jYkmT4mxMUcpbDaSw7Ld0KTedGEx_5vUQP
87
85
  ocrd_network/runtime_data/config_parser.py,sha256=Vr0FbsqmsoiuhDgZ7KFdeFZj9JvUulcOS2PCRFQQNHY,2364
88
86
  ocrd_network/runtime_data/connection_clients.py,sha256=DZyAvkNyMaIddGJs56s2pMP_fK-XWAtICxk1cjvkWYM,4207
89
87
  ocrd_network/runtime_data/deployer.py,sha256=LkDUG0uJf_V4SteiOM3EWwhKtdANCjmAOEAJJDshN30,9111
90
- ocrd_network/runtime_data/hosts.py,sha256=QkluX5aMCliE8YoH0rJcc9pNprx2Mw5l8_eOq5NJ74s,12319
91
- ocrd_network/runtime_data/network_agents.py,sha256=uJA6bB8O5XUvRc-wG4b4YKuArQOrQXKJMpMq1TC5bco,6289
88
+ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZSj_WE,12185
89
+ ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
92
90
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
93
- ocrd_utils/__init__.py,sha256=mdLZyNz2W8t3ht8JMvbGaEfvQmYqXCWLBFcJXY6_fmA,6020
91
+ ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
94
92
  ocrd_utils/config.py,sha256=1_8j4kpKK5gxhFKObbBEzmq4JazTtKEkGe14Ch_tpw0,11796
95
- ocrd_utils/constants.py,sha256=6lqMLeJdkFBlvGVmGjcExWbRKzNU6QT0kADBb5BkcBc,3464
93
+ ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
96
94
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
97
95
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
98
96
  ocrd_utils/introspect.py,sha256=gfBlmeEFuRmRUSgdSK0jOxRpYqDRXl2IAE6gv2MZ6as,1977
99
97
  ocrd_utils/logging.py,sha256=sHNfih9kBfvKsHdLqMK_ew9Pld1GsRyYlrZHIySujnw,7313
100
98
  ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
101
- ocrd_utils/os.py,sha256=EuxDV1mW6VRrtbUxAYb3G-teV9blICA-cXqbpzJjJfo,13564
99
+ ocrd_utils/os.py,sha256=acRRdDBI8L6BK0Mf773yKEzwdpZSFRBJEKB2crL4EjU,9865
102
100
  ocrd_utils/str.py,sha256=cRgqYILDGOAqWr0qrCrV52I3y4wvpwDVtnBGEUjXNS4,10116
103
101
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
104
102
  ocrd_validators/bagit-profile.yml,sha256=sdQJlSi7TOn1E9WYMOZ1shewJ-i_nPaKmsAFkh28TGY,1011
@@ -108,7 +106,7 @@ ocrd_validators/message_processing.schema.yml,sha256=HL7o96-7ejslVMXcp16sbo5IjfU
108
106
  ocrd_validators/message_result.schema.yml,sha256=G6vt_JgIU7OGSaHj-2Jna6KWQ3bFWol5tnBArWEiVjM,681
109
107
  ocrd_validators/mets.xsd,sha256=0Wrs9bObn0n-yEEIWyguIcUUuuP6KMEjD4I_p1_UlwY,138290
110
108
  ocrd_validators/ocrd_network_message_validator.py,sha256=oafNWOjieBmTHFfYeCtyFFpW1gI0lDT6ycRr5Kvmfq0,561
111
- ocrd_validators/ocrd_tool.schema.yml,sha256=fDNr-QdEOBtYbz8aHmjdOUirPBKr3vfLUDtC88gu75U,10231
109
+ ocrd_validators/ocrd_tool.schema.yml,sha256=BQkRIRDbn9B8gFeVxz_EpNdleh_x2dCtIpJEC4HqFHw,10125
112
110
  ocrd_validators/ocrd_tool_validator.py,sha256=CvfUHC0vJsPnmFBNf4ndQMXX6pcERCjJMAWk2L-T_ww,747
113
111
  ocrd_validators/ocrd_zip_validator.py,sha256=udAImFFliJY3y1FcKwZ52rhpjYB2Iv491hWDxdet8w0,3685
114
112
  ocrd_validators/page.xsd,sha256=abQ8C3gRLPMFm8lH62aTCfvTIWI23TpgEDcaW9YCt7I,85770
@@ -122,9 +120,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
122
120
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
123
121
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
124
122
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
125
- ocrd-3.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
126
- ocrd-3.2.0.dist-info/METADATA,sha256=mOJkMaF0CIVwItP-6wrDIdRzZidf11AMcU5J_HubyxQ,10449
127
- ocrd-3.2.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
128
- ocrd-3.2.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
129
- ocrd-3.2.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
130
- ocrd-3.2.0.dist-info/RECORD,,
123
+ ocrd-3.3.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
124
+ ocrd-3.3.0.dist-info/METADATA,sha256=K8u-P2RKlBlGPPq8h8sv5hLhWi0XgRmhF-Bf-F3Qgpc,10442
125
+ ocrd-3.3.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
126
+ ocrd-3.3.0.dist-info/entry_points.txt,sha256=4hcJ2LkK_OlIabHnKgFit35Ap7b5Lz1Gb4hzkxV0Kiw,152
127
+ ocrd-3.3.0.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
128
+ ocrd-3.3.0.dist-info/RECORD,,
ocrd_network/__init__.py CHANGED
@@ -4,5 +4,4 @@ from .processing_server import ProcessingServer
4
4
  from .processing_worker import ProcessingWorker
5
5
  from .processor_server import ProcessorServer
6
6
  from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType
7
- from .resource_manager_server import ResourceManagerServer
8
7
  from .server_cache import CacheLockedPages, CacheProcessingRequests
@@ -2,12 +2,10 @@ from .client import client_cli
2
2
  from .processing_server import processing_server_cli
3
3
  from .processing_worker import processing_worker_cli
4
4
  from .processor_server import processor_server_cli
5
- from .resmgr_server import resource_manager_server_cli
6
5
 
7
6
  __all__ = [
8
7
  'client_cli',
9
8
  'processing_server_cli',
10
9
  'processing_worker_cli',
11
- 'processor_server_cli',
12
- 'resource_manager_server_cli'
10
+ 'processor_server_cli'
13
11
  ]