ocrd 3.7.0__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. ocrd/cli/network.py +2 -0
  2. ocrd/cli/resmgr.py +29 -65
  3. ocrd/constants.py +0 -2
  4. ocrd/ocrd-all-tool.json +25 -0
  5. ocrd/processor/base.py +6 -16
  6. ocrd/processor/builtin/dummy/ocrd-tool.json +25 -0
  7. ocrd/processor/builtin/merge_processor.py +131 -0
  8. ocrd/processor/builtin/param_command_header2unordered.json +7 -0
  9. ocrd/processor/builtin/param_command_heading2unordered.json +7 -0
  10. ocrd/processor/builtin/param_command_lines2orientation.json +6 -0
  11. ocrd/processor/builtin/param_command_page-update-version.json +5 -0
  12. ocrd/processor/builtin/param_command_transkribus-to-prima.json +8 -0
  13. ocrd/processor/builtin/shell_processor.py +128 -0
  14. ocrd/resource_manager.py +213 -124
  15. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/METADATA +22 -3
  16. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/RECORD +34 -26
  17. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/entry_points.txt +2 -0
  18. ocrd_models/ocrd_agent.py +3 -3
  19. ocrd_network/__init__.py +1 -0
  20. ocrd_network/cli/__init__.py +2 -0
  21. ocrd_network/cli/resmgr_server.py +23 -0
  22. ocrd_network/constants.py +3 -0
  23. ocrd_network/logging_utils.py +5 -0
  24. ocrd_network/resource_manager_server.py +182 -0
  25. ocrd_network/runtime_data/connection_clients.py +1 -1
  26. ocrd_network/runtime_data/hosts.py +43 -16
  27. ocrd_network/runtime_data/network_agents.py +15 -1
  28. ocrd_utils/__init__.py +5 -1
  29. ocrd_utils/constants.py +5 -0
  30. ocrd_utils/os.py +141 -61
  31. ocrd_validators/ocrd_tool.schema.yml +7 -4
  32. ocrd/resource_list.yml +0 -61
  33. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/LICENSE +0 -0
  34. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/WHEEL +0 -0
  35. {ocrd-3.7.0.dist-info → ocrd-3.8.1.dist-info}/top_level.txt +0 -0
ocrd_utils/constants.py CHANGED
@@ -5,6 +5,7 @@ from .introspect import dist_version
5
5
  from re import compile as regex_compile
6
6
 
7
7
  __all__ = [
8
+ 'DEFAULT_METS_BASENAME',
8
9
  'EXT_TO_MIME',
9
10
  'LOG_FORMAT',
10
11
  'LOG_TIMEFMT',
@@ -14,7 +15,9 @@ __all__ = [
14
15
  'PIL_TO_MIME',
15
16
  'REGEX_PREFIX',
16
17
  'REGEX_FILE_ID',
18
+ 'RESOURCES_DIR_SYSTEM',
17
19
  'RESOURCE_LOCATIONS',
20
+ 'RESOURCE_TYPES',
18
21
  'VERSION',
19
22
  ]
20
23
 
@@ -108,6 +111,8 @@ LOG_FORMAT = r'%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s'
108
111
  LOG_TIMEFMT = r'%H:%M:%S'
109
112
 
110
113
  RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
114
+ RESOURCE_TYPES = ['file', 'directory', 'archive']
115
+ RESOURCES_DIR_SYSTEM = '/usr/local/share/ocrd-resources'
111
116
 
112
117
  DEFAULT_METS_BASENAME = 'mets.xml'
113
118
 
ocrd_utils/os.py CHANGED
@@ -5,9 +5,11 @@ __all__ = [
5
5
  'abspath',
6
6
  'directory_size',
7
7
  'is_file_in_directory',
8
+ 'is_git_url',
8
9
  'get_ocrd_tool_json',
9
10
  'get_moduledir',
10
11
  'get_processor_resource_types',
12
+ 'get_env_locations',
11
13
  'guess_media_type',
12
14
  'pushd_popd',
13
15
  'unzip_file_to_dir',
@@ -15,29 +17,30 @@ __all__ = [
15
17
  'redirect_stderr_and_stdout_to_file',
16
18
  ]
17
19
 
20
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
18
21
  from tempfile import TemporaryDirectory, gettempdir
19
22
  from functools import lru_cache
20
23
  from contextlib import contextmanager, redirect_stderr, redirect_stdout
21
24
  from shutil import which
22
25
  from json import loads
23
26
  from json.decoder import JSONDecodeError
24
- from os import getcwd, chdir, stat, chmod, umask, environ
27
+ from os import getcwd, chdir, stat, chmod, umask, environ, PathLike
25
28
  from pathlib import Path
26
29
  from os.path import abspath as abspath_, join
27
30
  from zipfile import ZipFile
28
- from subprocess import run, PIPE
31
+ from subprocess import run, PIPE, CalledProcessError
29
32
  from mimetypes import guess_type as mimetypes_guess
30
33
  from filetype import guess as filetype_guess
34
+ from fnmatch import filter as apply_glob
31
35
 
32
36
  from atomicwrites import atomic_write as atomic_write_, AtomicWriter
33
37
 
34
- from .constants import EXT_TO_MIME
38
+ from .constants import EXT_TO_MIME, MIME_TO_EXT, RESOURCE_LOCATIONS, RESOURCES_DIR_SYSTEM
35
39
  from .config import config
36
40
  from .logging import getLogger
37
41
  from .introspect import resource_string
38
42
 
39
-
40
- def abspath(url):
43
+ def abspath(url : str) -> str:
41
44
  """
42
45
  Get a full path to a file or file URL
43
46
 
@@ -49,7 +52,7 @@ def abspath(url):
49
52
 
50
53
 
51
54
  @contextmanager
52
- def pushd_popd(newcwd=None, tempdir=False):
55
+ def pushd_popd(newcwd : Union[str, PathLike] = None, tempdir : bool = False) -> Iterator[PathLike]:
53
56
  if newcwd and tempdir:
54
57
  raise Exception("pushd_popd can accept either newcwd or tempdir, not both")
55
58
  try:
@@ -69,8 +72,7 @@ def pushd_popd(newcwd=None, tempdir=False):
69
72
  finally:
70
73
  chdir(oldcwd)
71
74
 
72
-
73
- def unzip_file_to_dir(path_to_zip, output_directory):
75
+ def unzip_file_to_dir(path_to_zip : Union[str, PathLike], output_directory : str) -> None:
74
76
  """
75
77
  Extract a ZIP archive to a directory
76
78
  """
@@ -79,7 +81,16 @@ def unzip_file_to_dir(path_to_zip, output_directory):
79
81
 
80
82
 
81
83
  @lru_cache()
82
- def get_ocrd_tool_json(executable):
84
+ def is_git_url(url: str) -> bool:
85
+ try:
86
+ run(['git', 'ls-remote', '--exit-code', '-q', '-h', url], check=True)
87
+ except CalledProcessError:
88
+ return False
89
+ return True
90
+
91
+
92
+ @lru_cache()
93
+ def get_ocrd_tool_json(executable : str) -> Dict[str, Any]:
83
94
  """
84
95
  Get the ``ocrd-tool`` description of ``executable``.
85
96
  """
@@ -93,12 +104,12 @@ def get_ocrd_tool_json(executable):
93
104
  except (JSONDecodeError, OSError) as e:
94
105
  getLogger('ocrd.utils.get_ocrd_tool_json').error(f'{executable} --dump-json produced invalid JSON: {e}')
95
106
  if 'resource_locations' not in ocrd_tool:
96
- ocrd_tool['resource_locations'] = ['data', 'cwd', 'system', 'module']
107
+ ocrd_tool['resource_locations'] = RESOURCE_LOCATIONS
97
108
  return ocrd_tool
98
109
 
99
110
 
100
111
  @lru_cache()
101
- def get_moduledir(executable):
112
+ def get_moduledir(executable : str) -> str:
102
113
  moduledir = None
103
114
  try:
104
115
  ocrd_all_moduledir = loads(resource_string('ocrd', 'ocrd-all-module-dir.json'))
@@ -110,59 +121,80 @@ def get_moduledir(executable):
110
121
  getLogger('ocrd.utils.get_moduledir').error(f'{executable} --dump-module-dir failed: {e}')
111
122
  return moduledir
112
123
 
124
+ def get_env_locations(executable: str) -> List[str]:
125
+ processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
126
+ if processor_path_var in environ:
127
+ return environ[processor_path_var].split(':')
128
+ return []
113
129
 
114
- def list_resource_candidates(executable, fname, cwd=getcwd(), moduled=None, xdg_data_home=None):
130
+ def list_resource_candidates(executable : str, fname : str, cwd : Optional[str] = None, moduled : Optional[str] = None, xdg_data_home : Optional[str] = None) -> List[str]:
115
131
  """
116
132
  Generate candidates for processor resources according to
117
133
  https://ocr-d.de/en/spec/ocrd_tool#file-parameters
118
134
  """
135
+ if cwd is None:
136
+ cwd = getcwd()
119
137
  candidates = []
120
138
  candidates.append(join(cwd, fname))
121
- xdg_data_home = config.XDG_DATA_HOME if not xdg_data_home else xdg_data_home
122
- processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
123
- if processor_path_var in environ:
124
- candidates += [join(x, fname) for x in environ[processor_path_var].split(':')]
139
+ xdg_data_home = xdg_data_home or config.XDG_DATA_HOME
140
+ for processor_path in get_env_locations(executable):
141
+ candidates.append(join(processor_path, fname))
125
142
  candidates.append(join(xdg_data_home, 'ocrd-resources', executable, fname))
126
- candidates.append(join('/usr/local/share/ocrd-resources', executable, fname))
143
+ candidates.append(join(RESOURCES_DIR_SYSTEM, executable, fname))
127
144
  if moduled:
128
145
  candidates.append(join(moduled, fname))
129
146
  return candidates
130
147
 
131
-
132
- def list_all_resources(executable, moduled=None, xdg_data_home=None):
148
+ def list_all_resources(executable : str, ocrd_tool : Optional[Dict[str, Any]] = None, moduled : Optional[str] = None, xdg_data_home : Optional[str] = None) -> List[str]:
133
149
  """
134
150
  List all processor resources in the filesystem according to
135
- https://ocr-d.de/en/spec/ocrd_tool#file-parameters
151
+ https://ocr-d.de/en/spec/ocrd_tool#resource-parameters
136
152
  """
137
- candidates = []
153
+ xdg_data_home = xdg_data_home or config.XDG_DATA_HOME
154
+ if ocrd_tool is None:
155
+ ocrd_tool = get_ocrd_tool_json(executable)
156
+ # processor we're looking for might not be installed, hence the fallbacks
138
157
  try:
139
- resource_locations = get_ocrd_tool_json(executable)['resource_locations']
140
- except FileNotFoundError:
141
- # processor we're looking for resource_locations of is not installed.
158
+ mimetypes = get_processor_resource_types(executable, ocrd_tool=ocrd_tool)
159
+ except KeyError:
160
+ mimetypes = ['*/*']
161
+ try:
162
+ resource_locations = ocrd_tool['resource_locations']
163
+ except KeyError:
142
164
  # Assume the default
143
- resource_locations = ['data', 'cwd', 'system', 'module']
144
- xdg_data_home = config.XDG_DATA_HOME if not xdg_data_home else xdg_data_home
145
- # XXX cwd would list too many false positives
165
+ resource_locations = RESOURCE_LOCATIONS
166
+ try:
167
+ # fixme: if resources_list contains directories, their "suffix" will interfere
168
+ # (e.g. dirname without dot means we falsely match files without suffix)
169
+ resource_suffixes = [Path(res['name']).suffix
170
+ for res in ocrd_tool['resources']]
171
+ except KeyError:
172
+ resource_suffixes = []
173
+ logger = getLogger('ocrd.utils.list_all_resources')
174
+ candidates = []
175
+ # cwd would list too many false positives:
146
176
  # if 'cwd' in resource_locations:
147
- # cwd_candidate = join(getcwd(), 'ocrd-resources', executable)
148
- # if Path(cwd_candidate).exists():
149
- # candidates.append(cwd_candidate)
150
- processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
151
- if processor_path_var in environ:
152
- for processor_path in environ[processor_path_var].split(':'):
153
- if Path(processor_path).is_dir():
154
- candidates += Path(processor_path).iterdir()
177
+ # cwddir = Path.cwd()
178
+ # candidates.append(cwddir.itertree())
179
+ # but we do not use this anyway:
180
+ # relative paths are tried w.r.t. CWD
181
+ # prior to list_all_resources resolution.
182
+ for processor_path in get_env_locations(executable):
183
+ processor_path = Path(processor_path)
184
+ if processor_path.is_dir():
185
+ candidates += processor_path.iterdir()
155
186
  if 'data' in resource_locations:
156
187
  datadir = Path(xdg_data_home, 'ocrd-resources', executable)
157
188
  if datadir.is_dir():
158
189
  candidates += datadir.iterdir()
159
190
  if 'system' in resource_locations:
160
- systemdir = Path('/usr/local/share/ocrd-resources', executable)
191
+ systemdir = Path(RESOURCES_DIR_SYSTEM, executable)
161
192
  if systemdir.is_dir():
162
193
  candidates += systemdir.iterdir()
163
194
  if 'module' in resource_locations and moduled:
164
195
  # recurse fully
165
- for resource in itertree(Path(moduled)):
196
+ moduled = Path(moduled)
197
+ for resource in moduled.iterdir():
166
198
  if resource.is_dir():
167
199
  continue
168
200
  if any(resource.match(pattern) for pattern in
@@ -170,18 +202,66 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None):
170
202
  # code and data; `is_resource()` only singles out
171
203
  # files over directories; but we want data files only
172
204
  # todo: more code and cache exclusion patterns!
173
- ['*.py', '*.py[cod]', '*~', 'ocrd-tool.json',
205
+ ['*.py', '*.py[cod]', '*~', '.*.swp', '*.swo',
206
+ '__pycache__/*', '*.egg-info/*', '*.egg',
207
+ 'copyright.txt', 'LICENSE*', 'README.md', 'MANIFEST',
208
+ 'TAGS', '.DS_Store',
209
+ # C extensions
210
+ '*.so',
211
+ # translations
212
+ '*.mo', '*.pot',
213
+ '*.log', '*.orig', '*.BAK',
214
+ '.git/*',
215
+ # our stuff
216
+ 'ocrd-tool.json',
174
217
  'environment.pickle', 'resource_list.yml']):
218
+ logger.debug("ignoring module candidate '%s'", resource)
175
219
  continue
176
220
  candidates.append(resource)
177
- # recurse once
178
- for parent in candidates:
179
- if parent.is_dir() and parent.name != '.git':
180
- candidates += parent.iterdir()
181
- return sorted([str(x) for x in candidates])
182
-
183
-
184
- def get_processor_resource_types(executable, ocrd_tool=None):
221
+ if mimetypes != ['*/*']:
222
+ logger.debug("matching candidates for %s by content-type %s", executable, str(mimetypes))
223
+ def valid_resource_type(path):
224
+ if '*/*' in mimetypes:
225
+ return True
226
+ if path.is_dir():
227
+ if not 'text/directory' in mimetypes:
228
+ logger.debug("ignoring directory candidate '%s'", path)
229
+ return False
230
+ if path.name in ['.git']:
231
+ logger.debug("ignoring directory candidate '%s'", path)
232
+ return False
233
+ return True
234
+ if not path.is_file():
235
+ logger.warning("ignoring non-file, non-directory candidate '%s'", path)
236
+ return False
237
+ res_mimetype = guess_media_type(path, fallback='')
238
+ if res_mimetype == 'application/json':
239
+ # always accept, regardless of configured mimetypes:
240
+ # needed for distributing or sharing parameter preset files
241
+ return True
242
+ if ['text/directory'] == mimetypes:
243
+ logger.debug("ignoring non-directory candidate '%s'", path)
244
+ return False
245
+ if 'application/octet-stream' in mimetypes:
246
+ # catch-all type - do not enforce anything
247
+ return True
248
+ if path.suffix in resource_suffixes:
249
+ return True
250
+ if any(path.suffix == MIME_TO_EXT.get(mime, None)
251
+ for mime in mimetypes):
252
+ return True
253
+ if not res_mimetype:
254
+ logger.warning("cannot determine content type of candidate '%s'", path)
255
+ return True
256
+ if any(apply_glob([res_mimetype], mime)
257
+ for mime in mimetypes):
258
+ return True
259
+ logger.debug("ignoring %s candidate '%s'", res_mimetype, path)
260
+ return False
261
+ candidates = sorted(filter(valid_resource_type, candidates))
262
+ return map(str, candidates)
263
+
264
+ def get_processor_resource_types(executable : str, ocrd_tool : Optional[Dict[str, Any]] = None) -> List[str]:
185
265
  """
186
266
  Determine what type of resource parameters a processor needs.
187
267
 
@@ -193,13 +273,16 @@ def get_processor_resource_types(executable, ocrd_tool=None):
193
273
  if not which(executable):
194
274
  return ['*/*']
195
275
  ocrd_tool = get_ocrd_tool_json(executable)
196
- if not next((True for p in ocrd_tool.get('parameters', {}).values() if 'content-type' in p), False):
197
- # None of the parameters for this processor are resources (or not
198
- # the resource parameters are not properly declared, so output both
199
- # directories and files
276
+ mime_types = [mime
277
+ for param in ocrd_tool.get('parameters', {}).values()
278
+ if param['type'] == 'string' and param.get('format', '') == 'uri' and 'content-type' in param
279
+ for mime in param['content-type'].split(',')]
280
+ if not len(mime_types):
281
+ # None of the parameters for this processor are resources
282
+ # (or the parameters' resource types are not properly declared,)
283
+ # so output both directories and files
200
284
  return ['*/*']
201
- return [p['content-type'] for p in ocrd_tool['parameters'].values()
202
- if 'content-type' in p]
285
+ return mime_types
203
286
 
204
287
 
205
288
  # ht @pabs3
@@ -220,12 +303,12 @@ class AtomicWriterPerms(AtomicWriter):
220
303
 
221
304
 
222
305
  @contextmanager
223
- def atomic_write(fpath):
306
+ def atomic_write(fpath : str) -> Iterator[str]:
224
307
  with atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True) as f:
225
308
  yield f
226
309
 
227
310
 
228
- def is_file_in_directory(directory, file):
311
+ def is_file_in_directory(directory : Union[str, PathLike], file : Union[str, PathLike]) -> bool:
229
312
  """
230
313
  Return True if ``file`` is in ``directory`` (by checking that all components of ``directory`` are in ``file.parts``)
231
314
  """
@@ -233,8 +316,7 @@ def is_file_in_directory(directory, file):
233
316
  file = Path(file)
234
317
  return list(file.parts)[:len(directory.parts)] == list(directory.parts)
235
318
 
236
-
237
- def itertree(path):
319
+ def itertree(path : Union[str, PathLike]) -> PathLike:
238
320
  """
239
321
  Generate a list of paths by recursively enumerating ``path``
240
322
  """
@@ -245,16 +327,14 @@ def itertree(path):
245
327
  yield from itertree(subpath)
246
328
  yield path
247
329
 
248
-
249
- def directory_size(path):
330
+ def directory_size(path : Union[str, PathLike]) -> int:
250
331
  """
251
332
  Calculates size of all files in directory ``path``
252
333
  """
253
334
  path = Path(path)
254
335
  return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file())
255
336
 
256
-
257
- def guess_media_type(input_file: str, fallback: str = None, application_xml: str = 'application/xml'):
337
+ def guess_media_type(input_file : str, fallback : Optional[str] = None, application_xml : str = 'application/xml') -> str:
258
338
  """
259
339
  Guess the media type of a file path
260
340
  """
@@ -266,7 +346,7 @@ def guess_media_type(input_file: str, fallback: str = None, application_xml: str
266
346
  if mimetype is None:
267
347
  mimetype = EXT_TO_MIME.get(''.join(Path(input_file).suffixes), fallback)
268
348
  if mimetype is None:
269
- raise ValueError(f"Could not determine MIME type of {input_file}")
349
+ raise ValueError("Could not determine MIME type of input_file '%s'", str(input_file))
270
350
  if mimetype == 'application/xml':
271
351
  mimetype = application_xml
272
352
  return mimetype
ocrd_validators/ocrd_tool.schema.yml CHANGED
@@ -142,18 +142,21 @@ properties:
142
142
  description: List the allowed values if a fixed list.
143
143
  content-type:
144
144
  type: string
145
- default: 'application/octet-stream'
146
145
  description: >
146
+ If parameter is reference to file (type=string format=uri):
147
147
  The media type of resources this processor expects for
148
148
  this parameter. Most processors use files for resources
149
149
  (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`)
150
150
  while others use directories of files (e.g. `default` for
151
- `ocrd-eynollah-segment`). If a parameter requires
152
- directories, it must set `content-type` to
151
+ `ocrd-eynollah-segment`).
152
+ If a parameter requires directories, it must set this to
153
153
  `text/directory`.
154
154
  cacheable:
155
155
  type: boolean
156
- description: "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change."
156
+ description: >
157
+ If parameter is reference to file (type=string format=uri):
158
+ Whether the file should be cached, e.g. because it is large
159
+ and won't change.
157
160
  default: false
158
161
  description:
159
162
  description: Concise description of what the tool does
ocrd/resource_list.yml DELETED
@@ -1,61 +0,0 @@
1
- # List available resources by processor for "ocrd resmgr"
2
- ocrd-calamari-recognize:
3
- # XXX disabled since older ocrd_calamari versions don't support resource resolving
4
- #- url: https://qurator-data.de/calamari-models/GT4HistOCR/2019-07-22T15_49+0200/model.tar.xz
5
- # type: archive
6
- # name: qurator-gt4histocr-0.3
7
- # description: Calamari model trained with GT4HistOCR
8
- # size: 116439072
9
- # path_in_archive: '.'
10
- # version_range: '< 1.0.0'
11
- - url: https://qurator-data.de/calamari-models/GT4HistOCR/2019-12-11T11_10+0100/model.tar.xz
12
- type: archive
13
- name: qurator-gt4histocr-1.0
14
- description: Calamari model trained with GT4HistOCR
15
- size: 90275264
16
- path_in_archive: '.'
17
- version_range: '>= 1.0.0'
18
- - url: https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_fraktur19-1.tar.gz
19
- type: archive
20
- name: zpd-fraktur19
21
- description: Model trained on 19th century german fraktur
22
- path_in_archive: 'c1_fraktur19-1'
23
- size: 86009886
24
- version_range: '>= 1.0.0'
25
- - url: https://github.com/Calamari-OCR/calamari_models_experimental/releases/download/v0.0.1-pre1/c1_latin-script-hist-3.tar.gz
26
- type: archive
27
- name: zpd-latin-script-hist-3
28
- path_in_archive: 'c1_latin-script-hist-3'
29
- description: Model trained on historical latin-script texts
30
- size: 88416863
31
- version_range: '>= 1.0.0'
32
- ocrd-cis-ocropy-recognize:
33
- - url: https://github.com/zuphilip/ocropy-models/raw/master/en-default.pyrnn.gz
34
- name: en-default.pyrnn.gz
35
- description: Default ocropy model
36
- size: 83826134
37
- - url: https://github.com/zuphilip/ocropy-models/raw/master/fraktur.pyrnn.gz
38
- name: fraktur.pyrnn.gz
39
- description: Default ocropy fraktur model
40
- size: 43882365
41
- - url: https://github.com/jze/ocropus-model_fraktur/raw/master/fraktur.pyrnn.gz
42
- name: fraktur-jze.pyrnn.gz
43
- description: ocropy fraktur model by github.com/jze
44
- size: 2961298
45
- - url: https://github.com/chreul/OCR_Testdata_EarlyPrintedBooks/raw/master/LatinHist-98000.pyrnn.gz
46
- name: LatinHist.pyrnn.gz
47
- description: ocropy historical latin model by github.com/chreul
48
- size: 16989864
49
- ocrd-sbb-binarize:
50
- - url: https://qurator-data.de/sbb_binarization/2021-03-09/models.tar.gz
51
- description: updated default models provided by github.com/qurator-spk
52
- name: default-2021-03-09
53
- type: archive
54
- path_in_archive: models
55
- size: 133363179
56
- - url: https://qurator-data.de/sbb_binarization/models.tar.gz
57
- description: default models provided by github.com/qurator-spk
58
- name: default
59
- type: archive
60
- path_in_archive: models
61
- size: 1654623597
File without changes
File without changes