ocrd 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd_utils/os.py CHANGED
@@ -8,6 +8,7 @@ __all__ = [
8
8
  'get_ocrd_tool_json',
9
9
  'get_moduledir',
10
10
  'get_processor_resource_types',
11
+ 'get_env_locations',
11
12
  'guess_media_type',
12
13
  'pushd_popd',
13
14
  'unzip_file_to_dir',
@@ -15,28 +16,30 @@ __all__ = [
15
16
  'redirect_stderr_and_stdout_to_file',
16
17
  ]
17
18
 
19
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
18
20
  from tempfile import TemporaryDirectory, gettempdir
19
21
  from functools import lru_cache
20
22
  from contextlib import contextmanager, redirect_stderr, redirect_stdout
21
23
  from shutil import which
22
24
  from json import loads
23
25
  from json.decoder import JSONDecodeError
24
- from os import getcwd, chdir, stat, chmod, umask, environ
26
+ from os import getcwd, chdir, stat, chmod, umask, environ, PathLike
25
27
  from pathlib import Path
26
28
  from os.path import abspath as abspath_, join
27
29
  from zipfile import ZipFile
28
30
  from subprocess import run, PIPE
29
31
  from mimetypes import guess_type as mimetypes_guess
30
32
  from filetype import guess as filetype_guess
33
+ from fnmatch import filter as apply_glob
31
34
 
32
35
  from atomicwrites import atomic_write as atomic_write_, AtomicWriter
33
36
 
34
- from .constants import EXT_TO_MIME
37
+ from .constants import EXT_TO_MIME, MIME_TO_EXT, RESOURCE_LOCATIONS, RESOURCES_DIR_SYSTEM
35
38
  from .config import config
36
39
  from .logging import getLogger
37
40
  from .introspect import resource_string
38
41
 
39
- def abspath(url):
42
+ def abspath(url : str) -> str:
40
43
  """
41
44
  Get a full path to a file or file URL
42
45
 
@@ -47,7 +50,7 @@ def abspath(url):
47
50
  return abspath_(url)
48
51
 
49
52
  @contextmanager
50
- def pushd_popd(newcwd=None, tempdir=False):
53
+ def pushd_popd(newcwd : Union[str, PathLike] = None, tempdir : bool = False) -> Iterator[PathLike]:
51
54
  if newcwd and tempdir:
52
55
  raise Exception("pushd_popd can accept either newcwd or tempdir, not both")
53
56
  try:
@@ -67,7 +70,7 @@ def pushd_popd(newcwd=None, tempdir=False):
67
70
  finally:
68
71
  chdir(oldcwd)
69
72
 
70
- def unzip_file_to_dir(path_to_zip, output_directory):
73
+ def unzip_file_to_dir(path_to_zip : Union[str, PathLike], output_directory : str) -> None:
71
74
  """
72
75
  Extract a ZIP archive to a directory
73
76
  """
@@ -75,7 +78,7 @@ def unzip_file_to_dir(path_to_zip, output_directory):
75
78
  z.extractall(output_directory)
76
79
 
77
80
  @lru_cache()
78
- def get_ocrd_tool_json(executable):
81
+ def get_ocrd_tool_json(executable : str) -> Dict[str, Any]:
79
82
  """
80
83
  Get the ``ocrd-tool`` description of ``executable``.
81
84
  """
@@ -90,11 +93,11 @@ def get_ocrd_tool_json(executable):
90
93
  except (JSONDecodeError, OSError) as e:
91
94
  getLogger('ocrd.utils.get_ocrd_tool_json').error(f'{executable} --dump-json produced invalid JSON: {e}')
92
95
  if 'resource_locations' not in ocrd_tool:
93
- ocrd_tool['resource_locations'] = ['data', 'cwd', 'system', 'module']
96
+ ocrd_tool['resource_locations'] = RESOURCE_LOCATIONS
94
97
  return ocrd_tool
95
98
 
96
99
  @lru_cache()
97
- def get_moduledir(executable):
100
+ def get_moduledir(executable : str) -> str:
98
101
  moduledir = None
99
102
  try:
100
103
  ocrd_all_moduledir = loads(resource_string('ocrd', 'ocrd-all-module-dir.json'))
@@ -106,57 +109,80 @@ def get_moduledir(executable):
106
109
  getLogger('ocrd.utils.get_moduledir').error(f'{executable} --dump-module-dir failed: {e}')
107
110
  return moduledir
108
111
 
109
- def list_resource_candidates(executable, fname, cwd=getcwd(), moduled=None, xdg_data_home=None):
112
def get_env_locations(executable: str) -> List[str]:
    """
    Get the resource directories configured for ``executable`` via its
    processor-specific environment variable.

    The variable name is derived from the executable name by replacing
    dashes with underscores, upper-casing, and appending ``_PATH``.
    Its value is split on ``:``.

    Args:
        executable (string): name of the processor executable

    Returns:
        the non-empty entries of the variable in their original order,
        or an empty list if the variable is unset or empty
    """
    processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
    # Drop empty entries (unset/empty variable, doubled or trailing colon):
    # an empty path is equivalent to the current directory for both
    # ``os.path.join`` and ``pathlib.Path``, so keeping it would silently
    # add the CWD as a resource location.
    return [location
            for location in environ.get(processor_path_var, '').split(':')
            if location]
117
+
118
def list_resource_candidates(executable : str, fname : str, cwd : Optional[str] = None, moduled : Optional[str] = None, xdg_data_home : Optional[str] = None) -> List[str]:
    """
    Generate candidates for processor resources according to
    https://ocr-d.de/en/spec/ocrd_tool#file-parameters

    The candidate paths for ``fname`` are built, in this order, from:
    ``cwd`` (defaulting to the current working directory at call time),
    every directory returned by :py:func:`get_env_locations`,
    ``<xdg_data_home>/ocrd-resources/<executable>`` (with ``xdg_data_home``
    defaulting to ``config.XDG_DATA_HOME``),
    ``<RESOURCES_DIR_SYSTEM>/<executable>``,
    and finally ``moduled`` if given.

    No candidate is checked for existence.
    """
    if cwd is None:
        cwd = getcwd()
    data_home = xdg_data_home or config.XDG_DATA_HOME
    candidates = [join(cwd, fname)]
    candidates.extend(join(location, fname)
                      for location in get_env_locations(executable))
    candidates += [
        join(data_home, 'ocrd-resources', executable, fname),
        join(RESOURCES_DIR_SYSTEM, executable, fname),
    ]
    if moduled:
        candidates.append(join(moduled, fname))
    return candidates
125
135
 
126
def list_all_resources(executable : str, ocrd_tool : Optional[Dict[str, Any]] = None, moduled : Optional[str] = None, xdg_data_home : Optional[str] = None) -> List[str]:
    """
    List all processor resources in the filesystem according to
    https://ocr-d.de/en/spec/ocrd_tool#resource-parameters

    Args:
        executable (string): name of the processor executable
        ocrd_tool (dict, None): ocrd-tool description of the processor;
            queried via :py:func:`get_ocrd_tool_json` if not given
        moduled (string, None): module directory of the processor;
            only searched if given and ``module`` is a resource location
        xdg_data_home (string, None): base of the ``data`` location;
            defaults to ``config.XDG_DATA_HOME``

    Returns:
        a list of path names (as strings), ordered by sorting the
        candidate paths, and filtered by the content types of the
        processor's resource parameters
    """
    xdg_data_home = xdg_data_home or config.XDG_DATA_HOME
    if ocrd_tool is None:
        ocrd_tool = get_ocrd_tool_json(executable)
    # processor we're looking for might not be installed, hence the fallbacks
    try:
        mimetypes = get_processor_resource_types(executable, ocrd_tool=ocrd_tool)
    except KeyError:
        mimetypes = ['*/*']
    try:
        resource_locations = ocrd_tool['resource_locations']
    except KeyError:
        # Assume the default
        resource_locations = RESOURCE_LOCATIONS
    try:
        # fixme: if resources_list contains directories, their "suffix" will interfere
        # (e.g. dirname without dot means we falsely match files without suffix)
        resource_suffixes = [Path(res['name']).suffix
                             for res in ocrd_tool['resources']]
    except KeyError:
        resource_suffixes = []
    logger = getLogger('ocrd.utils.list_all_resources')
    candidates = []
    # cwd would list too many false positives:
    # if 'cwd' in resource_locations:
    #     cwddir = Path.cwd()
    #     candidates.append(cwddir.itertree())
    # but we do not use this anyway:
    # relative paths are tried w.r.t. CWD
    # prior to list_all_resources resolution.
    for processor_path in get_env_locations(executable):
        processor_path = Path(processor_path)
        if processor_path.is_dir():
            candidates += processor_path.iterdir()
    if 'data' in resource_locations:
        datadir = Path(xdg_data_home, 'ocrd-resources', executable)
        if datadir.is_dir():
            candidates += datadir.iterdir()
    if 'system' in resource_locations:
        systemdir = Path(RESOURCES_DIR_SYSTEM, executable)
        if systemdir.is_dir():
            candidates += systemdir.iterdir()
    if 'module' in resource_locations and moduled:
        # only the top level of the module directory is scanned,
        # and subdirectories themselves are skipped
        moduled = Path(moduled)
        for resource in moduled.iterdir():
            if resource.is_dir():
                continue
            if any(resource.match(pattern) for pattern in
                   # the module directory mixes
                   # code and data; `is_resource()` only singles out
                   # files over directories; but we want data files only
                   # todo: more code and cache exclusion patterns!
                   ['*.py', '*.py[cod]', '*~', '.*.swp', '*.swo',
                    '__pycache__/*', '*.egg-info/*', '*.egg',
                    'copyright.txt', 'LICENSE*', 'README.md', 'MANIFEST',
                    'TAGS', '.DS_Store',
                    # C extensions
                    '*.so',
                    # translations
                    '*.mo', '*.pot',
                    '*.log', '*.orig', '*.BAK',
                    '.git/*',
                    # our stuff
                    'ocrd-tool.json',
                    'environment.pickle', 'resource_list.yml', 'lib.bash']):
                logger.debug("ignoring module candidate '%s'", resource)
                continue
            candidates.append(resource)
    if mimetypes != ['*/*']:
        logger.debug("matching candidates for %s by content-type %s", executable, str(mimetypes))
    def valid_resource_type(path):
        # decide whether a candidate path fits the processor's resource types
        if '*/*' in mimetypes:
            return True
        if path.is_dir():
            if 'text/directory' not in mimetypes:
                logger.debug("ignoring directory candidate '%s'", path)
                return False
            if path.name in ['.git']:
                logger.debug("ignoring directory candidate '%s'", path)
                return False
            return True
        if not path.is_file():
            logger.warning("ignoring non-file, non-directory candidate '%s'", path)
            return False
        res_mimetype = guess_media_type(path, fallback='')
        if res_mimetype == 'application/json':
            # always accept, regardless of configured mimetypes:
            # needed for distributing or sharing parameter preset files
            return True
        if ['text/directory'] == mimetypes:
            logger.debug("ignoring non-directory candidate '%s'", path)
            return False
        if 'application/octet-stream' in mimetypes:
            # catch-all type - do not enforce anything
            return True
        if path.suffix in resource_suffixes:
            return True
        if any(path.suffix == MIME_TO_EXT.get(mime, None)
               for mime in mimetypes):
            return True
        if not res_mimetype:
            logger.warning("cannot determine content type of candidate '%s'", path)
            return True
        # the configured types may be glob patterns (e.g. ``image/*``)
        if any(apply_glob([res_mimetype], mime)
               for mime in mimetypes):
            return True
        logger.debug("ignoring %s candidate '%s'", res_mimetype, path)
        return False
    candidates = sorted(filter(valid_resource_type, candidates))
    # materialize: callers are promised a list (which can be measured,
    # indexed and iterated repeatedly), not a one-shot ``map`` iterator
    return [str(candidate) for candidate in candidates]
176
251
 
177
- def get_processor_resource_types(executable, ocrd_tool=None):
252
+ def get_processor_resource_types(executable : str, ocrd_tool : Optional[Dict[str, Any]] = None) -> List[str]:
178
253
  """
179
254
  Determine what type of resource parameters a processor needs.
180
255
 
@@ -186,13 +261,16 @@ def get_processor_resource_types(executable, ocrd_tool=None):
186
261
  if not which(executable):
187
262
  return ['*/*']
188
263
  ocrd_tool = get_ocrd_tool_json(executable)
189
- if not next((True for p in ocrd_tool.get('parameters', {}).values() if 'content-type' in p), False):
190
- # None of the parameters for this processor are resources (or not
191
- # the resource parameters are not properly declared, so output both
192
- # directories and files
264
+ mime_types = [mime
265
+ for param in ocrd_tool.get('parameters', {}).values()
266
+ if param['type'] == 'string' and param.get('format', '') == 'uri' and 'content-type' in param
267
+ for mime in param['content-type'].split(',')]
268
+ if not len(mime_types):
269
+ # None of the parameters for this processor are resources
270
+ # (or the parameters' resource types are not properly declared,)
271
+ # so output both directories and files
193
272
  return ['*/*']
194
- return [p['content-type'] for p in ocrd_tool['parameters'].values()
195
- if 'content-type' in p]
273
+ return mime_types
196
274
 
197
275
  # ht @pabs3
198
276
  # https://github.com/untitaker/python-atomicwrites/issues/42
@@ -211,12 +289,12 @@ class AtomicWriterPerms(AtomicWriter):
211
289
  return f
212
290
 
213
291
@contextmanager
def atomic_write(fpath : str) -> Iterator[str]:
    """
    Context manager for writing to ``fpath`` through ``atomicwrites``,
    using :py:class:`AtomicWriterPerms` as writer class and overwriting
    any existing file. Yields the object to write to.
    """
    writer = atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True)
    with writer as handle:
        yield handle
217
295
 
218
296
 
219
- def is_file_in_directory(directory, file):
297
+ def is_file_in_directory(directory : Union[str, PathLike], file : Union[str, PathLike]) -> bool:
220
298
  """
221
299
  Return True if ``file`` is in ``directory`` (by checking that all components of ``directory`` are in ``file.parts``)
222
300
  """
@@ -224,7 +302,7 @@ def is_file_in_directory(directory, file):
224
302
  file = Path(file)
225
303
  return list(file.parts)[:len(directory.parts)] == list(directory.parts)
226
304
 
227
- def itertree(path):
305
+ def itertree(path : Union[str, PathLike]) -> PathLike:
228
306
  """
229
307
  Generate a list of paths by recursively enumerating ``path``
230
308
  """
@@ -235,14 +313,14 @@ def itertree(path):
235
313
  yield from itertree(subpath)
236
314
  yield path
237
315
 
238
def directory_size(path : Union[str, PathLike]) -> int:
    """
    Calculates size of all files in directory ``path``

    The directory is traversed recursively; only regular files
    contribute their ``st_size`` (in bytes) to the total.
    """
    total = 0
    for entry in Path(path).glob('**/*'):
        if entry.is_file():
            total += entry.stat().st_size
    return total
244
322
 
245
- def guess_media_type(input_file : str, fallback : str = None, application_xml : str = 'application/xml'):
323
+ def guess_media_type(input_file : str, fallback : Optional[str] = None, application_xml : str = 'application/xml') -> str:
246
324
  """
247
325
  Guess the media type of a file path
248
326
  """
@@ -254,7 +332,7 @@ def guess_media_type(input_file : str, fallback : str = None, application_xml :
254
332
  if mimetype is None:
255
333
  mimetype = EXT_TO_MIME.get(''.join(Path(input_file).suffixes), fallback)
256
334
  if mimetype is None:
257
- raise ValueError("Could not determine MIME type of input_file must")
335
+ raise ValueError("Could not determine MIME type of input_file '%s'", str(input_file))
258
336
  if mimetype == 'application/xml':
259
337
  mimetype = application_xml
260
338
  return mimetype
@@ -142,18 +142,21 @@ properties:
142
142
  description: List the allowed values if a fixed list.
143
143
  content-type:
144
144
  type: string
145
- default: 'application/octet-stream'
146
145
  description: >
146
+ If parameter is reference to file (type=string format=uri):
147
147
  The media type of resources this processor expects for
148
148
  this parameter. Most processors use files for resources
149
149
  (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`)
150
150
  while others use directories of files (e.g. `default` for
151
- `ocrd-eynollah-segment`). If a parameter requires
152
- directories, it must set `content-type` to
151
+ `ocrd-eynollah-segment`).
152
+ If a parameter requires directories, it must set this to
153
153
  `text/directory`.
154
154
  cacheable:
155
155
  type: boolean
156
- description: "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change."
156
+ description: >
157
+ If parameter is reference to file (type=string format=uri):
158
+ Whether the file should be cached, e.g. because it is large
159
+ and won't change.
157
160
  default: false
158
161
  description:
159
162
  description: Concise description of what the tool does
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
 
8
8
  from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename, DEFAULT_METS_BASENAME
9
9
  from ocrd_models import ValidationReport
10
+ from ocrd_models.constants import PAGE_ALTIMG_FEATURES
10
11
  from ocrd_modelfactory import page_from_file
11
12
 
12
13
  from .constants import FILE_GROUP_CATEGORIES, FILE_GROUP_PREFIX
@@ -98,6 +99,9 @@ class WorkspaceValidator():
98
99
  self.page_coordinate_consistency = page_coordinate_consistency
99
100
  # there will be more options to come
100
101
  self.page_checks = [check for check in ['mets_fileid_page_pcgtsid',
102
+ 'imagefilename',
103
+ 'alternativeimage_filename',
104
+ 'alternativeimage_comments',
101
105
  'dimension',
102
106
  'page',
103
107
  'page_xsd']
@@ -118,7 +122,7 @@ class WorkspaceValidator():
118
122
  mets_url (string): URL of the METS file
119
123
  src_dir (string, None): Directory containing mets file
120
124
  skip (list): Validation checks to omit. One or more of
121
- 'mets_unique_identifier', 'mets_file_group_names',
125
+ 'mets_unique_identifier',
122
126
  'mets_files', 'pixel_density', 'dimension', 'url',
123
127
  'multipage', 'page', 'page_xsd', 'mets_xsd',
124
128
  'mets_fileid_page_pcgtsid'
@@ -145,8 +149,6 @@ class WorkspaceValidator():
145
149
  try:
146
150
  if 'mets_unique_identifier' not in self.skip:
147
151
  self._validate_mets_unique_identifier()
148
- if 'mets_file_group_names' not in self.skip:
149
- self._validate_mets_file_group_names()
150
152
  if 'mets_files' not in self.skip:
151
153
  self._validate_mets_files()
152
154
  if 'pixel_density' not in self.skip:
@@ -192,7 +194,11 @@ class WorkspaceValidator():
192
194
  self.workspace.download_file(f)
193
195
  page = page_from_file(f).get_Page()
194
196
  imageFilename = page.imageFilename
195
- if not self.mets.find_files(url=imageFilename, **self.find_kwargs):
197
+ if is_local_filename(imageFilename):
198
+ kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
199
+ else:
200
+ kwargs = dict(url=imageFilename, **self.find_kwargs)
201
+ if not self.mets.find_files(**kwargs):
196
202
  self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
197
203
  if is_local_filename(imageFilename) and not Path(imageFilename).exists():
198
204
  self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
@@ -295,6 +301,9 @@ class WorkspaceValidator():
295
301
  if f.url and 'url' not in self.skip:
296
302
  if re.match(r'^file:/[^/]', f.url):
297
303
  self.report.add_error(f"File '{f.ID}' has an invalid (Java-specific) file URL '{f.url}'")
304
+ elif ':' not in f.url:
305
+ self.report.add_error(f"File '{f.ID}' has an invalid (non-URI) file URL '{f.url}'")
306
+ continue
298
307
  scheme = f.url[0:f.url.index(':')]
299
308
  if scheme not in ('http', 'https', 'file'):
300
309
  self.report.add_warning(f"File '{f.ID}' has non-HTTP, non-file URL '{f.url}'")
@@ -321,17 +330,43 @@ class WorkspaceValidator():
321
330
  pcgts = page_from_file(f)
322
331
  page = pcgts.get_Page()
323
332
  if 'dimension' in self.page_checks:
324
- _, _, exif = self.workspace.image_from_page(page, f.pageId)
325
- if page.imageHeight != exif.height:
326
- self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {exif.height})")
327
- if page.imageWidth != exif.width:
328
- self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {exif.width})")
333
+ img = self.workspace._resolve_image_as_pil(page.imageFilename)
334
+ if page.imageHeight != img.height:
335
+ self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {img.height})")
336
+ if page.imageWidth != img.width:
337
+ self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {img.width})")
329
338
  if 'imagefilename' in self.page_checks:
330
339
  imageFilename = page.imageFilename
331
- if not self.mets.find_files(url=imageFilename):
340
+ if is_local_filename(imageFilename):
341
+ kwargs = dict(local_filename=imageFilename, **self.find_kwargs)
342
+ else:
343
+ kwargs = dict(url=imageFilename, **self.find_kwargs)
344
+ if not self.mets.find_files(**kwargs):
332
345
  self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS")
333
346
  if is_local_filename(imageFilename) and not Path(imageFilename).exists():
334
347
  self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file")
348
+ if 'alternativeimage_filename' in self.page_checks:
349
+ for altimg in page.get_AllAlternativeImages():
350
+ if is_local_filename(altimg.filename):
351
+ kwargs = dict(local_filename=altimg.filename, **self.find_kwargs)
352
+ else:
353
+ kwargs = dict(url=altimg.filename, **self.find_kwargs)
354
+ if not self.mets.find_files(**kwargs):
355
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
356
+ f"'{altimg.filename}' not found in METS")
357
+ if is_local_filename(altimg.filename) and not Path(altimg.filename).exists():
358
+ self.report.add_warning(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
359
+ f"'{altimg.filename}' points to non-existent local file")
360
+ if 'alternativeimage_comments' in self.page_checks:
361
+ for altimg in page.get_AllAlternativeImages():
362
+ if altimg.comments is None:
363
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
364
+ f"'{altimg.filename}' features not specified in PAGE")
365
+ else:
366
+ for feature in altimg.comments.split(','):
367
+ if feature not in PAGE_ALTIMG_FEATURES:
368
+ self.report.add_error(f"PAGE '{f.ID}': {altimg.parent_object_.id} AlternativeImage "
369
+ f"'{altimg.filename}' feature '{feature}' not standardized for PAGE")
335
370
  if 'mets_fileid_page_pcgtsid' in self.page_checks and pcgts.pcGtsId != f.ID:
336
371
  self.report.add_warning('pc:PcGts/@pcGtsId differs from mets:file/@ID: "%s" !== "%s"' % (pcgts.pcGtsId or '', f.ID or ''))
337
372
 
File without changes