ocrd 3.0.0b2__py3-none-any.whl → 3.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -61,11 +61,11 @@ Variables:
61
61
  \b
62
62
  {config.describe('OCRD_DOWNLOAD_INPUT')}
63
63
  \b
64
- {config.describe('OCRD_MISSING_INPUT')}
64
+ {config.describe('OCRD_MISSING_INPUT', wrap_text=False)}
65
65
  \b
66
- {config.describe('OCRD_MISSING_OUTPUT')}
66
+ {config.describe('OCRD_MISSING_OUTPUT', wrap_text=False)}
67
67
  \b
68
- {config.describe('OCRD_EXISTING_OUTPUT')}
68
+ {config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
69
69
  \b
70
70
  {config.describe('OCRD_METS_CACHING')}
71
71
  \b
ocrd/processor/base.py CHANGED
@@ -166,11 +166,14 @@ class Processor():
166
166
 
167
167
  (Override if ``ocrd-tool.json`` is not distributed with the Python package.)
168
168
  """
169
- # XXX HACK
170
- module_tokens = self.__module__.split('.')
171
- if module_tokens[0] == 'src':
172
- module_tokens.pop(0)
173
- return resource_filename(module_tokens[0], self.metadata_filename)
169
+ module = inspect.getmodule(self)
170
+ module_tokens = module.__package__.split('.')
171
+ # for namespace packages, we cannot just use the first token
172
+ for i in range(len(module_tokens)):
173
+ prefix = '.'.join(module_tokens[:i + 1])
174
+ if sys.modules[prefix].__spec__.has_location:
175
+ return resource_filename(prefix, self.metadata_filename)
176
+ raise Exception("cannot find top-level module prefix for %s", module.__package__)
174
177
 
175
178
  @cached_property
176
179
  def metadata_rawdict(self) -> dict:
@@ -455,22 +458,22 @@ class Processor():
455
458
  nr_copied = 0
456
459
 
457
460
  # set up multithreading
458
- if self.max_workers <= 0:
459
- max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES)
460
- else:
461
- max_workers = max(0, min(config.OCRD_MAX_PARALLEL_PAGES, self.max_workers))
461
+ max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES)
462
+ if self.max_workers > 0 and self.max_workers < config.OCRD_MAX_PARALLEL_PAGES:
463
+ self._base_logger.info("limiting number of threads from %d to %d", max_workers, self.max_workers)
464
+ max_workers = self.max_workers
462
465
  if max_workers > 1:
463
466
  assert isinstance(workspace.mets, ClientSideOcrdMets), \
464
467
  "OCRD_MAX_PARALLEL_PAGES>1 requires also using --mets-server-url"
465
- if self.max_page_seconds <= 0:
466
- max_seconds = max(0, config.OCRD_PROCESSING_PAGE_TIMEOUT)
467
- else:
468
- max_seconds = max(0, min(config.OCRD_PROCESSING_PAGE_TIMEOUT, self.max_page_seconds))
468
+ max_seconds = max(0, config.OCRD_PROCESSING_PAGE_TIMEOUT)
469
+ if self.max_page_seconds > 0 and self.max_page_seconds < config.OCRD_PROCESSING_PAGE_TIMEOUT:
470
+ self._base_logger.info("limiting page timeout from %d to %d sec", max_seconds, self.max_page_seconds)
471
+ max_seconds = self.max_page_seconds
469
472
  executor = ThreadPoolExecutor(
470
473
  max_workers=max_workers or 1,
471
474
  thread_name_prefix=f"pagetask.{workspace.mets.unique_identifier}"
472
475
  )
473
- self._base_logger.debug("started executor %s", str(executor))
476
+ self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
474
477
  tasks = {}
475
478
 
476
479
  for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False):
@@ -478,7 +481,7 @@ class Processor():
478
481
  page_id = next(input_file.pageId
479
482
  for input_file in input_file_tuple
480
483
  if input_file)
481
- self._base_logger.info(f"processing page {page_id}")
484
+ self._base_logger.info(f"preparing page {page_id}")
482
485
  for i, input_file in enumerate(input_file_tuple):
483
486
  if input_file is None:
484
487
  # file/page not found in this file grp
@@ -521,9 +524,10 @@ class Processor():
521
524
  # broad coverage of output failures (including TimeoutError)
522
525
  except (Exception, TimeoutError) as err:
523
526
  # FIXME: add re-usable/actionable logging
524
- self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
525
527
  if config.OCRD_MISSING_OUTPUT == 'ABORT':
528
+ self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
526
529
  raise err
530
+ self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
527
531
  if config.OCRD_MISSING_OUTPUT == 'SKIP':
528
532
  nr_skipped += 1
529
533
  continue
@@ -587,6 +591,7 @@ class Processor():
587
591
  input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files)
588
592
  assert isinstance(input_files[0], get_args(OcrdFileType))
589
593
  page_id = input_files[0].pageId
594
+ self._base_logger.info("processing page %s", page_id)
590
595
  for i, input_file in enumerate(input_files):
591
596
  assert isinstance(input_file, get_args(OcrdFileType))
592
597
  self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
ocrd/workspace.py CHANGED
@@ -121,7 +121,10 @@ class Workspace():
121
121
  """
122
122
  Reload METS from the filesystem.
123
123
  """
124
- self.mets = OcrdMets(filename=self.mets_target)
124
+ if self.is_remote:
125
+ self.mets.reload()
126
+ else:
127
+ self.mets = OcrdMets(filename=self.mets_target)
125
128
 
126
129
  @deprecated_alias(pageId="page_id")
127
130
  @deprecated_alias(ID="file_id")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0b2
3
+ Version: 3.0.0b4
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -7,10 +7,10 @@ ocrd/resolver.py,sha256=Ba9ALQbTXz6_mla4VqN9tAfHoj6aKuNJAU4tIDnjcHE,14952
7
7
  ocrd/resource_list.yml,sha256=82-PiqkZnka1kTj3MQqNn4wXWKHHtoFchsQuetWuqFs,2633
8
8
  ocrd/resource_manager.py,sha256=8BMVKJq8J56hugi8vtGn9Ffuk7oRkbs197aG74aKbCY,16733
9
9
  ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
- ocrd/workspace.py,sha256=4s0qscEosS7rQ0jfn1qJeT9B3eC31YippAX-RUjXghA,65608
10
+ ocrd/workspace.py,sha256=V-7w3mRc0l8XmUOpdbsUPE2BfqWS8K8106pQPrDHbN4,65684
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
12
  ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
- ocrd/cli/__init__.py,sha256=XyYcbIuajaS2YM6HEWD4dfitdAzn111AWIaFPsTHoKQ,2621
13
+ ocrd/cli/__init__.py,sha256=lNR6wMf7JhQ8Jf33tUkowJr0mB3423OMY0_6dkMRLvU,2672
14
14
  ocrd/cli/bashlib.py,sha256=XGcO-MmYM3xJBRkSCLEZcGs0hqbw2GR8oyijJPtKnYM,5888
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
16
  ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
@@ -26,7 +26,7 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=4pcBLAFPSpYZLj6r9Yj1GZOQl4r_RWU00pyA4mHwFQk,2621
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=5_ZyZIjXorT2RNjtmB0haJQOZlOKGggZsKAV7aIZrts,48624
29
+ ocrd/processor/base.py,sha256=Q3dJn_I7iXi9wNKzbIsHp3LQ8_qp5yqu5CGJ8z17Xgc,49318
30
30
  ocrd/processor/helpers.py,sha256=Lp9zbHYCLpT3GnPzl-p7UCSFU5Nx99gYEYXwW04v0RI,10157
31
31
  ocrd/processor/ocrd_page_result.py,sha256=AazEmnWyPEN47TxXVg0WUQpgFNV_mlIiExwwycUj0nQ,490
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -87,7 +87,7 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
87
87
  ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
88
88
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
89
89
  ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
90
- ocrd_utils/config.py,sha256=BqpUjLjv-GVMypDd2a3gezEeEehtEP7uT3hWTdi7WhE,10608
90
+ ocrd_utils/config.py,sha256=Rkqv5wWEmlDDD0l1IWo9TPgn5ppPnHPRH9FfkMST29E,11117
91
91
  ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
92
92
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
93
93
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
@@ -118,9 +118,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
118
118
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
119
119
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
120
120
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
121
- ocrd-3.0.0b2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
- ocrd-3.0.0b2.dist-info/METADATA,sha256=lZsgG2wrhlpAM2b5SYp07i7sDBMO43nDNYdDRol4ypY,10397
123
- ocrd-3.0.0b2.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
124
- ocrd-3.0.0b2.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
125
- ocrd-3.0.0b2.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
126
- ocrd-3.0.0b2.dist-info/RECORD,,
121
+ ocrd-3.0.0b4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
+ ocrd-3.0.0b4.dist-info/METADATA,sha256=akkVQvrp5FNEvuD1FsH9mxW9SmpYRR3ZyeTns1jCja8,10397
123
+ ocrd-3.0.0b4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
124
+ ocrd-3.0.0b4.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
125
+ ocrd-3.0.0b4.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
126
+ ocrd-3.0.0b4.dist-info/RECORD,,
ocrd_utils/config.py CHANGED
@@ -78,14 +78,26 @@ class OcrdEnvConfig():
78
78
  raise ValueError(f"Unregistered env variable {name}")
79
79
  return self._variables[name].has_default
80
80
 
81
+ def reset_defaults(self):
82
+ for name in self._variables:
83
+ try:
84
+ # we cannot use hasattr, because that delegates to getattr,
85
+ # which we override and provide defaults for (which of course
86
+ # cannot be removed)
87
+ if self.__getattribute__(name):
88
+ delattr(self, name)
89
+ except AttributeError:
90
+ pass
91
+
81
92
  def describe(self, name, *args, **kwargs):
82
93
  if not name in self._variables:
83
94
  raise ValueError(f"Unregistered env variable {name}")
84
95
  return self._variables[name].describe(*args, **kwargs)
85
96
 
86
97
  def __getattr__(self, name):
98
+ # will be called if name is not accessible (has not been added directly yet)
87
99
  if not name in self._variables:
88
- raise ValueError(f"Unregistered env variable {name}")
100
+ raise AttributeError(f"Unregistered env variable {name}")
89
101
  var_obj = self._variables[name]
90
102
  try:
91
103
  raw_value = self.raw_value(name)
File without changes