ocrd 3.0.0b2__py3-none-any.whl → 3.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/cli/__init__.py CHANGED
@@ -61,11 +61,11 @@ Variables:
61
61
  \b
62
62
  {config.describe('OCRD_DOWNLOAD_INPUT')}
63
63
  \b
64
- {config.describe('OCRD_MISSING_INPUT')}
64
+ {config.describe('OCRD_MISSING_INPUT', wrap_text=False)}
65
65
  \b
66
- {config.describe('OCRD_MISSING_OUTPUT')}
66
+ {config.describe('OCRD_MISSING_OUTPUT', wrap_text=False)}
67
67
  \b
68
- {config.describe('OCRD_EXISTING_OUTPUT')}
68
+ {config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
69
69
  \b
70
70
  {config.describe('OCRD_METS_CACHING')}
71
71
  \b
ocrd/processor/base.py CHANGED
@@ -470,7 +470,7 @@ class Processor():
470
470
  max_workers=max_workers or 1,
471
471
  thread_name_prefix=f"pagetask.{workspace.mets.unique_identifier}"
472
472
  )
473
- self._base_logger.debug("started executor %s", str(executor))
473
+ self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
474
474
  tasks = {}
475
475
 
476
476
  for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False):
@@ -478,7 +478,7 @@ class Processor():
478
478
  page_id = next(input_file.pageId
479
479
  for input_file in input_file_tuple
480
480
  if input_file)
481
- self._base_logger.info(f"processing page {page_id}")
481
+ self._base_logger.info(f"preparing page {page_id}")
482
482
  for i, input_file in enumerate(input_file_tuple):
483
483
  if input_file is None:
484
484
  # file/page not found in this file grp
@@ -521,9 +521,10 @@ class Processor():
521
521
  # broad coverage of output failures (including TimeoutError)
522
522
  except (Exception, TimeoutError) as err:
523
523
  # FIXME: add re-usable/actionable logging
524
- self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
525
524
  if config.OCRD_MISSING_OUTPUT == 'ABORT':
525
+ self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
526
526
  raise err
527
+ self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
527
528
  if config.OCRD_MISSING_OUTPUT == 'SKIP':
528
529
  nr_skipped += 1
529
530
  continue
@@ -587,6 +588,7 @@ class Processor():
587
588
  input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files)
588
589
  assert isinstance(input_files[0], get_args(OcrdFileType))
589
590
  page_id = input_files[0].pageId
591
+ self._base_logger.info("processing page %s", page_id)
590
592
  for i, input_file in enumerate(input_files):
591
593
  assert isinstance(input_file, get_args(OcrdFileType))
592
594
  self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0b2
3
+ Version: 3.0.0b3
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -10,7 +10,7 @@ ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
10
10
  ocrd/workspace.py,sha256=4s0qscEosS7rQ0jfn1qJeT9B3eC31YippAX-RUjXghA,65608
11
11
  ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
12
12
  ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
13
- ocrd/cli/__init__.py,sha256=XyYcbIuajaS2YM6HEWD4dfitdAzn111AWIaFPsTHoKQ,2621
13
+ ocrd/cli/__init__.py,sha256=lNR6wMf7JhQ8Jf33tUkowJr0mB3423OMY0_6dkMRLvU,2672
14
14
  ocrd/cli/bashlib.py,sha256=XGcO-MmYM3xJBRkSCLEZcGs0hqbw2GR8oyijJPtKnYM,5888
15
15
  ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
16
16
  ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
@@ -26,7 +26,7 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=4pcBLAFPSpYZLj6r9Yj1GZOQl4r_RWU00pyA4mHwFQk,2621
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=5_ZyZIjXorT2RNjtmB0haJQOZlOKGggZsKAV7aIZrts,48624
29
+ ocrd/processor/base.py,sha256=341APZGx6zCbuxgX-XTkKhPfeQkqblykmC9zSMPH3ss,48843
30
30
  ocrd/processor/helpers.py,sha256=Lp9zbHYCLpT3GnPzl-p7UCSFU5Nx99gYEYXwW04v0RI,10157
31
31
  ocrd/processor/ocrd_page_result.py,sha256=AazEmnWyPEN47TxXVg0WUQpgFNV_mlIiExwwycUj0nQ,490
32
32
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -87,7 +87,7 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
87
87
  ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
88
88
  ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
89
89
  ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
90
- ocrd_utils/config.py,sha256=BqpUjLjv-GVMypDd2a3gezEeEehtEP7uT3hWTdi7WhE,10608
90
+ ocrd_utils/config.py,sha256=Rkqv5wWEmlDDD0l1IWo9TPgn5ppPnHPRH9FfkMST29E,11117
91
91
  ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
92
92
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
93
93
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
@@ -118,9 +118,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
118
118
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
119
119
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
120
120
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
121
- ocrd-3.0.0b2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
- ocrd-3.0.0b2.dist-info/METADATA,sha256=lZsgG2wrhlpAM2b5SYp07i7sDBMO43nDNYdDRol4ypY,10397
123
- ocrd-3.0.0b2.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
124
- ocrd-3.0.0b2.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
125
- ocrd-3.0.0b2.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
126
- ocrd-3.0.0b2.dist-info/RECORD,,
121
+ ocrd-3.0.0b3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
+ ocrd-3.0.0b3.dist-info/METADATA,sha256=WZhPkJV0F8A5k-0IVK8HZ5zGWVWwYSa6FuDlpkuh4Xc,10397
123
+ ocrd-3.0.0b3.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
124
+ ocrd-3.0.0b3.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
125
+ ocrd-3.0.0b3.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
126
+ ocrd-3.0.0b3.dist-info/RECORD,,
ocrd_utils/config.py CHANGED
@@ -78,14 +78,26 @@ class OcrdEnvConfig():
78
78
  raise ValueError(f"Unregistered env variable {name}")
79
79
  return self._variables[name].has_default
80
80
 
81
+ def reset_defaults(self):
82
+ for name in self._variables:
83
+ try:
84
+ # we cannot use hasattr, because that delegates to getattr,
85
+ # which we override and provide defaults for (which of course
86
+ # cannot be removed)
87
+ if self.__getattribute__(name):
88
+ delattr(self, name)
89
+ except AttributeError:
90
+ pass
91
+
81
92
  def describe(self, name, *args, **kwargs):
82
93
  if not name in self._variables:
83
94
  raise ValueError(f"Unregistered env variable {name}")
84
95
  return self._variables[name].describe(*args, **kwargs)
85
96
 
86
97
  def __getattr__(self, name):
98
+ # will be called if name is not accessible (has not been added directly yet)
87
99
  if not name in self._variables:
88
- raise ValueError(f"Unregistered env variable {name}")
100
+ raise AttributeError(f"Unregistered env variable {name}")
89
101
  var_obj = self._variables[name]
90
102
  try:
91
103
  raw_value = self.raw_value(name)
File without changes