ocrd 3.0.0b2__py3-none-any.whl → 3.0.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +3 -3
- ocrd/processor/base.py +5 -3
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/METADATA +1 -1
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/RECORD +9 -9
- ocrd_utils/config.py +13 -1
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/LICENSE +0 -0
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/WHEEL +0 -0
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/entry_points.txt +0 -0
- {ocrd-3.0.0b2.dist-info → ocrd-3.0.0b3.dist-info}/top_level.txt +0 -0
ocrd/cli/__init__.py
CHANGED
|
@@ -61,11 +61,11 @@ Variables:
|
|
|
61
61
|
\b
|
|
62
62
|
{config.describe('OCRD_DOWNLOAD_INPUT')}
|
|
63
63
|
\b
|
|
64
|
-
{config.describe('OCRD_MISSING_INPUT')}
|
|
64
|
+
{config.describe('OCRD_MISSING_INPUT', wrap_text=False)}
|
|
65
65
|
\b
|
|
66
|
-
{config.describe('OCRD_MISSING_OUTPUT')}
|
|
66
|
+
{config.describe('OCRD_MISSING_OUTPUT', wrap_text=False)}
|
|
67
67
|
\b
|
|
68
|
-
{config.describe('OCRD_EXISTING_OUTPUT')}
|
|
68
|
+
{config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
|
|
69
69
|
\b
|
|
70
70
|
{config.describe('OCRD_METS_CACHING')}
|
|
71
71
|
\b
|
ocrd/processor/base.py
CHANGED
|
@@ -470,7 +470,7 @@ class Processor():
|
|
|
470
470
|
max_workers=max_workers or 1,
|
|
471
471
|
thread_name_prefix=f"pagetask.{workspace.mets.unique_identifier}"
|
|
472
472
|
)
|
|
473
|
-
self._base_logger.debug("started executor %s", str(executor))
|
|
473
|
+
self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1)
|
|
474
474
|
tasks = {}
|
|
475
475
|
|
|
476
476
|
for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False):
|
|
@@ -478,7 +478,7 @@ class Processor():
|
|
|
478
478
|
page_id = next(input_file.pageId
|
|
479
479
|
for input_file in input_file_tuple
|
|
480
480
|
if input_file)
|
|
481
|
-
self._base_logger.info(f"
|
|
481
|
+
self._base_logger.info(f"preparing page {page_id}")
|
|
482
482
|
for i, input_file in enumerate(input_file_tuple):
|
|
483
483
|
if input_file is None:
|
|
484
484
|
# file/page not found in this file grp
|
|
@@ -521,9 +521,10 @@ class Processor():
|
|
|
521
521
|
# broad coverage of output failures (including TimeoutError)
|
|
522
522
|
except (Exception, TimeoutError) as err:
|
|
523
523
|
# FIXME: add re-usable/actionable logging
|
|
524
|
-
self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
|
|
525
524
|
if config.OCRD_MISSING_OUTPUT == 'ABORT':
|
|
525
|
+
self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
|
|
526
526
|
raise err
|
|
527
|
+
self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}")
|
|
527
528
|
if config.OCRD_MISSING_OUTPUT == 'SKIP':
|
|
528
529
|
nr_skipped += 1
|
|
529
530
|
continue
|
|
@@ -587,6 +588,7 @@ class Processor():
|
|
|
587
588
|
input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files)
|
|
588
589
|
assert isinstance(input_files[0], get_args(OcrdFileType))
|
|
589
590
|
page_id = input_files[0].pageId
|
|
591
|
+
self._base_logger.info("processing page %s", page_id)
|
|
590
592
|
for i, input_file in enumerate(input_files):
|
|
591
593
|
assert isinstance(input_file, get_args(OcrdFileType))
|
|
592
594
|
self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}")
|
|
@@ -10,7 +10,7 @@ ocrd/task_sequence.py,sha256=spiaUQaMM7M8WdBDoQGmLuTPm7tOugYXD6rcJ2UXzxw,6991
|
|
|
10
10
|
ocrd/workspace.py,sha256=4s0qscEosS7rQ0jfn1qJeT9B3eC31YippAX-RUjXghA,65608
|
|
11
11
|
ocrd/workspace_backup.py,sha256=iab_JjZ_mMP-G8NIUk4PZmfpNlQuGRoqc3NbTSSew1w,3621
|
|
12
12
|
ocrd/workspace_bagger.py,sha256=yU8H3xR5WmQKvgQewac71ie-DUWcfLnMS01D55zsEHQ,11971
|
|
13
|
-
ocrd/cli/__init__.py,sha256=
|
|
13
|
+
ocrd/cli/__init__.py,sha256=lNR6wMf7JhQ8Jf33tUkowJr0mB3423OMY0_6dkMRLvU,2672
|
|
14
14
|
ocrd/cli/bashlib.py,sha256=XGcO-MmYM3xJBRkSCLEZcGs0hqbw2GR8oyijJPtKnYM,5888
|
|
15
15
|
ocrd/cli/log.py,sha256=6_FrVmTKIIVNUaNLkuOJx8pvPhensHMuayJ0PA7T-XA,1562
|
|
16
16
|
ocrd/cli/network.py,sha256=oWBHFEURxfUdb_t-F4svP_ri7o5mqBoNQnLZLbsZLTA,602
|
|
@@ -26,7 +26,7 @@ ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkT
|
|
|
26
26
|
ocrd/decorators/ocrd_cli_options.py,sha256=4pcBLAFPSpYZLj6r9Yj1GZOQl4r_RWU00pyA4mHwFQk,2621
|
|
27
27
|
ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
|
|
28
28
|
ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
|
|
29
|
-
ocrd/processor/base.py,sha256=
|
|
29
|
+
ocrd/processor/base.py,sha256=341APZGx6zCbuxgX-XTkKhPfeQkqblykmC9zSMPH3ss,48843
|
|
30
30
|
ocrd/processor/helpers.py,sha256=Lp9zbHYCLpT3GnPzl-p7UCSFU5Nx99gYEYXwW04v0RI,10157
|
|
31
31
|
ocrd/processor/ocrd_page_result.py,sha256=AazEmnWyPEN47TxXVg0WUQpgFNV_mlIiExwwycUj0nQ,490
|
|
32
32
|
ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -87,7 +87,7 @@ ocrd_network/runtime_data/hosts.py,sha256=ml19ptzH4TFofyJR-Qp_Mn3sZUFbWoNe__rRXZ
|
|
|
87
87
|
ocrd_network/runtime_data/network_agents.py,sha256=5p_zKLqECBIHLw-Ya6eKcKSZcUM4ESiipEIphVxHBEA,5192
|
|
88
88
|
ocrd_network/runtime_data/network_services.py,sha256=xrPpFUU_Pa-XzGe2FEt5RmO17xqykIUmTr_9g6S7XSs,7892
|
|
89
89
|
ocrd_utils/__init__.py,sha256=U_zAQJwxg_aJ4CR84CKMNAUP6Cob8Er8Ikj42JmnUKo,5977
|
|
90
|
-
ocrd_utils/config.py,sha256=
|
|
90
|
+
ocrd_utils/config.py,sha256=Rkqv5wWEmlDDD0l1IWo9TPgn5ppPnHPRH9FfkMST29E,11117
|
|
91
91
|
ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
|
|
92
92
|
ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
|
|
93
93
|
ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
|
|
@@ -118,9 +118,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
|
|
|
118
118
|
ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
|
|
119
119
|
ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
|
|
120
120
|
ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
|
|
121
|
-
ocrd-3.0.
|
|
122
|
-
ocrd-3.0.
|
|
123
|
-
ocrd-3.0.
|
|
124
|
-
ocrd-3.0.
|
|
125
|
-
ocrd-3.0.
|
|
126
|
-
ocrd-3.0.
|
|
121
|
+
ocrd-3.0.0b3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
122
|
+
ocrd-3.0.0b3.dist-info/METADATA,sha256=WZhPkJV0F8A5k-0IVK8HZ5zGWVWwYSa6FuDlpkuh4Xc,10397
|
|
123
|
+
ocrd-3.0.0b3.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
|
|
124
|
+
ocrd-3.0.0b3.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
|
|
125
|
+
ocrd-3.0.0b3.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
|
|
126
|
+
ocrd-3.0.0b3.dist-info/RECORD,,
|
ocrd_utils/config.py
CHANGED
|
@@ -78,14 +78,26 @@ class OcrdEnvConfig():
|
|
|
78
78
|
raise ValueError(f"Unregistered env variable {name}")
|
|
79
79
|
return self._variables[name].has_default
|
|
80
80
|
|
|
81
|
+
def reset_defaults(self):
|
|
82
|
+
for name in self._variables:
|
|
83
|
+
try:
|
|
84
|
+
# we cannot use hasattr, because that delegates to getattr,
|
|
85
|
+
# which we override and provide defaults for (which of course
|
|
86
|
+
# cannot be removed)
|
|
87
|
+
if self.__getattribute__(name):
|
|
88
|
+
delattr(self, name)
|
|
89
|
+
except AttributeError:
|
|
90
|
+
pass
|
|
91
|
+
|
|
81
92
|
def describe(self, name, *args, **kwargs):
|
|
82
93
|
if not name in self._variables:
|
|
83
94
|
raise ValueError(f"Unregistered env variable {name}")
|
|
84
95
|
return self._variables[name].describe(*args, **kwargs)
|
|
85
96
|
|
|
86
97
|
def __getattr__(self, name):
|
|
98
|
+
# will be called if name is not accessible (has not been added directly yet)
|
|
87
99
|
if not name in self._variables:
|
|
88
|
-
raise
|
|
100
|
+
raise AttributeError(f"Unregistered env variable {name}")
|
|
89
101
|
var_obj = self._variables[name]
|
|
90
102
|
try:
|
|
91
103
|
raw_value = self.raw_value(name)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|