ocrd 3.0.0b5__py3-none-any.whl → 3.0.0b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ocrd/processor/helpers.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Helper methods for running and documenting processors
3
3
  """
4
4
  from time import perf_counter, process_time
5
+ from os import times
5
6
  from functools import lru_cache
6
7
  import json
7
8
  import inspect
@@ -94,6 +95,7 @@ def run_processor(
94
95
  log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)
95
96
  t0_wall = perf_counter()
96
97
  t0_cpu = process_time()
98
+ t0_os = times()
97
99
  if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']):
98
100
  backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil'
99
101
  from memory_profiler import memory_usage # pylint: disable=import-outside-toplevel
@@ -123,7 +125,13 @@ def run_processor(
123
125
 
124
126
  t1_wall = perf_counter() - t0_wall
125
127
  t1_cpu = process_time() - t0_cpu
126
- logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % (
128
+ t1_os = times()
129
+ # add CPU time from child processes (page worker etc)
130
+ t1_cpu += t1_os.children_user - t0_os.children_user
131
+ t1_cpu += t1_os.children_system - t0_os.children_system
132
+ logProfile.info(
133
+ "Executing processor '%s' took %fs (wall) %fs (CPU)( "
134
+ "[--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']",
127
135
  ocrd_tool['executable'],
128
136
  t1_wall,
129
137
  t1_cpu,
@@ -131,7 +139,7 @@ def run_processor(
131
139
  processor.output_file_grp or '',
132
140
  json.dumps(processor.parameter) or '',
133
141
  processor.page_id or ''
134
- ))
142
+ )
135
143
  workspace.mets.add_agent(
136
144
  name=name,
137
145
  _type='OTHER',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocrd
3
- Version: 3.0.0b5
3
+ Version: 3.0.0b7
4
4
  Summary: OCR-D framework
5
5
  Author-email: Konstantin Baierer <unixprog@gmail.com>
6
6
  License: Apache License 2.0
@@ -24,6 +24,7 @@ Requires-Dist: frozendict>=2.3.4
24
24
  Requires-Dist: gdown
25
25
  Requires-Dist: httpx>=0.22.0
26
26
  Requires-Dist: jsonschema>=4
27
+ Requires-Dist: loky
27
28
  Requires-Dist: lxml
28
29
  Requires-Dist: memory-profiler>=0.58.0
29
30
  Requires-Dist: numpy
@@ -18,16 +18,17 @@ ocrd/cli/ocrd_tool.py,sha256=qaJgt-LNH0tXkaupMNrEKXasxcgsabHdfLdYESEsomk,7035
18
18
  ocrd/cli/process.py,sha256=8KD0i7LT01H9u5CC1vktYMEVpS67da_rp_09_EOECmw,1233
19
19
  ocrd/cli/resmgr.py,sha256=bTE-MpF7RRCHhgAbknqZUFHgHScIK6FR3S4h4DEAets,10080
20
20
  ocrd/cli/validate.py,sha256=nvageDaHCETcE71X5lu7i_4JKpgo9MrvJKinVPLYUTI,5727
21
- ocrd/cli/workspace.py,sha256=InIQ5rfQWPn4Qsd1s_xA6AC6ndZLCsuyhoAEiqP8bK4,39479
21
+ ocrd/cli/workspace.py,sha256=KTbSzIUrba5WoYETvM9ElRZVsDUHCGVvjoFgBGZS2nU,40468
22
22
  ocrd/cli/zip.py,sha256=MMJLw3OXWiJVfVtrdJcBkbB8vA1IzSautluazZRuCQ0,5910
23
- ocrd/decorators/__init__.py,sha256=IJlA1XcdVBO6Hxm9rNDya7QYcqeWcaXXuLtGjfjcen8,7596
23
+ ocrd/decorators/__init__.py,sha256=NWqZUTSBve6yIBgTN3miGmz4w1IFefLhG92qXx5ADtw,7620
24
24
  ocrd/decorators/loglevel_option.py,sha256=tgipROEu3t4hkwWvFssd80k2SbTBwBIC4WNE6Gc-XAg,798
25
25
  ocrd/decorators/mets_find_options.py,sha256=d4oATKMP6bFQHNqOK6nLqgUiWF2FYdkPvzkTVRMYpKo,635
26
26
  ocrd/decorators/ocrd_cli_options.py,sha256=hr2EugwAY_-GJ7F7g77Od9o9eAqhfLBHSpfmCql2OCU,2665
27
27
  ocrd/decorators/parameter_option.py,sha256=n8hYw7XVTd3i3tvpK8F1Jx_CqRp6EGF9qJVH95yj92Q,1076
28
28
  ocrd/processor/__init__.py,sha256=39ymNwYRdc-b_OJzzKmWCvo2ga3KdsGSYDHE1Hzkn_w,274
29
- ocrd/processor/base.py,sha256=_TvaxKf_oMaIfuUcaFWas8YimhZ-l1d3RWGELlBJfy8,50307
30
- ocrd/processor/helpers.py,sha256=vPYUri6ucuhdTNrideywriJ0fCa8UE2QyBXOmS-7RcQ,10232
29
+ ocrd/processor/base.py,sha256=8kFrYLd1cSHBaBolHjXdIVHwmV8muKgBCBrAYw7kWTQ,59154
30
+ ocrd/processor/concurrent.py,sha256=IMMBFGDGqfpgm7Rp6J_dnXVckyBKntwARwcKDnxadHQ,38217
31
+ ocrd/processor/helpers.py,sha256=8ngrqAJ01BSoSJNsIoK_YfA8QdryM5y0MqaqA9f7ELM,10483
31
32
  ocrd/processor/ocrd_page_result.py,sha256=eDkpyVHcpaBzTHXiGrcNk9PP9Xr-XZru2w_uoX_ZeNA,510
32
33
  ocrd/processor/builtin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
34
  ocrd/processor/builtin/dummy_processor.py,sha256=iWiw_jJXOqwr7-hFjdkmTCCo1xGr6MLGOshx81PTu-8,3548
@@ -40,7 +41,7 @@ ocrd_models/mets-empty.xml,sha256=dFixfbxSXrgjZx9BfdIKWHX-khNmp7dNYaFe2qQSwCY,12
40
41
  ocrd_models/ocrd_agent.py,sha256=E9OtDhz9UfKb6ou2qvsuCL9NlO1V6zMb0s8nVq8dVos,5609
41
42
  ocrd_models/ocrd_exif.py,sha256=5BRLjvB6jg36V68i8jvVnT2SSNnpqLbhLsaMuP51Scw,4583
42
43
  ocrd_models/ocrd_file.py,sha256=7lyHezuNnl2FEYV1lV35-QTCrgYAL-3wO2ulFUNq2Ak,9717
43
- ocrd_models/ocrd_mets.py,sha256=h3y_WI5fVLsbBoUIRNH2ebjuO1-_P6T3BMIULX-ZOIs,42514
44
+ ocrd_models/ocrd_mets.py,sha256=cRBEnvRNmRUhcrKouqygTQqYA-XdgyA-FNLwt86V0vg,42878
44
45
  ocrd_models/ocrd_page.py,sha256=sVIvvMeBT8eZnOfW0DTjQUNyu62-llz0v_Ga5Xo-tUM,5393
45
46
  ocrd_models/ocrd_page_generateds.py,sha256=wfx3vESMAi08rl6-16zNVJe4E3B6APIvL6RCr1roAzg,774092
46
47
  ocrd_models/ocrd_xml_base.py,sha256=OW57mXLlwm1nH8CNefvXmwLRws9KL9zSrb-3vH--mX8,1641
@@ -92,8 +93,8 @@ ocrd_utils/constants.py,sha256=ImbG1d8t2MW3uuFi-mN6aY90Zn74liAKZBKlfuKN86w,3278
92
93
  ocrd_utils/deprecate.py,sha256=4i50sZsA3Eevqn5D-SL5yGf9KEZfGCV4A5Anzn1GRMs,1026
93
94
  ocrd_utils/image.py,sha256=zNNX1cnRy6yvrxx8mnYQiqWraAh5-i4a1AOfCCg4SmI,24781
94
95
  ocrd_utils/introspect.py,sha256=gfBlmeEFuRmRUSgdSK0jOxRpYqDRXl2IAE6gv2MZ6as,1977
95
- ocrd_utils/logging.py,sha256=5_-5T5OWSYicNk8SQyjVqdRj2bVl-gDK1Th-C7oW_HE,8248
96
- ocrd_utils/ocrd_logging.conf,sha256=kl9x9JS1d8h8F0QZabvrjZtW1iApIaChvkImYafKO5g,3623
96
+ ocrd_utils/logging.py,sha256=sHNfih9kBfvKsHdLqMK_ew9Pld1GsRyYlrZHIySujnw,7313
97
+ ocrd_utils/ocrd_logging.conf,sha256=JlWmA_5vg6HnjPGjTC4mA5vFHqmnEinwllSTiOw5CCo,3473
97
98
  ocrd_utils/os.py,sha256=acRRdDBI8L6BK0Mf773yKEzwdpZSFRBJEKB2crL4EjU,9865
98
99
  ocrd_utils/str.py,sha256=cRgqYILDGOAqWr0qrCrV52I3y4wvpwDVtnBGEUjXNS4,10116
99
100
  ocrd_validators/__init__.py,sha256=ZFc-UqRVBk9o1YesZFmr9lOepttNJ_NKx1Zdb7g_YsU,972
@@ -118,9 +119,9 @@ ocrd_validators/xlink.xsd,sha256=8fW7YAMWXN2PbB_MMvj9H5ZeFoEBDzuYBtlGC8_6ijw,318
118
119
  ocrd_validators/xsd_mets_validator.py,sha256=4GWfLyqkmca0x7osDuXuExYuM0HWVrKoqn0S35sFhHU,467
119
120
  ocrd_validators/xsd_page_validator.py,sha256=BNz_9u-Ek4UCeyZu3KxSQoolfW9lvuaSR9nIu1XXxeE,467
120
121
  ocrd_validators/xsd_validator.py,sha256=6HrVAf6SzCvfUIuQdIzz9bOq4V-zhyii9yrUPoK2Uvo,2094
121
- ocrd-3.0.0b5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
122
- ocrd-3.0.0b5.dist-info/METADATA,sha256=eIpkAoobj7QocP9VXYASLqE82wN35JlvYuYjSBLk30o,10397
123
- ocrd-3.0.0b5.dist-info/WHEEL,sha256=5Mi1sN9lKoFv_gxcPtisEVrJZihrm_beibeg5R6xb4I,91
124
- ocrd-3.0.0b5.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
125
- ocrd-3.0.0b5.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
126
- ocrd-3.0.0b5.dist-info/RECORD,,
122
+ ocrd-3.0.0b7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
123
+ ocrd-3.0.0b7.dist-info/METADATA,sha256=lc6oZVb9SPl97qvspPigal4yb_1DBBRZwE3GQUTAK_o,10417
124
+ ocrd-3.0.0b7.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
125
+ ocrd-3.0.0b7.dist-info/entry_points.txt,sha256=tV_gAdO8cbnOjS0GmKfJKbN60xBAV2DQRX6hEjleSjE,94
126
+ ocrd-3.0.0b7.dist-info/top_level.txt,sha256=pUgiN42t4KXC5rvpi6V8atza31XP4SCznXpXlVlvomM,75
127
+ ocrd-3.0.0b7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.0.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
ocrd_models/ocrd_mets.py CHANGED
@@ -599,7 +599,16 @@ class OcrdMets(OcrdXmlDocument):
599
599
  If return_divs is set, returns div memory objects instead of strings of ids
600
600
  """
601
601
  if for_fileIds is None and for_pageIds is None:
602
+ if return_divs:
603
+ if self._cache_flag:
604
+ return list(self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID].values())
605
+
606
+ return [x for x in self._tree.getroot().xpath(
607
+ 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]',
608
+ namespaces=NS)]
609
+
602
610
  return self.physical_pages
611
+
603
612
  # log = getLogger('ocrd.models.ocrd_mets.get_physical_pages')
604
613
  if for_pageIds is not None:
605
614
  ret = []
ocrd_utils/logging.py CHANGED
@@ -46,13 +46,8 @@ __all__ = [
46
46
  'setOverrideLogLevel',
47
47
  ]
48
48
 
49
- # These are the loggers we add handlers to
50
- ROOT_OCRD_LOGGERS = [
51
- 'ocrd',
52
- 'ocrd_network'
53
- ]
54
-
55
49
  LOGGING_DEFAULTS = {
50
+ '': logging.WARNING,
56
51
  'ocrd': logging.INFO,
57
52
  'ocrd_network': logging.INFO,
58
53
  # 'ocrd.resolver': logging.INFO,
@@ -113,18 +108,15 @@ def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG):
113
108
  lvl (string): Log level name.
114
109
  silent (boolean): Whether to log the override call
115
110
  """
116
- if not _initialized_flag:
117
- initLogging(silent=silent)
118
- ocrd_logger = logging.getLogger('ocrd')
119
-
120
- if lvl is None:
121
- if not silent:
122
- print('[LOGGING] Reset log level override', file=sys.stderr)
123
- ocrd_logger.setLevel(logging.NOTSET)
124
- else:
125
- if not silent:
126
- print(f'[LOGGING] Overriding ocrd log level to {lvl}', file=sys.stderr)
127
- ocrd_logger.setLevel(lvl)
111
+ if lvl is not None:
112
+ lvl = getLevelName(lvl)
113
+ if not _initialized_flag:
114
+ initLogging(silent=silent)
115
+ # affect all configured loggers
116
+ for logger_name in logging.root.manager.loggerDict:
117
+ if not silent:
118
+ print(f'[LOGGING] Overriding {logger_name} log level to {lvl}', file=sys.stderr)
119
+ logging.getLogger(logger_name).setLevel(lvl)
128
120
 
129
121
  def get_logging_config_files():
130
122
  """
@@ -158,20 +150,11 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
158
150
  - silent (bool): Whether to log logging behavior by printing to stderr
159
151
  """
160
152
  global _initialized_flag
161
- if _initialized_flag and not force_reinit:
162
- return
163
- # disableLogging()
164
-
165
- # https://docs.python.org/3/library/logging.html#logging.disable
166
- # If logging.disable(logging.NOTSET) is called, it effectively removes this
167
- # overriding level, so that logging output again depends on the effective
168
- # levels of individual loggers.
169
- logging.disable(logging.NOTSET)
170
-
171
- # remove all handlers for the ocrd root loggers
172
- for logger_name in ROOT_OCRD_LOGGERS:
173
- for handler in logging.getLogger(logger_name).handlers[:]:
174
- logging.getLogger(logger_name).removeHandler(handler)
153
+ if _initialized_flag:
154
+ if force_reinit:
155
+ disableLogging(silent=silent)
156
+ else:
157
+ return
175
158
 
176
159
  config_file = None
177
160
  if not builtin_only:
@@ -190,8 +173,8 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
190
173
  ocrd_handler = logging.StreamHandler(stream=sys.stderr)
191
174
  ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT))
192
175
  ocrd_handler.setLevel(logging.DEBUG)
193
- for logger_name in ROOT_OCRD_LOGGERS:
194
- logging.getLogger(logger_name).addHandler(ocrd_handler)
176
+ root_logger = logging.getLogger('')
177
+ root_logger.addHandler(ocrd_handler)
195
178
  for logger_name, logger_level in LOGGING_DEFAULTS.items():
196
179
  logging.getLogger(logger_name).setLevel(logger_level)
197
180
  _initialized_flag = True
@@ -207,24 +190,16 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG):
207
190
  if _initialized_flag and not silent:
208
191
  print("[LOGGING] Disabling logging", file=sys.stderr)
209
192
  _initialized_flag = False
210
- # logging.basicConfig(level=logging.CRITICAL)
211
- # logging.disable(logging.ERROR)
212
- # remove all handlers for the ocrd logger
213
- for logger_name in ROOT_OCRD_LOGGERS + ['']:
214
- for handler in logging.getLogger(logger_name).handlers[:]:
215
- logging.getLogger(logger_name).removeHandler(handler)
216
- for logger_name in LOGGING_DEFAULTS:
217
- logging.getLogger(logger_name).setLevel(logging.NOTSET)
193
+ # remove all handlers we might have added (via initLogging on builtin or file config)
194
+ for logger_name in logging.root.manager.loggerDict:
195
+ if not silent:
196
+ print(f'[LOGGING] Resetting {logger_name} log level and handlers')
197
+ logger = logging.getLogger(logger_name)
198
+ logger.setLevel(logging.NOTSET)
199
+ for handler in logger.handlers[:]:
200
+ logger.removeHandler(handler)
201
+ for handler in logging.root.handlers[:]:
202
+ logging.root.removeHandler(handler)
218
203
  # Python default log level is WARNING
219
204
  logging.root.setLevel(logging.WARNING)
220
205
 
221
- # Initializing stream handlers at module level
222
- # would cause message output in all runtime contexts,
223
- # including those which are already run for std output
224
- # (--dump-json, --version, ocrd-tool, bashlib etc).
225
- # So this needs to be an opt-in from the CLIs/decorators:
226
- #initLogging()
227
- # Also, we even have to block log output for libraries
228
- # (like matplotlib/tensorflow) which set up logging
229
- # themselves already:
230
- disableLogging()
@@ -34,7 +34,7 @@ keys=defaultFormatter,detailedFormatter
34
34
  # default logger "root" using consoleHandler
35
35
  #
36
36
  [logger_root]
37
- level=INFO
37
+ level=WARNING
38
38
  handlers=consoleHandler,fileHandler
39
39
 
40
40
 
@@ -56,22 +56,22 @@ handlers=consoleHandler,fileHandler
56
56
  # ocrd loggers
57
57
  [logger_ocrd]
58
58
  level=INFO
59
- handlers=consoleHandler,fileHandler
59
+ handlers=
60
60
  qualname=ocrd
61
- propagate=0
62
61
 
63
62
  [logger_ocrd_network]
64
63
  level=INFO
65
- handlers=consoleHandler,processingServerHandler
64
+ #handlers=consoleHandler,processingServerHandler
65
+ handlers=processingServerHandler
66
66
  qualname=ocrd_network
67
- propagate=0
67
+ #propagate=0
68
68
 
69
69
  #
70
70
  # logger tensorflow
71
71
  #
72
72
  [logger_ocrd_tensorflow]
73
73
  level=ERROR
74
- handlers=consoleHandler
74
+ handlers=
75
75
  qualname=tensorflow
76
76
 
77
77
  #
@@ -79,7 +79,7 @@ qualname=tensorflow
79
79
  #
80
80
  [logger_ocrd_shapely_geos]
81
81
  level=ERROR
82
- handlers=consoleHandler
82
+ handlers=
83
83
  qualname=shapely.geos
84
84
 
85
85
 
@@ -88,7 +88,7 @@ qualname=shapely.geos
88
88
  #
89
89
  [logger_ocrd_PIL]
90
90
  level=INFO
91
- handlers=consoleHandler
91
+ handlers=
92
92
  qualname=PIL
93
93
 
94
94
  #
@@ -96,34 +96,32 @@ qualname=PIL
96
96
  #
97
97
  [logger_paramiko]
98
98
  level=INFO
99
- handlers=consoleHandler
99
+ handlers=
100
100
  qualname=paramiko
101
- propagate=0
102
101
 
103
102
  [logger_paramiko_transport]
104
103
  level=INFO
105
- handlers=consoleHandler
104
+ handlers=
106
105
  qualname=paramiko.transport
107
- propagate=0
108
106
 
109
107
  #
110
108
  # uvicorn loggers
111
109
  #
112
110
  [logger_uvicorn]
113
111
  level=INFO
114
- handlers=consoleHandler
112
+ handlers=
115
113
  qualname=uvicorn
116
114
  [logger_uvicorn_access]
117
115
  level=WARN
118
- handlers=consoleHandler
116
+ handlers=
119
117
  qualname=uvicorn.access
120
118
  [logger_uvicorn_error]
121
119
  level=INFO
122
- handlers=consoleHandler
120
+ handlers=
123
121
  qualname=uvicorn.error
124
122
  [logger_multipart]
125
123
  level=INFO
126
- handlers=consoleHandler
124
+ handlers=
127
125
  qualname=multipart
128
126
 
129
127