ocrd 3.5.1__py3-none-any.whl → 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ocrd/cli/__init__.py +8 -6
  2. ocrd/cli/bashlib.py +8 -114
  3. ocrd/cli/network.py +0 -2
  4. ocrd/cli/ocrd_tool.py +26 -4
  5. ocrd/cli/process.py +1 -0
  6. ocrd/cli/resmgr.py +0 -1
  7. ocrd/cli/validate.py +32 -13
  8. ocrd/cli/workspace.py +125 -52
  9. ocrd/cli/zip.py +13 -4
  10. ocrd/decorators/__init__.py +28 -52
  11. ocrd/decorators/loglevel_option.py +4 -0
  12. ocrd/decorators/mets_find_options.py +2 -1
  13. ocrd/decorators/ocrd_cli_options.py +3 -7
  14. ocrd/decorators/parameter_option.py +12 -11
  15. ocrd/mets_server.py +11 -15
  16. ocrd/processor/base.py +88 -71
  17. ocrd/processor/builtin/dummy_processor.py +7 -4
  18. ocrd/processor/builtin/filter_processor.py +3 -2
  19. ocrd/processor/helpers.py +5 -6
  20. ocrd/processor/ocrd_page_result.py +7 -5
  21. ocrd/resolver.py +42 -32
  22. ocrd/task_sequence.py +11 -4
  23. ocrd/workspace.py +64 -54
  24. ocrd/workspace_backup.py +3 -0
  25. ocrd/workspace_bagger.py +15 -8
  26. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/METADATA +2 -8
  27. ocrd-3.7.0.dist-info/RECORD +123 -0
  28. ocrd_modelfactory/__init__.py +4 -2
  29. ocrd_models/constants.py +18 -1
  30. ocrd_models/ocrd_agent.py +1 -1
  31. ocrd_models/ocrd_exif.py +7 -3
  32. ocrd_models/ocrd_file.py +24 -19
  33. ocrd_models/ocrd_mets.py +90 -67
  34. ocrd_models/ocrd_page.py +17 -13
  35. ocrd_models/ocrd_xml_base.py +1 -0
  36. ocrd_models/report.py +2 -1
  37. ocrd_models/utils.py +4 -3
  38. ocrd_models/xpath_functions.py +3 -1
  39. ocrd_network/__init__.py +1 -2
  40. ocrd_network/cli/__init__.py +0 -2
  41. ocrd_network/cli/client.py +122 -50
  42. ocrd_network/cli/processing_server.py +1 -2
  43. ocrd_network/client.py +2 -2
  44. ocrd_network/client_utils.py +30 -13
  45. ocrd_network/constants.py +1 -6
  46. ocrd_network/database.py +3 -3
  47. ocrd_network/logging_utils.py +2 -7
  48. ocrd_network/models/__init__.py +0 -2
  49. ocrd_network/models/job.py +31 -33
  50. ocrd_network/models/messages.py +3 -2
  51. ocrd_network/models/workspace.py +5 -5
  52. ocrd_network/process_helpers.py +54 -17
  53. ocrd_network/processing_server.py +63 -114
  54. ocrd_network/processing_worker.py +6 -5
  55. ocrd_network/rabbitmq_utils/__init__.py +2 -0
  56. ocrd_network/rabbitmq_utils/helpers.py +24 -7
  57. ocrd_network/runtime_data/__init__.py +1 -2
  58. ocrd_network/runtime_data/deployer.py +12 -85
  59. ocrd_network/runtime_data/hosts.py +61 -130
  60. ocrd_network/runtime_data/network_agents.py +7 -31
  61. ocrd_network/runtime_data/network_services.py +1 -1
  62. ocrd_network/server_cache.py +1 -1
  63. ocrd_network/server_utils.py +13 -52
  64. ocrd_network/utils.py +1 -0
  65. ocrd_utils/__init__.py +4 -4
  66. ocrd_utils/config.py +86 -76
  67. ocrd_utils/deprecate.py +3 -0
  68. ocrd_utils/image.py +51 -23
  69. ocrd_utils/introspect.py +8 -3
  70. ocrd_utils/logging.py +15 -7
  71. ocrd_utils/os.py +17 -4
  72. ocrd_utils/str.py +32 -16
  73. ocrd_validators/json_validator.py +4 -1
  74. ocrd_validators/ocrd_tool_validator.py +2 -1
  75. ocrd_validators/ocrd_zip_validator.py +5 -4
  76. ocrd_validators/page_validator.py +21 -9
  77. ocrd_validators/parameter_validator.py +3 -2
  78. ocrd_validators/processing_server_config.schema.yml +1 -33
  79. ocrd_validators/resource_list_validator.py +3 -1
  80. ocrd_validators/workspace_validator.py +30 -20
  81. ocrd_validators/xsd_mets_validator.py +2 -1
  82. ocrd_validators/xsd_page_validator.py +2 -1
  83. ocrd_validators/xsd_validator.py +4 -2
  84. ocrd/cli/log.py +0 -51
  85. ocrd/lib.bash +0 -317
  86. ocrd-3.5.1.dist-info/RECORD +0 -128
  87. ocrd_network/cli/processor_server.py +0 -31
  88. ocrd_network/models/ocrd_tool.py +0 -12
  89. ocrd_network/processor_server.py +0 -255
  90. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/LICENSE +0 -0
  91. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/WHEEL +0 -0
  92. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/entry_points.txt +0 -0
  93. {ocrd-3.5.1.dist-info → ocrd-3.7.0.dist-info}/top_level.txt +0 -0
ocrd_utils/__init__.py CHANGED
@@ -8,11 +8,11 @@ Utility functions and constants usable in various circumstances.
8
8
  levels below page (i.e. region, line, word, glyph) between relative coordinates
9
9
  w.r.t. a corresponding image and absolute coordinates w.r.t. the top-level image.
10
10
  This includes rotation and offset correction, based on affine transformations.
11
- (Used by :py:class:`ocrd.workspace.Workspace` methods
12
- :py:meth:`ocrd.workspace.Workspace.image_from_page` and
11
+ (Used by :py:class:`ocrd.workspace.Workspace` methods
12
+ :py:meth:`ocrd.workspace.Workspace.image_from_page` and
13
13
  :py:meth:`ocrd.workspace.Workspace.image_from_segment`.)
14
14
 
15
- * :py:func:`rotate_coordinates`,
15
+ * :py:func:`rotate_coordinates`,
16
16
  :py:func:`scale_coordinates`,
17
17
  :py:func:`shift_coordinates`,
18
18
  :py:func:`transpose_coordinates`,
@@ -23,7 +23,7 @@ Utility functions and constants usable in various circumstances.
23
23
  used to pass down the coordinate system along with images (both invariably sharing
24
24
  the same operations context) when traversing the element hierarchy top to bottom.
25
25
  (Used by :py:class:`ocrd.workspace.Workspace` methods
26
- :py:meth:`ocrd.workspace.Workspace.image_from_page` and
26
+ :py:meth:`ocrd.workspace.Workspace.image_from_page` and
27
27
  :py:meth:`ocrd.workspace.Workspace.image_from_segment`.)
28
28
 
29
29
  * :py:func:`rotate_image`,
ocrd_utils/config.py CHANGED
@@ -16,9 +16,11 @@ from textwrap import fill, indent
16
16
  def _validator_boolean(val):
17
17
  return isinstance(val, bool) or str.lower(val) in ('true', 'false', '0', '1')
18
18
 
19
+
19
20
  def _parser_boolean(val):
20
21
  return bool(val) if isinstance(val, (int, bool)) else str.lower(val) in ('true', '1')
21
22
 
23
+
22
24
  class OcrdEnvVariable():
23
25
 
24
26
  def __init__(self, name, description, parser=str, validator=lambda _: True, default=[False, None]):
@@ -61,7 +63,7 @@ class OcrdEnvVariable():
61
63
  desc += ' '
62
64
  desc += f'(Default: "{default}")'
63
65
  ret = ''
64
- ret = f'{self.name}\n'
66
+ ret = f'{self.name}\n'
65
67
  if wrap_text:
66
68
  desc = fill(desc, width=50)
67
69
  if indent_text:
@@ -69,6 +71,7 @@ class OcrdEnvVariable():
69
71
  desc = indent(desc, ' ')
70
72
  return ret + desc
71
73
 
74
+
72
75
  class OcrdEnvConfig():
73
76
 
74
77
  def __init__(self):
@@ -83,7 +86,7 @@ class OcrdEnvConfig():
83
86
  return self._variables[name]
84
87
 
85
88
  def has_default(self, name):
86
- if not name in self._variables:
89
+ if name not in self._variables:
87
90
  raise ValueError(f"Unregistered env variable {name}")
88
91
  return self._variables[name].has_default
89
92
 
@@ -99,13 +102,13 @@ class OcrdEnvConfig():
99
102
  pass
100
103
 
101
104
  def describe(self, name, *args, **kwargs):
102
- if not name in self._variables:
105
+ if name not in self._variables:
103
106
  raise ValueError(f"Unregistered env variable {name}")
104
107
  return self._variables[name].describe(*args, **kwargs)
105
108
 
106
109
  def __getattr__(self, name):
107
110
  # will be called if name is not accessible (has not been added directly yet)
108
- if not name in self._variables:
111
+ if name not in self._variables:
109
112
  raise AttributeError(f"Unregistered env variable {name}")
110
113
  var_obj = self._variables[name]
111
114
  try:
@@ -120,39 +123,44 @@ class OcrdEnvConfig():
120
123
  return var_obj.parser(raw_value)
121
124
 
122
125
  def is_set(self, name):
123
- if not name in self._variables:
126
+ if name not in self._variables:
124
127
  raise ValueError(f"Unregistered env variable {name}")
125
128
  return name in environ
126
129
 
127
130
  def raw_value(self, name):
128
- if not name in self._variables:
131
+ if name not in self._variables:
129
132
  raise ValueError(f"Unregistered env variable {name}")
130
133
  return environ[name]
131
134
 
135
+
132
136
  config = OcrdEnvConfig()
133
137
 
134
138
  config.add('OCRD_METS_CACHING',
135
- description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
136
- validator=_validator_boolean,
137
- parser=_parser_boolean)
139
+ description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
140
+ validator=_validator_boolean,
141
+ parser=_parser_boolean)
138
142
 
139
143
  config.add('OCRD_MAX_PROCESSOR_CACHE',
140
- description="Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) for processing workers or processor servers.",
141
- parser=int,
142
- default=(True, 128))
144
+ description="Maximum number of processor instances (for each set of parameters) to be kept in memory "
145
+ "(including loaded models) for processing workers.",
146
+ parser=int,
147
+ default=(True, 128))
143
148
 
144
149
  config.add('OCRD_MAX_PARALLEL_PAGES',
145
- description="Maximum number of processor workers for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set >1, then a METS Server must be used for METS synchronisation.",
146
- parser=int,
147
- default=(True, 1))
150
+ description="Maximum number of processor workers for page-parallel processing (within "
151
+ "each Processor's selected page range, independent of the number of Processing "
152
+ "Workers). If set >1, then a METS Server must be used for METS synchronisation.",
153
+ parser=int,
154
+ default=(True, 1))
148
155
 
149
156
  config.add('OCRD_PROCESSING_PAGE_TIMEOUT',
150
- description="Timeout in seconds for processing a single page. If set >0, when exceeded, the same as OCRD_MISSING_OUTPUT applies.",
151
- parser=int,
152
- default=(True, 0))
157
+ description="Timeout in seconds for processing a single page. If set >0, when exceeded, "
158
+ "the same as OCRD_MISSING_OUTPUT applies.",
159
+ parser=int,
160
+ default=(True, 0))
153
161
 
154
162
  config.add("OCRD_PROFILE",
155
- description="""\
163
+ description="""\
156
164
  Whether to enable gathering runtime statistics
157
165
  on the `ocrd.profile` logger (comma-separated):
158
166
  \b
@@ -161,16 +169,18 @@ on the `ocrd.profile` logger (comma-separated):
161
169
  - `PSS`: also yields peak memory (proportional set size)
162
170
  \b
163
171
  """,
164
- validator=lambda val : all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
165
- default=(True, ''))
172
+ validator=lambda val: all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
173
+ default=(True, ''))
166
174
 
167
175
  config.add("OCRD_PROFILE_FILE",
168
- description="If set, then the CPU profile is written to this file for later peruse with a analysis tools like snakeviz")
176
+ description="If set, then the CPU profile is written to this file for later peruse "
177
+ "with a analysis tools like snakeviz")
169
178
 
170
179
  config.add("OCRD_DOWNLOAD_RETRIES",
171
- description="Number of times to retry failed attempts for downloads of resources or workspace files.",
172
- validator=int,
173
- parser=int)
180
+ description="Number of times to retry failed attempts for downloads of resources or workspace files.",
181
+ validator=int,
182
+ parser=int)
183
+
174
184
 
175
185
  def _ocrd_download_timeout_parser(val):
176
186
  timeout = val.split(',')
@@ -180,18 +190,19 @@ def _ocrd_download_timeout_parser(val):
180
190
  timeout = float(timeout[0])
181
191
  return timeout
182
192
 
193
+
183
194
  config.add("OCRD_DOWNLOAD_TIMEOUT",
184
- description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
185
- parser=_ocrd_download_timeout_parser)
195
+ description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
196
+ parser=_ocrd_download_timeout_parser)
186
197
 
187
198
  config.add("OCRD_DOWNLOAD_INPUT",
188
- description="Whether to download files not present locally during processing",
189
- default=(True, True),
190
- validator=_validator_boolean,
191
- parser=_parser_boolean)
199
+ description="Whether to download files not present locally during processing",
200
+ default=(True, True),
201
+ validator=_validator_boolean,
202
+ parser=_parser_boolean)
192
203
 
193
204
  config.add("OCRD_MISSING_INPUT",
194
- description="""\
205
+ description="""\
195
206
  How to deal with missing input files
196
207
  (for some fileGrp/pageId) during processing:
197
208
  \b
@@ -199,12 +210,12 @@ How to deal with missing input files
199
210
  - `ABORT`: throw :py:class:`.MissingInputFile`
200
211
  \b
201
212
  """,
202
- default=(True, 'SKIP'),
203
- validator=lambda val: val in ['SKIP', 'ABORT'],
204
- parser=str)
213
+ default=(True, 'SKIP'),
214
+ validator=lambda val: val in ['SKIP', 'ABORT'],
215
+ parser=str)
205
216
 
206
217
  config.add("OCRD_MISSING_OUTPUT",
207
- description="""\
218
+ description="""\
208
219
  How to deal with missing output files
209
220
  (for some fileGrp/pageId) during processing:
210
221
  \b
@@ -213,17 +224,18 @@ How to deal with missing output files
213
224
  - `ABORT`: re-throw whatever caused processing to fail
214
225
  \b
215
226
  """,
216
- default=(True, 'SKIP'),
217
- validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'],
218
- parser=str)
227
+ default=(True, 'SKIP'),
228
+ validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'],
229
+ parser=str)
219
230
 
220
231
  config.add("OCRD_MAX_MISSING_OUTPUTS",
221
- description="Maximal rate of skipped/fallback pages among all processed pages before aborting (decimal fraction, ignored if negative).",
222
- default=(True, 0.1),
223
- parser=float)
232
+ description="Maximal rate of skipped/fallback pages among all processed pages before aborting "
233
+ "(decimal fraction, ignored if negative).",
234
+ default=(True, 0.1),
235
+ parser=float)
224
236
 
225
237
  config.add("OCRD_EXISTING_OUTPUT",
226
- description="""\
238
+ description="""\
227
239
  How to deal with already existing output files
228
240
  (for some fileGrp/pageId) during processing:
229
241
  \b
@@ -232,13 +244,13 @@ How to deal with already existing output files
232
244
  - `ABORT`: re-throw :py:class:`FileExistsError`
233
245
  \b
234
246
  """,
235
- default=(True, 'SKIP'),
236
- validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'],
237
- parser=str)
247
+ default=(True, 'SKIP'),
248
+ validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'],
249
+ parser=str)
238
250
 
239
251
  config.add("OCRD_NETWORK_SERVER_ADDR_PROCESSING",
240
- description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
241
- default=(True, ''))
252
+ description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
253
+ default=(True, ''))
242
254
 
243
255
  config.add("OCRD_NETWORK_CLIENT_POLLING_SLEEP",
244
256
  description="How many seconds to sleep before trying again.",
@@ -251,27 +263,25 @@ config.add("OCRD_NETWORK_CLIENT_POLLING_TIMEOUT",
251
263
  default=(True, 3600))
252
264
 
253
265
  config.add("OCRD_NETWORK_SERVER_ADDR_WORKFLOW",
254
- description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
255
- default=(True, ''))
266
+ description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
267
+ default=(True, ''))
256
268
 
257
269
  config.add("OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
258
- description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
259
- default=(True, ''))
270
+ description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
271
+ default=(True, ''))
260
272
 
261
273
  config.add("OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS",
262
274
  description="Number of attempts for a RabbitMQ client to connect before failing.",
263
275
  parser=int,
264
276
  default=(True, 3))
265
277
 
266
- config.add(
267
- name="OCRD_NETWORK_RABBITMQ_HEARTBEAT",
268
- description="""
269
- Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value
270
- proposed by broker. Use 0 to deactivate heartbeat.
271
- """,
272
- parser=int,
273
- default=(True, 0)
274
- )
278
+ config.add(name="OCRD_NETWORK_RABBITMQ_HEARTBEAT",
279
+ description="""
280
+ Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value
281
+ proposed by broker. Use 0 to deactivate heartbeat.
282
+ """,
283
+ parser=int,
284
+ default=(True, 0))
275
285
 
276
286
  config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
277
287
  description="The root directory where all mets server related socket files are created",
@@ -296,24 +306,24 @@ except PermissionError:
296
306
  pass
297
307
 
298
308
  config.add("HOME",
299
- description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
300
- # description="HOME directory, cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html",
301
- validator=lambda val: Path(val).is_dir(),
302
- parser=lambda val: Path(val),
303
- default=(True, lambda: Path.home()))
309
+ description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
310
+ # description="HOME directory, cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html",
311
+ validator=lambda val: Path(val).is_dir(),
312
+ parser=lambda val: Path(val),
313
+ default=(True, lambda: Path.home()))
304
314
 
305
315
  config.add("XDG_DATA_HOME",
306
- description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
307
- parser=lambda val: Path(val),
308
- default=(True, lambda: Path(config.HOME, '.local/share')))
316
+ description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
317
+ parser=lambda val: Path(val),
318
+ default=(True, lambda: Path(config.HOME, '.local/share')))
309
319
 
310
320
  config.add("XDG_CONFIG_HOME",
311
- description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
312
- parser=lambda val: Path(val),
313
- default=(True, lambda: Path(config.HOME, '.config')))
321
+ description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
322
+ parser=lambda val: Path(val),
323
+ default=(True, lambda: Path(config.HOME, '.config')))
314
324
 
315
325
  config.add("OCRD_LOGGING_DEBUG",
316
- description="Print information about the logging setup to STDERR",
317
- default=(True, False),
318
- validator=_validator_boolean,
319
- parser=_parser_boolean)
326
+ description="Print information about the logging setup to STDERR",
327
+ default=(True, False),
328
+ validator=_validator_boolean,
329
+ parser=_parser_boolean)
ocrd_utils/deprecate.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import functools
2
2
  import warnings
3
3
 
4
+
4
5
  def deprecation_warning(msg, stacklevel=2):
5
6
  warnings.warn(msg, DeprecationWarning, stacklevel)
6
7
 
8
+
7
9
  def deprecated_alias(**aliases):
8
10
  """
9
11
  Deprecate a kwarg in favor of another kwarg
@@ -16,6 +18,7 @@ def deprecated_alias(**aliases):
16
18
  return wrapper
17
19
  return deco
18
20
 
21
+
19
22
  def rename_kwargs(func_name, kwargs, aliases):
20
23
  """
21
24
  https://stackoverflow.com/questions/49802412/how-to-implement-deprecation-in-python-with-argument-alias