ocrd 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocrd/cli/__init__.py +6 -2
- ocrd/cli/bashlib.py +7 -2
- ocrd/cli/log.py +7 -2
- ocrd/cli/network.py +0 -2
- ocrd/cli/ocrd_tool.py +26 -4
- ocrd/cli/process.py +1 -0
- ocrd/cli/resmgr.py +0 -1
- ocrd/cli/validate.py +32 -13
- ocrd/cli/workspace.py +125 -52
- ocrd/cli/zip.py +13 -4
- ocrd/decorators/__init__.py +28 -52
- ocrd/decorators/loglevel_option.py +4 -0
- ocrd/decorators/mets_find_options.py +2 -1
- ocrd/decorators/ocrd_cli_options.py +3 -7
- ocrd/decorators/parameter_option.py +12 -11
- ocrd/lib.bash +6 -13
- ocrd/mets_server.py +6 -10
- ocrd/processor/base.py +88 -71
- ocrd/processor/builtin/dummy_processor.py +7 -4
- ocrd/processor/builtin/filter_processor.py +3 -2
- ocrd/processor/helpers.py +5 -6
- ocrd/processor/ocrd_page_result.py +7 -5
- ocrd/resolver.py +42 -32
- ocrd/task_sequence.py +11 -4
- ocrd/workspace.py +64 -54
- ocrd/workspace_backup.py +3 -0
- ocrd/workspace_bagger.py +15 -8
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/METADATA +3 -2
- ocrd-3.6.0.dist-info/RECORD +125 -0
- ocrd_modelfactory/__init__.py +4 -2
- ocrd_models/constants.py +18 -1
- ocrd_models/ocrd_agent.py +1 -1
- ocrd_models/ocrd_exif.py +7 -3
- ocrd_models/ocrd_file.py +24 -19
- ocrd_models/ocrd_mets.py +90 -67
- ocrd_models/ocrd_page.py +17 -13
- ocrd_models/ocrd_xml_base.py +1 -0
- ocrd_models/report.py +2 -1
- ocrd_models/utils.py +4 -3
- ocrd_models/xpath_functions.py +3 -1
- ocrd_network/__init__.py +1 -2
- ocrd_network/cli/__init__.py +0 -2
- ocrd_network/cli/client.py +122 -50
- ocrd_network/cli/processing_server.py +1 -2
- ocrd_network/client.py +2 -2
- ocrd_network/client_utils.py +30 -13
- ocrd_network/constants.py +1 -6
- ocrd_network/database.py +3 -3
- ocrd_network/logging_utils.py +2 -7
- ocrd_network/models/__init__.py +0 -2
- ocrd_network/models/job.py +2 -5
- ocrd_network/models/workspace.py +1 -1
- ocrd_network/process_helpers.py +54 -17
- ocrd_network/processing_server.py +63 -114
- ocrd_network/processing_worker.py +6 -5
- ocrd_network/rabbitmq_utils/__init__.py +2 -0
- ocrd_network/rabbitmq_utils/helpers.py +24 -7
- ocrd_network/runtime_data/__init__.py +1 -2
- ocrd_network/runtime_data/deployer.py +12 -85
- ocrd_network/runtime_data/hosts.py +61 -130
- ocrd_network/runtime_data/network_agents.py +7 -31
- ocrd_network/runtime_data/network_services.py +1 -1
- ocrd_network/server_cache.py +1 -1
- ocrd_network/server_utils.py +13 -52
- ocrd_network/utils.py +1 -0
- ocrd_utils/__init__.py +4 -4
- ocrd_utils/config.py +86 -76
- ocrd_utils/deprecate.py +3 -0
- ocrd_utils/image.py +51 -23
- ocrd_utils/introspect.py +8 -3
- ocrd_utils/logging.py +12 -7
- ocrd_utils/os.py +16 -3
- ocrd_utils/str.py +32 -16
- ocrd_validators/json_validator.py +4 -1
- ocrd_validators/ocrd_tool_validator.py +2 -1
- ocrd_validators/ocrd_zip_validator.py +5 -4
- ocrd_validators/page_validator.py +21 -9
- ocrd_validators/parameter_validator.py +3 -2
- ocrd_validators/processing_server_config.schema.yml +1 -33
- ocrd_validators/resource_list_validator.py +3 -1
- ocrd_validators/workspace_validator.py +30 -20
- ocrd_validators/xsd_mets_validator.py +2 -1
- ocrd_validators/xsd_page_validator.py +2 -1
- ocrd_validators/xsd_validator.py +4 -2
- ocrd-3.5.0.dist-info/RECORD +0 -128
- ocrd_network/cli/processor_server.py +0 -31
- ocrd_network/models/ocrd_tool.py +0 -12
- ocrd_network/processor_server.py +0 -255
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/LICENSE +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/WHEEL +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/entry_points.txt +0 -0
- {ocrd-3.5.0.dist-info → ocrd-3.6.0.dist-info}/top_level.txt +0 -0
ocrd_utils/image.py
CHANGED
|
@@ -39,14 +39,15 @@ __all__ = [
|
|
|
39
39
|
'xywh_from_polygon',
|
|
40
40
|
]
|
|
41
41
|
|
|
42
|
+
|
|
42
43
|
def adjust_canvas_to_rotation(size, angle):
|
|
43
44
|
"""Calculate the enlarged image size after rotation.
|
|
44
|
-
|
|
45
|
+
|
|
45
46
|
Given a numpy array ``size`` of an original canvas (width and height),
|
|
46
47
|
and a rotation angle in degrees counter-clockwise ``angle``,
|
|
47
48
|
calculate the new size which is necessary to encompass the full
|
|
48
49
|
image after rotation.
|
|
49
|
-
|
|
50
|
+
|
|
50
51
|
Return a numpy array of the enlarged width and height.
|
|
51
52
|
"""
|
|
52
53
|
angle = np.deg2rad(angle)
|
|
@@ -56,13 +57,14 @@ def adjust_canvas_to_rotation(size, angle):
|
|
|
56
57
|
[sin, cos]]),
|
|
57
58
|
np.array(size))
|
|
58
59
|
|
|
60
|
+
|
|
59
61
|
def adjust_canvas_to_transposition(size, method):
|
|
60
62
|
"""Calculate the flipped image size after transposition.
|
|
61
|
-
|
|
63
|
+
|
|
62
64
|
Given a numpy array ``size`` of an original canvas (width and height),
|
|
63
65
|
and a transposition mode ``method`` (see ``transpose_image``),
|
|
64
66
|
calculate the new size after transposition.
|
|
65
|
-
|
|
67
|
+
|
|
66
68
|
Return a numpy array of the enlarged width and height.
|
|
67
69
|
"""
|
|
68
70
|
if method in [Image.Transpose.ROTATE_90,
|
|
@@ -72,11 +74,13 @@ def adjust_canvas_to_transposition(size, method):
|
|
|
72
74
|
size = size[::-1]
|
|
73
75
|
return size
|
|
74
76
|
|
|
77
|
+
|
|
75
78
|
def bbox_from_points(points):
|
|
76
79
|
"""Construct a numeric list representing a bounding box from polygon coordinates in page representation."""
|
|
77
80
|
xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')]
|
|
78
81
|
return bbox_from_polygon(xys)
|
|
79
82
|
|
|
83
|
+
|
|
80
84
|
def bbox_from_polygon(polygon):
|
|
81
85
|
"""Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation."""
|
|
82
86
|
minx = sys.maxsize
|
|
@@ -94,6 +98,7 @@ def bbox_from_polygon(polygon):
|
|
|
94
98
|
maxy = xy[1]
|
|
95
99
|
return minx, miny, maxx, maxy
|
|
96
100
|
|
|
101
|
+
|
|
97
102
|
def bbox_from_xywh(xywh):
|
|
98
103
|
"""Convert a bounding box from a numeric dict to a numeric list representation."""
|
|
99
104
|
return (
|
|
@@ -103,23 +108,24 @@ def bbox_from_xywh(xywh):
|
|
|
103
108
|
xywh['y'] + xywh['h']
|
|
104
109
|
)
|
|
105
110
|
|
|
111
|
+
|
|
106
112
|
def coordinates_of_segment(segment, parent_image, parent_coords):
|
|
107
113
|
"""Extract the coordinates of a PAGE segment element relative to its parent.
|
|
108
114
|
|
|
115
|
+
\b
|
|
109
116
|
Given...
|
|
110
|
-
|
|
111
117
|
- ``segment``, a PAGE segment object in absolute coordinates
|
|
112
118
|
(i.e. RegionType / TextLineType / WordType / GlyphType), and
|
|
113
119
|
- ``parent_image``, the PIL.Image of its corresponding parent object
|
|
114
120
|
(i.e. PageType / RegionType / TextLineType / WordType), (not used),
|
|
115
121
|
along with
|
|
116
122
|
- ``parent_coords``, its corresponding affine transformation,
|
|
117
|
-
|
|
118
123
|
...calculate the relative coordinates of the segment within the image.
|
|
119
124
|
|
|
120
125
|
That is, apply the given transform to the points annotated in ``segment``.
|
|
121
126
|
The transform encodes (recursively):
|
|
122
127
|
|
|
128
|
+
\b
|
|
123
129
|
1. Whenever ``parent_image`` or any of its parents was cropped,
|
|
124
130
|
all points must be shifted by the offset
|
|
125
131
|
(i.e. coordinate system gets translated by the upper left).
|
|
@@ -138,6 +144,7 @@ def coordinates_of_segment(segment, parent_image, parent_coords):
|
|
|
138
144
|
polygon = transform_coordinates(polygon, parent_coords['transform'])
|
|
139
145
|
return np.round(polygon).astype(np.int32)
|
|
140
146
|
|
|
147
|
+
|
|
141
148
|
def polygon_from_points(points):
|
|
142
149
|
"""
|
|
143
150
|
Convert polygon coordinates in page representation to polygon coordinates in numeric list representation.
|
|
@@ -152,17 +159,17 @@ def polygon_from_points(points):
|
|
|
152
159
|
def coordinates_for_segment(polygon, parent_image, parent_coords):
|
|
153
160
|
"""Convert relative coordinates to absolute.
|
|
154
161
|
|
|
162
|
+
\b
|
|
155
163
|
Given...
|
|
156
|
-
|
|
157
164
|
- ``polygon``, a numpy array of points relative to
|
|
158
165
|
- ``parent_image``, a PIL.Image (not used), along with
|
|
159
166
|
- ``parent_coords``, its corresponding affine transformation,
|
|
160
|
-
|
|
161
167
|
...calculate the absolute coordinates within the page.
|
|
162
|
-
|
|
168
|
+
|
|
163
169
|
That is, apply the given transform inversely to ``polygon``
|
|
164
170
|
The transform encodes (recursively):
|
|
165
171
|
|
|
172
|
+
\b
|
|
166
173
|
1. Whenever ``parent_image`` or any of its parents was cropped,
|
|
167
174
|
all points must be shifted by the offset in opposite direction
|
|
168
175
|
(i.e. coordinate system gets translated by the upper left).
|
|
@@ -176,12 +183,13 @@ def coordinates_for_segment(polygon, parent_image, parent_coords):
|
|
|
176
183
|
|
|
177
184
|
Return the rounded numpy array of the resulting polygon.
|
|
178
185
|
"""
|
|
179
|
-
polygon = np.array(polygon, dtype=np.float32)
|
|
186
|
+
polygon = np.array(polygon, dtype=np.float32) # avoid implicit type cast problems
|
|
180
187
|
# apply inverse of affine transform:
|
|
181
188
|
inv_transform = np.linalg.inv(parent_coords['transform'])
|
|
182
189
|
polygon = transform_coordinates(polygon, inv_transform)
|
|
183
190
|
return np.round(polygon).astype(np.int32)
|
|
184
191
|
|
|
192
|
+
|
|
185
193
|
def polygon_mask(image, coordinates):
|
|
186
194
|
""""Create a mask image of a polygon.
|
|
187
195
|
|
|
@@ -197,6 +205,7 @@ def polygon_mask(image, coordinates):
|
|
|
197
205
|
ImageDraw.Draw(mask).polygon(coordinates, outline=0, fill=255)
|
|
198
206
|
return mask
|
|
199
207
|
|
|
208
|
+
|
|
200
209
|
def rotate_coordinates(transform, angle, orig=np.array([0, 0])):
|
|
201
210
|
"""Compose an affine coordinate transformation with a passive rotation.
|
|
202
211
|
|
|
@@ -209,7 +218,7 @@ def rotate_coordinates(transform, angle, orig=np.array([0, 0])):
|
|
|
209
218
|
by pure rotation, and subsequent translation back. However, since
|
|
210
219
|
rotation necessarily increases the bounding box, and thus image size,
|
|
211
220
|
do not translate back the same amount, but to the enlarged offset.)
|
|
212
|
-
|
|
221
|
+
|
|
213
222
|
Return a numpy array of the resulting affine transformation matrix.
|
|
214
223
|
"""
|
|
215
224
|
LOG = getLogger('ocrd.utils.coords.rotate_coordinates')
|
|
@@ -234,6 +243,7 @@ def rotate_coordinates(transform, angle, orig=np.array([0, 0])):
|
|
|
234
243
|
adjust_canvas_to_rotation(orig, angle))
|
|
235
244
|
return transform
|
|
236
245
|
|
|
246
|
+
|
|
237
247
|
def rotate_image(image, angle, fill='background', transparency=False):
|
|
238
248
|
""""Rotate an image, enlarging and filling with background.
|
|
239
249
|
|
|
@@ -242,6 +252,7 @@ def rotate_image(image, angle, fill='background', transparency=False):
|
|
|
242
252
|
size at the margins accordingly, and filling everything outside
|
|
243
253
|
the original image according to ``fill``:
|
|
244
254
|
|
|
255
|
+
\b
|
|
245
256
|
- if ``background`` (the default),
|
|
246
257
|
then use the median color of the image;
|
|
247
258
|
- otherwise use the given color, e.g. ``'white'`` or (255,255,255).
|
|
@@ -267,7 +278,7 @@ def rotate_image(image, angle, fill='background', transparency=False):
|
|
|
267
278
|
if len(background.bands) > 1:
|
|
268
279
|
background = background.median
|
|
269
280
|
if image.mode in ['RGBA', 'LA']:
|
|
270
|
-
background[-1] = 0
|
|
281
|
+
background[-1] = 0 # fully transparent
|
|
271
282
|
background = tuple(background)
|
|
272
283
|
else:
|
|
273
284
|
background = background.median[0]
|
|
@@ -295,7 +306,7 @@ def shift_coordinates(transform, offset):
|
|
|
295
306
|
``offset`` of the translation vector, calculate the affine
|
|
296
307
|
coordinate transform corresponding to the composition of both
|
|
297
308
|
transformations.
|
|
298
|
-
|
|
309
|
+
|
|
299
310
|
Return a numpy array of the resulting affine transformation matrix.
|
|
300
311
|
"""
|
|
301
312
|
LOG = getLogger('ocrd.utils.coords.shift_coordinates')
|
|
@@ -305,6 +316,7 @@ def shift_coordinates(transform, offset):
|
|
|
305
316
|
shift[1, 2] = offset[1]
|
|
306
317
|
return np.dot(shift, transform)
|
|
307
318
|
|
|
319
|
+
|
|
308
320
|
def scale_coordinates(transform, factors):
|
|
309
321
|
"""Compose an affine coordinate transformation with a proportional scaling.
|
|
310
322
|
Given a numpy array ``transform`` of an existing transformation
|
|
@@ -312,7 +324,7 @@ def scale_coordinates(transform, factors):
|
|
|
312
324
|
``factors`` of the scaling factors, calculate the affine
|
|
313
325
|
coordinate transform corresponding to the composition of both
|
|
314
326
|
transformations.
|
|
315
|
-
|
|
327
|
+
|
|
316
328
|
Return a numpy array of the resulting affine transformation matrix.
|
|
317
329
|
"""
|
|
318
330
|
LOG = getLogger('ocrd.utils.coords.scale_coordinates')
|
|
@@ -322,6 +334,7 @@ def scale_coordinates(transform, factors):
|
|
|
322
334
|
scale[1, 1] = factors[1]
|
|
323
335
|
return np.dot(scale, transform)
|
|
324
336
|
|
|
337
|
+
|
|
325
338
|
def transform_coordinates(polygon, transform=None):
|
|
326
339
|
"""Apply an affine transformation to a set of points.
|
|
327
340
|
Augment the 2d numpy array of points ``polygon`` with a an extra
|
|
@@ -331,23 +344,24 @@ def transform_coordinates(polygon, transform=None):
|
|
|
331
344
|
"""
|
|
332
345
|
if transform is None:
|
|
333
346
|
transform = np.eye(3)
|
|
334
|
-
polygon = np.insert(polygon, 2, 1, axis=1)
|
|
347
|
+
polygon = np.insert(polygon, 2, 1, axis=1) # make 3d homogeneous coordinates
|
|
335
348
|
polygon = np.dot(transform, polygon.T).T
|
|
336
349
|
# ones = polygon[:,2]
|
|
337
350
|
# assert np.all(np.array_equal(ones, np.clip(ones, 1 - 1e-2, 1 + 1e-2))), \
|
|
338
351
|
# 'affine transform failed' # should never happen
|
|
339
|
-
polygon = np.delete(polygon, 2, axis=1)
|
|
352
|
+
polygon = np.delete(polygon, 2, axis=1) # remove z coordinate again
|
|
340
353
|
return polygon
|
|
341
354
|
|
|
355
|
+
|
|
342
356
|
def transpose_coordinates(transform, method, orig=np.array([0, 0])):
|
|
343
357
|
""""Compose an affine coordinate transformation with a transposition (i.e. flip or rotate in 90° multiples).
|
|
344
358
|
|
|
359
|
+
\b
|
|
345
360
|
Given a numpy array ``transform`` of an existing transformation
|
|
346
361
|
matrix in homogeneous (3d) coordinates, a transposition mode ``method``,
|
|
347
362
|
as well as a numpy array ``orig`` of the center of the image,
|
|
348
363
|
calculate the affine coordinate transform corresponding to the composition
|
|
349
364
|
of both transformations, which is respectively:
|
|
350
|
-
|
|
351
365
|
- ``PIL.Image.Transpose.FLIP_LEFT_RIGHT``:
|
|
352
366
|
entails translation to the center, followed by pure reflection
|
|
353
367
|
about the y-axis, and subsequent translation back
|
|
@@ -395,7 +409,7 @@ def transpose_coordinates(transform, method, orig=np.array([0, 0])):
|
|
|
395
409
|
Image.Transpose.ROTATE_270: [rot90, reflx, refly],
|
|
396
410
|
Image.Transpose.TRANSPOSE: [rot90, reflx],
|
|
397
411
|
Image.Transpose.TRANSVERSE: [rot90, refly]
|
|
398
|
-
}.get(method)
|
|
412
|
+
}.get(method) # no default
|
|
399
413
|
for operation in operations:
|
|
400
414
|
transform = np.dot(operation, transform)
|
|
401
415
|
transform = shift_coordinates(
|
|
@@ -405,12 +419,13 @@ def transpose_coordinates(transform, method, orig=np.array([0, 0])):
|
|
|
405
419
|
adjust_canvas_to_transposition(orig, method))
|
|
406
420
|
return transform
|
|
407
421
|
|
|
422
|
+
|
|
408
423
|
def transpose_image(image, method):
|
|
409
424
|
""""Transpose (i.e. flip or rotate in 90° multiples) an image.
|
|
410
425
|
|
|
426
|
+
\b
|
|
411
427
|
Given a PIL.Image ``image`` and a transposition mode ``method``,
|
|
412
428
|
apply the respective operation:
|
|
413
|
-
|
|
414
429
|
- ``PIL.Image.Transpose.FLIP_LEFT_RIGHT``:
|
|
415
430
|
all pixels get mirrored at half the width of the image
|
|
416
431
|
- ``PIL.Image.Transpose.FLIP_TOP_BOTTOM``:
|
|
@@ -438,13 +453,14 @@ def transpose_image(image, method):
|
|
|
438
453
|
columns become rows (but counted from the bottom),
|
|
439
454
|
i.e. all pixels get mirrored at the opposite diagonal;
|
|
440
455
|
width becomes height and vice versa
|
|
441
|
-
|
|
456
|
+
|
|
442
457
|
Return a new PIL.Image.
|
|
443
458
|
"""
|
|
444
459
|
LOG = getLogger('ocrd.utils.transpose_image')
|
|
445
460
|
LOG.debug('transposing image with %s', membername(Image, method))
|
|
446
461
|
return image.transpose(method)
|
|
447
462
|
|
|
463
|
+
|
|
448
464
|
def crop_image(image, box=None):
|
|
449
465
|
""""Crop an image to a rectangle, filling with background.
|
|
450
466
|
|
|
@@ -475,10 +491,11 @@ def crop_image(image, box=None):
|
|
|
475
491
|
else:
|
|
476
492
|
background = background.median[0]
|
|
477
493
|
new_image = Image.new(image.mode, (xywh['w'], xywh['h']),
|
|
478
|
-
background)
|
|
494
|
+
background) # or 'white'
|
|
479
495
|
new_image.paste(image, (-xywh['x'], -xywh['y']))
|
|
480
496
|
return new_image
|
|
481
497
|
|
|
498
|
+
|
|
482
499
|
def image_from_polygon(image, polygon, fill='background', transparency=False):
|
|
483
500
|
""""Mask an image with a polygon.
|
|
484
501
|
|
|
@@ -497,7 +514,7 @@ def image_from_polygon(image, polygon, fill='background', transparency=False):
|
|
|
497
514
|
Images which already have an alpha channel will have it shrunk
|
|
498
515
|
from the polygon mask (i.e. everything outside the polygon will
|
|
499
516
|
be transparent, in addition to existing transparent pixels).
|
|
500
|
-
|
|
517
|
+
|
|
501
518
|
Return a new PIL.Image.
|
|
502
519
|
"""
|
|
503
520
|
if fill == 'none' or fill is None:
|
|
@@ -521,22 +538,25 @@ def image_from_polygon(image, polygon, fill='background', transparency=False):
|
|
|
521
538
|
# which can be inconsistent on unbinarized images):
|
|
522
539
|
if image.mode in ['RGBA', 'LA']:
|
|
523
540
|
# ensure transparency maximizes (i.e. parent mask AND mask):
|
|
524
|
-
mask = ImageChops.darker(mask, image.getchannel('A'))
|
|
541
|
+
mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque
|
|
525
542
|
new_image.putalpha(mask)
|
|
526
543
|
elif transparency and image.mode in ['RGB', 'L']:
|
|
527
544
|
# introduce transparency:
|
|
528
545
|
new_image.putalpha(mask)
|
|
529
546
|
return new_image
|
|
530
547
|
|
|
548
|
+
|
|
531
549
|
def points_from_bbox(minx, miny, maxx, maxy):
|
|
532
550
|
"""Construct polygon coordinates in page representation from a numeric list representing a bounding box."""
|
|
533
551
|
return "%i,%i %i,%i %i,%i %i,%i" % (
|
|
534
552
|
minx, miny, maxx, miny, maxx, maxy, minx, maxy)
|
|
535
553
|
|
|
554
|
+
|
|
536
555
|
def points_from_polygon(polygon):
|
|
537
556
|
"""Convert polygon coordinates from a numeric list representation to a page representation."""
|
|
538
557
|
return " ".join("%i,%i" % (x, y) for x, y in polygon)
|
|
539
558
|
|
|
559
|
+
|
|
540
560
|
def points_from_xywh(box):
|
|
541
561
|
"""
|
|
542
562
|
Construct polygon coordinates in page representation from numeric dict representing a bounding box.
|
|
@@ -549,6 +569,8 @@ def points_from_xywh(box):
|
|
|
549
569
|
x + w, y + h,
|
|
550
570
|
x, y + h
|
|
551
571
|
)
|
|
572
|
+
|
|
573
|
+
|
|
552
574
|
def points_from_y0x0y1x1(yxyx):
|
|
553
575
|
"""
|
|
554
576
|
Construct a polygon representation from a rectangle described as a list [y0, x0, y1, x1]
|
|
@@ -564,6 +586,7 @@ def points_from_y0x0y1x1(yxyx):
|
|
|
564
586
|
x0, y1
|
|
565
587
|
)
|
|
566
588
|
|
|
589
|
+
|
|
567
590
|
def points_from_x0y0x1y1(xyxy):
|
|
568
591
|
"""
|
|
569
592
|
Construct a polygon representation from a rectangle described as a list [x0, y0, x1, y1]
|
|
@@ -579,10 +602,12 @@ def points_from_x0y0x1y1(xyxy):
|
|
|
579
602
|
x0, y1
|
|
580
603
|
)
|
|
581
604
|
|
|
605
|
+
|
|
582
606
|
def polygon_from_bbox(minx, miny, maxx, maxy):
|
|
583
607
|
"""Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box."""
|
|
584
608
|
return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]]
|
|
585
609
|
|
|
610
|
+
|
|
586
611
|
def polygon_from_x0y0x1y1(x0y0x1y1):
|
|
587
612
|
"""Construct polygon coordinates in numeric list representation from a string list representing a bounding box."""
|
|
588
613
|
minx = int(x0y0x1y1[0])
|
|
@@ -591,10 +616,12 @@ def polygon_from_x0y0x1y1(x0y0x1y1):
|
|
|
591
616
|
maxy = int(x0y0x1y1[3])
|
|
592
617
|
return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]]
|
|
593
618
|
|
|
619
|
+
|
|
594
620
|
def polygon_from_xywh(xywh):
|
|
595
621
|
"""Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box."""
|
|
596
622
|
return polygon_from_bbox(*bbox_from_xywh(xywh))
|
|
597
623
|
|
|
624
|
+
|
|
598
625
|
def xywh_from_bbox(minx, miny, maxx, maxy):
|
|
599
626
|
"""Convert a bounding box from a numeric list to a numeric dict representation."""
|
|
600
627
|
return {
|
|
@@ -604,6 +631,7 @@ def xywh_from_bbox(minx, miny, maxx, maxy):
|
|
|
604
631
|
'h': maxy - miny,
|
|
605
632
|
}
|
|
606
633
|
|
|
634
|
+
|
|
607
635
|
def xywh_from_points(points):
|
|
608
636
|
"""
|
|
609
637
|
Construct a numeric dict representing a bounding box from polygon coordinates in page representation.
|
ocrd_utils/introspect.py
CHANGED
|
@@ -23,6 +23,7 @@ else:
|
|
|
23
23
|
file_manager = ExitStack()
|
|
24
24
|
atexit.register(file_manager.close)
|
|
25
25
|
|
|
26
|
+
|
|
26
27
|
# Taken from https://github.com/OCR-D/core/pull/884
|
|
27
28
|
def freeze_args(func):
|
|
28
29
|
"""
|
|
@@ -41,6 +42,7 @@ def membername(class_, val):
|
|
|
41
42
|
"""Convert a member variable/constant into a member name string."""
|
|
42
43
|
return next((k for k, v in class_.__dict__.items() if v == val), str(val))
|
|
43
44
|
|
|
45
|
+
|
|
44
46
|
def set_json_key_value_overrides(obj, *kvpairs):
|
|
45
47
|
for kv in kvpairs:
|
|
46
48
|
k, v = kv
|
|
@@ -50,13 +52,16 @@ def set_json_key_value_overrides(obj, *kvpairs):
|
|
|
50
52
|
obj[k] = v
|
|
51
53
|
return obj
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
|
|
56
|
+
def resource_filename(pkg: str, fname: str) -> Path:
|
|
54
57
|
ref = importlib_resources.files(pkg) / fname
|
|
55
58
|
return file_manager.enter_context(importlib_resources.as_file(ref))
|
|
56
59
|
|
|
57
|
-
|
|
60
|
+
|
|
61
|
+
def resource_string(pkg: str, fname: str) -> str:
|
|
58
62
|
with open(resource_filename(pkg, fname), 'r', encoding='utf-8') as f:
|
|
59
63
|
return f.read()
|
|
60
64
|
|
|
61
|
-
|
|
65
|
+
|
|
66
|
+
def dist_version(module: str) -> str:
|
|
62
67
|
return importlib_metadata.version(module)
|
ocrd_utils/logging.py
CHANGED
|
@@ -73,12 +73,13 @@ _ocrdLevel2pythonLevel = {
|
|
|
73
73
|
'FATAL': 'ERROR',
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
|
|
76
77
|
def tf_disable_interactive_logs():
|
|
77
78
|
try:
|
|
78
|
-
from os import environ
|
|
79
|
+
from os import environ # pylint: disable=import-outside-toplevel
|
|
79
80
|
# This env variable must be set before importing from Keras
|
|
80
81
|
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
|
81
|
-
from tensorflow.keras.utils import disable_interactive_logging
|
|
82
|
+
from tensorflow.keras.utils import disable_interactive_logging # pylint: disable=import-outside-toplevel
|
|
82
83
|
# Enabled interactive logging throws an exception
|
|
83
84
|
# due to a call of sys.stdout.flush()
|
|
84
85
|
disable_interactive_logging()
|
|
@@ -86,6 +87,7 @@ def tf_disable_interactive_logs():
|
|
|
86
87
|
# Nothing should be handled here if TF is not available
|
|
87
88
|
pass
|
|
88
89
|
|
|
90
|
+
|
|
89
91
|
def getLevelName(lvl):
|
|
90
92
|
"""
|
|
91
93
|
Get (string) python logging level for (string) spec-defined log level name.
|
|
@@ -93,6 +95,7 @@ def getLevelName(lvl):
|
|
|
93
95
|
lvl = _ocrdLevel2pythonLevel.get(lvl, lvl)
|
|
94
96
|
return logging.getLevelName(lvl)
|
|
95
97
|
|
|
98
|
+
|
|
96
99
|
def getLogger(*args, **kwargs):
|
|
97
100
|
"""
|
|
98
101
|
Wrapper around ``logging.getLogger`` that calls :py:func:`initLogging` if
|
|
@@ -101,6 +104,7 @@ def getLogger(*args, **kwargs):
|
|
|
101
104
|
logger = logging.getLogger(*args, **kwargs)
|
|
102
105
|
return logger
|
|
103
106
|
|
|
107
|
+
|
|
104
108
|
def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG):
|
|
105
109
|
"""
|
|
106
110
|
Override the output log level of the handlers attached to the ``ocrd`` logger.
|
|
@@ -119,6 +123,7 @@ def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG):
|
|
|
119
123
|
print(f'[LOGGING] Overriding {logger_name} log level to {lvl}', file=sys.stderr)
|
|
120
124
|
logging.getLogger(logger_name).setLevel(lvl)
|
|
121
125
|
|
|
126
|
+
|
|
122
127
|
def get_logging_config_files():
|
|
123
128
|
"""
|
|
124
129
|
Return a list of all ``ocrd_logging.conf`` files found in CWD, HOME or /etc.
|
|
@@ -128,9 +133,9 @@ def get_logging_config_files():
|
|
|
128
133
|
Path.home(),
|
|
129
134
|
Path('/etc'),
|
|
130
135
|
]
|
|
131
|
-
return [
|
|
132
|
-
|
|
133
|
-
|
|
136
|
+
return [file for file in [path / 'ocrd_logging.conf' for path in CONFIG_PATHS]
|
|
137
|
+
if file.exists()]
|
|
138
|
+
|
|
134
139
|
|
|
135
140
|
def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_LOGGING_DEBUG):
|
|
136
141
|
"""
|
|
@@ -189,6 +194,7 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L
|
|
|
189
194
|
logging.getLogger(logger_name).setLevel(logger_level)
|
|
190
195
|
_initialized_flag = True
|
|
191
196
|
|
|
197
|
+
|
|
192
198
|
def disableLogging(silent=not config.OCRD_LOGGING_DEBUG):
|
|
193
199
|
"""
|
|
194
200
|
Disables all logging of the ``ocrd`` logger and descendants
|
|
@@ -196,7 +202,7 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG):
|
|
|
196
202
|
Keyword Args:
|
|
197
203
|
- silent (bool, True): Whether to log logging behavior by printing to stderr
|
|
198
204
|
"""
|
|
199
|
-
global _initialized_flag
|
|
205
|
+
global _initialized_flag # pylint: disable=global-statement
|
|
200
206
|
if _initialized_flag and not silent:
|
|
201
207
|
print("[LOGGING] Disabling logging", file=sys.stderr)
|
|
202
208
|
_initialized_flag = False
|
|
@@ -212,4 +218,3 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG):
|
|
|
212
218
|
logging.root.removeHandler(handler)
|
|
213
219
|
# Python default log level is WARNING
|
|
214
220
|
logging.root.setLevel(logging.WARNING)
|
|
215
|
-
|
ocrd_utils/os.py
CHANGED
|
@@ -36,6 +36,7 @@ from .config import config
|
|
|
36
36
|
from .logging import getLogger
|
|
37
37
|
from .introspect import resource_string
|
|
38
38
|
|
|
39
|
+
|
|
39
40
|
def abspath(url):
|
|
40
41
|
"""
|
|
41
42
|
Get a full path to a file or file URL
|
|
@@ -46,6 +47,7 @@ def abspath(url):
|
|
|
46
47
|
url = url[len('file://'):]
|
|
47
48
|
return abspath_(url)
|
|
48
49
|
|
|
50
|
+
|
|
49
51
|
@contextmanager
|
|
50
52
|
def pushd_popd(newcwd=None, tempdir=False):
|
|
51
53
|
if newcwd and tempdir:
|
|
@@ -67,6 +69,7 @@ def pushd_popd(newcwd=None, tempdir=False):
|
|
|
67
69
|
finally:
|
|
68
70
|
chdir(oldcwd)
|
|
69
71
|
|
|
72
|
+
|
|
70
73
|
def unzip_file_to_dir(path_to_zip, output_directory):
|
|
71
74
|
"""
|
|
72
75
|
Extract a ZIP archive to a directory
|
|
@@ -74,13 +77,13 @@ def unzip_file_to_dir(path_to_zip, output_directory):
|
|
|
74
77
|
with ZipFile(path_to_zip, 'r') as z:
|
|
75
78
|
z.extractall(output_directory)
|
|
76
79
|
|
|
80
|
+
|
|
77
81
|
@lru_cache()
|
|
78
82
|
def get_ocrd_tool_json(executable):
|
|
79
83
|
"""
|
|
80
84
|
Get the ``ocrd-tool`` description of ``executable``.
|
|
81
85
|
"""
|
|
82
86
|
ocrd_tool = {}
|
|
83
|
-
executable_name = Path(executable).name
|
|
84
87
|
try:
|
|
85
88
|
ocrd_all_tool = loads(resource_string('ocrd', 'ocrd-all-tool.json'))
|
|
86
89
|
ocrd_tool = ocrd_all_tool[executable]
|
|
@@ -93,6 +96,7 @@ def get_ocrd_tool_json(executable):
|
|
|
93
96
|
ocrd_tool['resource_locations'] = ['data', 'cwd', 'system', 'module']
|
|
94
97
|
return ocrd_tool
|
|
95
98
|
|
|
99
|
+
|
|
96
100
|
@lru_cache()
|
|
97
101
|
def get_moduledir(executable):
|
|
98
102
|
moduledir = None
|
|
@@ -106,6 +110,7 @@ def get_moduledir(executable):
|
|
|
106
110
|
getLogger('ocrd.utils.get_moduledir').error(f'{executable} --dump-module-dir failed: {e}')
|
|
107
111
|
return moduledir
|
|
108
112
|
|
|
113
|
+
|
|
109
114
|
def list_resource_candidates(executable, fname, cwd=getcwd(), moduled=None, xdg_data_home=None):
|
|
110
115
|
"""
|
|
111
116
|
Generate candidates for processor resources according to
|
|
@@ -123,6 +128,7 @@ def list_resource_candidates(executable, fname, cwd=getcwd(), moduled=None, xdg_
|
|
|
123
128
|
candidates.append(join(moduled, fname))
|
|
124
129
|
return candidates
|
|
125
130
|
|
|
131
|
+
|
|
126
132
|
def list_all_resources(executable, moduled=None, xdg_data_home=None):
|
|
127
133
|
"""
|
|
128
134
|
List all processor resources in the filesystem according to
|
|
@@ -164,7 +170,7 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None):
|
|
|
164
170
|
# code and data; `is_resource()` only singles out
|
|
165
171
|
# files over directories; but we want data files only
|
|
166
172
|
# todo: more code and cache exclusion patterns!
|
|
167
|
-
['*.py', '*.py[cod]', '*~', 'ocrd-tool.json',
|
|
173
|
+
['*.py', '*.py[cod]', '*~', 'ocrd-tool.json',
|
|
168
174
|
'environment.pickle', 'resource_list.yml', 'lib.bash']):
|
|
169
175
|
continue
|
|
170
176
|
candidates.append(resource)
|
|
@@ -174,6 +180,7 @@ def list_all_resources(executable, moduled=None, xdg_data_home=None):
|
|
|
174
180
|
candidates += parent.iterdir()
|
|
175
181
|
return sorted([str(x) for x in candidates])
|
|
176
182
|
|
|
183
|
+
|
|
177
184
|
def get_processor_resource_types(executable, ocrd_tool=None):
|
|
178
185
|
"""
|
|
179
186
|
Determine what type of resource parameters a processor needs.
|
|
@@ -194,6 +201,7 @@ def get_processor_resource_types(executable, ocrd_tool=None):
|
|
|
194
201
|
return [p['content-type'] for p in ocrd_tool['parameters'].values()
|
|
195
202
|
if 'content-type' in p]
|
|
196
203
|
|
|
204
|
+
|
|
197
205
|
# ht @pabs3
|
|
198
206
|
# https://github.com/untitaker/python-atomicwrites/issues/42
|
|
199
207
|
class AtomicWriterPerms(AtomicWriter):
|
|
@@ -210,6 +218,7 @@ class AtomicWriterPerms(AtomicWriter):
|
|
|
210
218
|
chmod(fd, mode)
|
|
211
219
|
return f
|
|
212
220
|
|
|
221
|
+
|
|
213
222
|
@contextmanager
|
|
214
223
|
def atomic_write(fpath):
|
|
215
224
|
with atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True) as f:
|
|
@@ -224,6 +233,7 @@ def is_file_in_directory(directory, file):
|
|
|
224
233
|
file = Path(file)
|
|
225
234
|
return list(file.parts)[:len(directory.parts)] == list(directory.parts)
|
|
226
235
|
|
|
236
|
+
|
|
227
237
|
def itertree(path):
|
|
228
238
|
"""
|
|
229
239
|
Generate a list of paths by recursively enumerating ``path``
|
|
@@ -235,6 +245,7 @@ def itertree(path):
|
|
|
235
245
|
yield from itertree(subpath)
|
|
236
246
|
yield path
|
|
237
247
|
|
|
248
|
+
|
|
238
249
|
def directory_size(path):
|
|
239
250
|
"""
|
|
240
251
|
Calculates size of all files in directory ``path``
|
|
@@ -242,7 +253,8 @@ def directory_size(path):
|
|
|
242
253
|
path = Path(path)
|
|
243
254
|
return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file())
|
|
244
255
|
|
|
245
|
-
|
|
256
|
+
|
|
257
|
+
def guess_media_type(input_file: str, fallback: str = None, application_xml: str = 'application/xml'):
|
|
246
258
|
"""
|
|
247
259
|
Guess the media type of a file path
|
|
248
260
|
"""
|
|
@@ -259,6 +271,7 @@ def guess_media_type(input_file : str, fallback : str = None, application_xml :
|
|
|
259
271
|
mimetype = application_xml
|
|
260
272
|
return mimetype
|
|
261
273
|
|
|
274
|
+
|
|
262
275
|
@contextmanager
|
|
263
276
|
def redirect_stderr_and_stdout_to_file(filename):
|
|
264
277
|
with open(filename, 'at', encoding='utf-8') as f:
|