deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/order.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
Ordering text and layout segments
|
|
20
20
|
"""
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
@@ -43,22 +43,22 @@ from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr,
|
|
|
43
43
|
|
|
44
44
|
class OrderGenerator:
|
|
45
45
|
"""
|
|
46
|
-
Class for implementing text ordering logic and tasks that have preparational character.
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
Class for implementing text ordering logic and tasks that have a preparatory character.
|
|
47
|
+
|
|
48
|
+
This includes logic for grouping word type `ImageAnnotation` into text lines, splitting text lines into sub-lines
|
|
49
|
+
(by detecting gaps between words), as well as ordering text blocks (e.g., titles, tables, etc.).
|
|
49
50
|
"""
|
|
50
51
|
|
|
51
52
|
def __init__(self, starting_point_tolerance: float, broken_line_tolerance: float, height_tolerance: float):
|
|
52
53
|
"""
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
be separated. Scaling factor of relative text block height.
|
|
54
|
+
Args:
|
|
55
|
+
starting_point_tolerance: Threshold to identify if two text blocks belong to one column. To check if two
|
|
56
|
+
text blocks belong to the same column, one condition says that x-coordinates of
|
|
57
|
+
vertices should not differ more than this threshold.
|
|
58
|
+
broken_line_tolerance: Threshold to identify if two consecutive words belonging to one line should be in two
|
|
59
|
+
different sub-lines (because they belong to two different text columns).
|
|
60
|
+
height_tolerance: Threshold to identify if two columns lying over each other belong together or need to be
|
|
61
|
+
separated. Scaling factor of relative text block height.
|
|
62
62
|
"""
|
|
63
63
|
self.starting_point_tolerance = starting_point_tolerance
|
|
64
64
|
self.broken_line_tolerance = broken_line_tolerance
|
|
@@ -70,11 +70,27 @@ class OrderGenerator:
|
|
|
70
70
|
def group_words_into_lines(
|
|
71
71
|
word_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
|
|
72
72
|
) -> list[tuple[int, int, str]]:
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
"""
|
|
74
|
+
Arranges words into horizontal text lines and sorts text lines vertically to provide an enumeration of words
|
|
75
|
+
used for establishing the reading order.
|
|
76
|
+
|
|
77
|
+
Using this reading order arrangement makes sense only for words within a rectangle and needs to be revised in
|
|
78
|
+
more complex appearances.
|
|
79
|
+
|
|
80
|
+
Example:
|
|
81
|
+
```python
|
|
82
|
+
group_words_into_lines(word_anns, image_id)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
word_anns: Sequence of `ImageAnnotation` representing words.
|
|
87
|
+
image_id: Optional image ID.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
List of triplets for every word annotation: (word reading order position, text line position, word
|
|
91
|
+
annotation id).
|
|
92
|
+
|
|
93
|
+
|
|
78
94
|
"""
|
|
79
95
|
reading_lines = []
|
|
80
96
|
rows: list[dict[str, float]] = []
|
|
@@ -118,11 +134,22 @@ class OrderGenerator:
|
|
|
118
134
|
line_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
|
|
119
135
|
) -> list[tuple[int, int, str]]:
|
|
120
136
|
"""
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
137
|
+
Sorts reading lines.
|
|
138
|
+
|
|
139
|
+
Returns for a list of `ImageAnnotation` a list of tuples, each tuple containing the reading order and the
|
|
140
|
+
`annotation_id` for each list element.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
line_anns: Sequence of text line `ImageAnnotation`.
|
|
144
|
+
image_id: Image ID of underlying image (to get the bounding boxes).
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
List of tuples (reading_order, reading_order, annotation_id).
|
|
148
|
+
|
|
149
|
+
Example:
|
|
150
|
+
```python
|
|
151
|
+
group_lines_into_lines(line_anns, image_id)
|
|
152
|
+
```
|
|
126
153
|
"""
|
|
127
154
|
reading_lines = []
|
|
128
155
|
for ann in line_anns:
|
|
@@ -173,17 +200,27 @@ class OrderGenerator:
|
|
|
173
200
|
self, anns: list[ImageAnnotation], image_width: float, image_height: float, image_id: Optional[str] = None
|
|
174
201
|
) -> Sequence[tuple[int, str]]:
|
|
175
202
|
"""
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
:
|
|
185
|
-
|
|
186
|
-
|
|
203
|
+
Determines a text ordering of text blocks.
|
|
204
|
+
|
|
205
|
+
These text blocks should be larger sections than just words. It will first try to detect columns, then try to
|
|
206
|
+
consolidate columns, and finally try to detect connected components of columns. A connected component of columns
|
|
207
|
+
is a group of columns that lie next to each other. Having two connected components lying over each other will
|
|
208
|
+
infer a reading order where the upper block of the connected component will be read first, followed by text
|
|
209
|
+
blocks of columns of the second.
|
|
210
|
+
|
|
211
|
+
Example:
|
|
212
|
+
```python
|
|
213
|
+
order_blocks(anns, image_width, image_height, image_id)
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
anns: List of `ImageAnnotation` with all elements to sort.
|
|
218
|
+
image_width: Image width (to re-calculate bounding boxes into relative coordinates).
|
|
219
|
+
image_height: Image height (to re-calculate bounding boxes into relative coordinates).
|
|
220
|
+
image_id: Image ID.
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
List of tuples with reading order position and `annotation_id`.
|
|
187
224
|
"""
|
|
188
225
|
if not anns:
|
|
189
226
|
return []
|
|
@@ -343,16 +380,24 @@ class OrderGenerator:
|
|
|
343
380
|
|
|
344
381
|
class TextLineGenerator:
|
|
345
382
|
"""
|
|
346
|
-
Class for generating synthetic text lines from words.
|
|
347
|
-
|
|
383
|
+
Class for generating synthetic text lines from words.
|
|
384
|
+
|
|
385
|
+
Possible to break text lines into sub-lines by using a paragraph break threshold. This allows detection of a
|
|
386
|
+
multi-column structure just by observing sub-lines.
|
|
387
|
+
|
|
388
|
+
|
|
348
389
|
"""
|
|
349
390
|
|
|
350
391
|
def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
|
|
351
392
|
"""
|
|
352
|
-
:
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
393
|
+
Args:
|
|
394
|
+
make_sub_lines: Whether to build sub-lines from lines.
|
|
395
|
+
paragraph_break: Threshold of two consecutive words. If distance is larger than threshold, two sub-lines
|
|
396
|
+
will be built. Relative coordinates are used to calculate the distance between two consecutive words.
|
|
397
|
+
A reasonable value is `0.035`.
|
|
398
|
+
|
|
399
|
+
Raises:
|
|
400
|
+
ValueError: If `make_sub_lines` is `True` and `paragraph_break` is `None`.
|
|
356
401
|
"""
|
|
357
402
|
if make_sub_lines and paragraph_break is None:
|
|
358
403
|
raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
|
|
@@ -376,13 +421,22 @@ class TextLineGenerator:
|
|
|
376
421
|
highest_level: bool = True,
|
|
377
422
|
) -> Sequence[DetectionResult]:
|
|
378
423
|
"""
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
:
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
424
|
+
Creates detection result of lines (or sub-lines) from given word type `ImageAnnotation`.
|
|
425
|
+
|
|
426
|
+
Example:
|
|
427
|
+
```python
|
|
428
|
+
create_detection_result(word_anns, image_width, image_height, image_id)
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
word_anns: List of given word type `ImageAnnotation`.
|
|
433
|
+
image_width: Image width.
|
|
434
|
+
image_height: Image height.
|
|
435
|
+
image_id: Image ID.
|
|
436
|
+
highest_level: Whether this is the highest level of line creation.
|
|
437
|
+
|
|
438
|
+
Returns:
|
|
439
|
+
Sequence of `DetectionResult`.
|
|
386
440
|
"""
|
|
387
441
|
if not word_anns:
|
|
388
442
|
return []
|
|
@@ -462,9 +516,12 @@ class TextLineGenerator:
|
|
|
462
516
|
|
|
463
517
|
class TextLineServiceMixin(PipelineComponent, ABC):
|
|
464
518
|
"""
|
|
465
|
-
This class is used to create text lines similar to TextOrderService
|
|
466
|
-
|
|
467
|
-
It
|
|
519
|
+
This class is used to create text lines similar to `TextOrderService`.
|
|
520
|
+
|
|
521
|
+
It uses the logic of the `TextOrderService` but modifies it to suit its needs. It specifically uses the
|
|
522
|
+
`_create_lines_for_words` method and modifies the `serve` method.
|
|
523
|
+
|
|
524
|
+
|
|
468
525
|
"""
|
|
469
526
|
|
|
470
527
|
def __init__(
|
|
@@ -474,7 +531,10 @@ class TextLineServiceMixin(PipelineComponent, ABC):
|
|
|
474
531
|
paragraph_break: Optional[float] = None,
|
|
475
532
|
):
|
|
476
533
|
"""
|
|
477
|
-
|
|
534
|
+
Args:
|
|
535
|
+
name: Name of the service.
|
|
536
|
+
include_residual_text_container: Whether to include residual text containers.
|
|
537
|
+
paragraph_break: Paragraph break threshold.
|
|
478
538
|
"""
|
|
479
539
|
self.include_residual_text_container = include_residual_text_container
|
|
480
540
|
self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
|
|
@@ -482,7 +542,13 @@ class TextLineServiceMixin(PipelineComponent, ABC):
|
|
|
482
542
|
|
|
483
543
|
def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
|
|
484
544
|
"""
|
|
485
|
-
|
|
545
|
+
Creates lines for words using the `TextLineGenerator` instance.
|
|
546
|
+
|
|
547
|
+
Args:
|
|
548
|
+
word_anns: Sequence of `ImageAnnotation`.
|
|
549
|
+
|
|
550
|
+
Returns:
|
|
551
|
+
Sequence of `ImageAnnotation`.
|
|
486
552
|
"""
|
|
487
553
|
detection_result_list = self.text_line_generator.create_detection_result(
|
|
488
554
|
word_anns,
|
|
@@ -504,22 +570,24 @@ class TextLineServiceMixin(PipelineComponent, ABC):
|
|
|
504
570
|
|
|
505
571
|
class TextLineService(TextLineServiceMixin):
|
|
506
572
|
"""
|
|
507
|
-
Some OCR systems do not identify lines of text but only provide text boxes for words.
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
573
|
+
Some OCR systems do not identify lines of text but only provide text boxes for words.
|
|
574
|
+
|
|
575
|
+
This is not sufficient for certain applications. This service determines rule-based text lines based on word boxes.
|
|
576
|
+
One difficulty is that text lines are not continuous but are interrupted, for example, in multi-column layouts.
|
|
577
|
+
These interruptions are taken into account insofar as the gap between two words on almost the same page height must
|
|
578
|
+
not be too large.
|
|
579
|
+
|
|
580
|
+
The service constructs new `ImageAnnotation` of the category `LayoutType.line` and forms relations between the text
|
|
581
|
+
lines and the words contained in the text lines. The reading order is not arranged.
|
|
582
|
+
|
|
583
|
+
|
|
515
584
|
"""
|
|
516
585
|
|
|
517
586
|
def __init__(self, paragraph_break: Optional[float] = None):
|
|
518
587
|
"""
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
will be built
|
|
588
|
+
Args:
|
|
589
|
+
paragraph_break: Threshold of two consecutive words. If distance is larger than threshold, two
|
|
590
|
+
sub-lines will be built.
|
|
523
591
|
"""
|
|
524
592
|
super().__init__(
|
|
525
593
|
name="text_line",
|
|
@@ -553,24 +621,24 @@ class TextLineService(TextLineServiceMixin):
|
|
|
553
621
|
class TextOrderService(TextLineServiceMixin):
|
|
554
622
|
"""
|
|
555
623
|
Reading order of words within floating text blocks as well as reading order of blocks within simple text blocks.
|
|
556
|
-
To understand the difference between floating text blocks and simple text blocks consider a page containing an
|
|
557
|
-
article and a table. Table cells are text blocks that contain words which must be sorted.
|
|
558
|
-
However, they do not belong to floating text that encircle a table. They are rather an element that is supposed to
|
|
559
|
-
be read independently.
|
|
560
624
|
|
|
561
|
-
|
|
562
|
-
|
|
625
|
+
To understand the difference between floating text blocks and simple text blocks, consider a page containing an
|
|
626
|
+
article and a table. Table cells are text blocks that contain words which must be sorted. However, they do not
|
|
627
|
+
belong to floating text that encircles a table. They are rather an element that is supposed to be read independently.
|
|
563
628
|
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
new line, provided that its center is not in a line that has already
|
|
567
|
-
been created by an already processed word. The entire block width is defined as the line width
|
|
568
|
-
and the upper or lower line limit of the word bounding box as the upper or lower line limit. The reading order
|
|
569
|
-
of the words is from left to right within a line. The reading order of the lines is from top to bottom.
|
|
629
|
+
A heuristic argument for its ordering is used where the underlying assumption is the reading order from left
|
|
630
|
+
to right.
|
|
570
631
|
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
632
|
+
- For the reading order within a text block, text containers (i.e., image annotations that contain character
|
|
633
|
+
sub-annotations) are sorted based on their bounding box center and then lines are formed: Each word induces a new
|
|
634
|
+
line, provided that its center is not in a line that has already been created by an already processed word. The
|
|
635
|
+
entire block width is defined as the line width and the upper or lower line limit of the word bounding box as the
|
|
636
|
+
upper or lower line limit. The reading order of the words is from left to right within a line. The reading order
|
|
637
|
+
of the lines is from top to bottom.
|
|
638
|
+
|
|
639
|
+
- For the reading order of text blocks within a page, the blocks are sorted using a similar procedure, with the
|
|
640
|
+
difference that columns are formed instead of lines. Column lengths are defined as the length of the entire page
|
|
641
|
+
and the left and right text block boundaries as the left and right column boundaries.
|
|
574
642
|
|
|
575
643
|
A category annotation per word is generated, which fixes the order per word in the block, as well as a category
|
|
576
644
|
annotation per block, which saves the reading order of the block per page.
|
|
@@ -578,10 +646,19 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
578
646
|
The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
|
|
579
647
|
narrative text must be added in `floating_text_block_categories`.
|
|
580
648
|
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
649
|
+
Example:
|
|
650
|
+
|
|
651
|
+
```python
|
|
652
|
+
order = TextOrderService(
|
|
653
|
+
text_container="word",
|
|
654
|
+
text_block_categories=["title", "text", "list", "cell", "head", "body"],
|
|
655
|
+
floating_text_block_categories=["title", "text", "list"]
|
|
656
|
+
)
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
Note:
|
|
660
|
+
The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
|
|
661
|
+
narrative text must be added in `floating_text_block_categories`.
|
|
585
662
|
"""
|
|
586
663
|
|
|
587
664
|
def __init__(
|
|
@@ -596,40 +673,42 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
596
673
|
paragraph_break: Optional[float] = 0.035,
|
|
597
674
|
):
|
|
598
675
|
"""
|
|
599
|
-
:
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
676
|
+
Args:
|
|
677
|
+
text_container: `Name` of an image annotation that has a CHARS sub-category. These annotations will be
|
|
678
|
+
ordered within all text blocks.
|
|
679
|
+
text_block_categories: `Name` of image annotations that have a relation with text containers and where text
|
|
680
|
+
containers need to be sorted. Defaults to `IMAGE_DEFAULTS["text_block_categories"]`.
|
|
681
|
+
floating_text_block_categories: Name of image annotations that belong to floating text. These annotations
|
|
682
|
+
form the highest hierarchy of text blocks that will be ordered to generate a
|
|
683
|
+
narrative output of text. Defaults to
|
|
684
|
+
`IMAGE_DEFAULTS["floating_text_block_categories"]`.
|
|
685
|
+
include_residual_text_container: Text containers with no parent text block (e.g., not matched with any
|
|
686
|
+
parent annotation in `MatchingService`) will not be assigned with a
|
|
687
|
+
reading order. (Reading order will only be assigned to image annotations that are
|
|
688
|
+
`floating_text_block_categories` or text containers matched with text block
|
|
689
|
+
annotations.) Setting `include_residual_text_container=True` will build
|
|
690
|
+
synthetic text lines from text containers and regard these text lines as
|
|
691
|
+
floating text blocks.
|
|
692
|
+
starting_point_tolerance: Threshold to identify if two text blocks belong to one column. To check if two
|
|
693
|
+
text blocks belong to the same column, one condition says that x-coordinates of
|
|
694
|
+
vertices should not differ more than this threshold.
|
|
695
|
+
broken_line_tolerance: Threshold to identify if two consecutive words belonging to one line should be in two
|
|
696
|
+
different sub-lines (because they belong to two different text columns).
|
|
697
|
+
height_tolerance: Threshold to identify if two columns lying over each other belong together or need to be
|
|
698
|
+
separated. Scaling factor of relative text block height.
|
|
699
|
+
paragraph_break: Threshold of two consecutive words. If distance is larger than threshold, two sub-lines
|
|
700
|
+
will be built.
|
|
622
701
|
"""
|
|
623
702
|
self.text_container = get_type(text_container)
|
|
624
703
|
if isinstance(text_block_categories, (str, ObjectTypes)):
|
|
625
704
|
text_block_categories = (get_type(text_block_categories),)
|
|
626
705
|
if text_block_categories is None:
|
|
627
|
-
text_block_categories = IMAGE_DEFAULTS
|
|
706
|
+
text_block_categories = IMAGE_DEFAULTS.TEXT_BLOCK_CATEGORIES
|
|
628
707
|
self.text_block_categories = tuple((get_type(category) for category in text_block_categories))
|
|
629
708
|
if isinstance(floating_text_block_categories, (str, ObjectTypes)):
|
|
630
709
|
floating_text_block_categories = (get_type(floating_text_block_categories),)
|
|
631
710
|
if floating_text_block_categories is None:
|
|
632
|
-
floating_text_block_categories = IMAGE_DEFAULTS
|
|
711
|
+
floating_text_block_categories = IMAGE_DEFAULTS.FLOATING_TEXT_BLOCK_CATEGORIES
|
|
633
712
|
self.floating_text_block_categories = tuple((get_type(category) for category in floating_text_block_categories))
|
|
634
713
|
if include_residual_text_container:
|
|
635
714
|
self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
|
|
@@ -676,10 +755,13 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
676
755
|
|
|
677
756
|
def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
|
|
678
757
|
"""
|
|
679
|
-
|
|
680
|
-
|
|
758
|
+
Orders text within a text block.
|
|
759
|
+
|
|
760
|
+
It will take all child-like text containers (determined by a `MatchingService`) from a block and order
|
|
761
|
+
all items line-wise.
|
|
681
762
|
|
|
682
|
-
:
|
|
763
|
+
Args:
|
|
764
|
+
text_block_ann: Text block annotation (category one of `text_block_categories`).
|
|
683
765
|
"""
|
|
684
766
|
text_container_ids = text_block_ann.get_relationship(Relationships.CHILD)
|
|
685
767
|
text_container_ann = self.dp_manager.datapoint.get_annotation(
|
|
@@ -700,9 +782,10 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
700
782
|
|
|
701
783
|
def order_blocks(self, text_block_anns: list[ImageAnnotation]) -> None:
|
|
702
784
|
"""
|
|
703
|
-
|
|
785
|
+
Orders text blocks using the internal order generator.
|
|
704
786
|
|
|
705
|
-
:
|
|
787
|
+
Args:
|
|
788
|
+
text_block_anns: List of `ImageAnnotation`.
|
|
706
789
|
"""
|
|
707
790
|
block_order_list = self.order_generator.order_blocks(
|
|
708
791
|
text_block_anns, self.dp_manager.datapoint.width, self.dp_manager.datapoint.height
|