deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (131)
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
@@ -18,11 +18,14 @@
18
18
  """
19
19
  Module for ordering text and layout segments pipeline components
20
20
  """
21
+ from __future__ import annotations
22
+
21
23
  import os
24
+ from abc import ABC
22
25
  from copy import copy
23
26
  from itertools import chain
24
27
  from logging import DEBUG
25
- from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
28
+ from typing import Any, Optional, Sequence, Union
26
29
 
27
30
  import numpy as np
28
31
 
@@ -32,9 +35,8 @@ from ..datapoint.image import Image
32
35
  from ..datapoint.view import IMAGE_DEFAULTS
33
36
  from ..extern.base import DetectionResult
34
37
  from ..extern.tp.tpfrcnn.utils.np_box_ops import ioa as np_ioa
35
- from ..pipe.base import PipelineComponent
38
+ from ..pipe.base import MetaAnnotation, PipelineComponent
36
39
  from ..pipe.registry import pipeline_component_registry
37
- from ..utils.detection_types import JsonDict
38
40
  from ..utils.logger import LoggingRecord, logger
39
41
  from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr, get_type
40
42
 
@@ -67,7 +69,7 @@ class OrderGenerator:
67
69
  @staticmethod
68
70
  def group_words_into_lines(
69
71
  word_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
70
- ) -> List[Tuple[int, int, str]]:
72
+ ) -> list[tuple[int, int, str]]:
71
73
  """Arranging words into horizontal text lines and sorting text lines vertically in order to give
72
74
  an enumeration of words that is used for establishing the reading order. Using this reading order arrangement
73
75
  makes only sense for words within a rectangle and needs to be revised in more complex appearances.
@@ -75,7 +77,7 @@ class OrderGenerator:
75
77
  id)`.
76
78
  """
77
79
  reading_lines = []
78
- rows: List[Dict[str, float]] = []
80
+ rows: list[dict[str, float]] = []
79
81
  for word in word_anns:
80
82
  bounding_box = word.get_bounding_box(image_id)
81
83
  row_found = False
@@ -114,13 +116,13 @@ class OrderGenerator:
114
116
  @staticmethod
115
117
  def group_lines_into_lines(
116
118
  line_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
117
- ) -> List[Tuple[int, int, str]]:
119
+ ) -> list[tuple[int, int, str]]:
118
120
  """
119
121
  Sorting reading lines. Returns, for a list of `ImageAnnotation`, a list of tuples (each tuple containing the
120
122
  reading order and the `annotation_id` for each list element).
121
123
  :param line_anns: text line `ImageAnnotation`
122
124
  :param image_id: image_id of underlying image (to get the bounding boxes)
123
- :return: `List[(reading_order, reading_order,annotation_id)]`
125
+ :return: `list[(reading_order, reading_order,annotation_id)]`
124
126
  """
125
127
  reading_lines = []
126
128
  for ann in line_anns:
@@ -131,9 +133,9 @@ class OrderGenerator:
131
133
  return [(idx + 1, idx + 1, line[1]) for idx, line in enumerate(reading_lines)]
132
134
 
133
135
  @staticmethod
134
- def _connected_components(columns: List[BoundingBox]) -> List[Dict[str, Any]]:
136
+ def _connected_components(columns: list[BoundingBox]) -> list[dict[str, Any]]:
135
137
  # building connected components of columns
136
- connected_components: List[Dict[str, Any]] = []
138
+ connected_components: list[dict[str, Any]] = []
137
139
  for idx, col in enumerate(columns):
138
140
  col_dict = {"id": idx, "box": col}
139
141
  component_found = False
@@ -168,8 +170,8 @@ class OrderGenerator:
168
170
  return connected_components
169
171
 
170
172
  def order_blocks(
171
- self, anns: List[ImageAnnotation], image_width: float, image_height: float, image_id: Optional[str] = None
172
- ) -> Sequence[Tuple[int, str]]:
173
+ self, anns: list[ImageAnnotation], image_width: float, image_height: float, image_id: Optional[str] = None
174
+ ) -> Sequence[tuple[int, str]]:
173
175
  """
174
176
  Determining a text ordering of text blocks. These text blocks should be larger sections than barely words.
175
177
  It will first try to detect columns, then try to consolidate columns and finally try to detect connected
@@ -181,12 +183,12 @@ class OrderGenerator:
181
183
  :param image_width: image width (to re-calculate bounding boxes into relative coords)
182
184
  :param image_height: image height (to re-calculate bounding boxes into relative coords)
183
185
  :param image_id: image id
184
- :return: List of tuples with reading order position and `annotation_id`
186
+ :return: list of tuples with reading order position and `annotation_id`
185
187
  """
186
188
  if not anns:
187
189
  return []
188
190
  reading_blocks = []
189
- columns: List[BoundingBox] = []
191
+ columns: list[BoundingBox] = []
190
192
  anns.sort(
191
193
  key=lambda x: (
192
194
  x.bounding_box.transform(image_width, image_height).cy, # type: ignore
@@ -267,7 +269,7 @@ class OrderGenerator:
267
269
  blocks.sort(key=lambda x: x[0]) # type: ignore
268
270
  sorted_blocks = []
269
271
  max_block_number = max(list(columns_dict.values()))
270
- filtered_blocks: Sequence[Tuple[int, str]]
272
+ filtered_blocks: Sequence[tuple[int, str]]
271
273
  for idx in range(max_block_number + 1):
272
274
  filtered_blocks = list(filter(lambda x: x[0] == idx, blocks)) # type: ignore # pylint: disable=W0640
273
275
  sorted_blocks.extend(self._sort_anns_grouped_by_blocks(filtered_blocks, anns, image_width, image_height))
@@ -286,7 +288,7 @@ class OrderGenerator:
286
288
  )
287
289
  return reading_blocks
288
290
 
289
- def _consolidate_columns(self, columns: List[BoundingBox]) -> Dict[int, int]:
291
+ def _consolidate_columns(self, columns: list[BoundingBox]) -> dict[int, int]:
290
292
  if not columns:
291
293
  return {}
292
294
  np_boxes = np.array([col.to_list(mode="xyxy") for col in columns])
@@ -307,8 +309,8 @@ class OrderGenerator:
307
309
 
308
310
  @staticmethod
309
311
  def _sort_anns_grouped_by_blocks(
310
- block: Sequence[Tuple[int, str]], anns: Sequence[ImageAnnotation], image_width: float, image_height: float
311
- ) -> List[Tuple[int, str]]:
312
+ block: Sequence[tuple[int, str]], anns: Sequence[ImageAnnotation], image_width: float, image_height: float
313
+ ) -> list[tuple[int, str]]:
312
314
  if not block:
313
315
  return []
314
316
  anns_and_blocks_numbers = list(zip(*block))
@@ -326,14 +328,14 @@ class OrderGenerator:
326
328
  @staticmethod
327
329
  def _make_column_detect_results(columns: Sequence[BoundingBox]) -> Sequence[DetectionResult]:
328
330
  column_detect_result_list = []
329
- if os.environ.get("LOG_LEVEL") == "DEBUG":
331
+ if os.environ.get("LOG_LEVEL", "INFO") == "DEBUG":
330
332
  for box in columns:
331
333
  column_detect_result_list.append(
332
334
  DetectionResult(
333
335
  box=box.to_list(mode="xyxy"),
334
336
  absolute_coords=box.absolute_coords,
335
337
  class_id=99,
336
- class_name=LayoutType.column,
338
+ class_name=LayoutType.COLUMN,
337
339
  )
338
340
  )
339
341
  return column_detect_result_list
@@ -349,10 +351,11 @@ class TextLineGenerator:
349
351
  self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
350
352
  ):
351
353
  """
352
- :param make_sub_lines: Whether to build sub lines from lines
354
+ :param make_sub_lines: Whether to build sub lines from lines.
353
355
  :param line_category_id: category_id to give a text line
354
- :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
355
- will be built
356
+ :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
357
+ will be built. We use relative coordinates to calculate the distance between two
358
+ consecutive words. A reasonable value is 0.035
356
359
  """
357
360
  if make_sub_lines and paragraph_break is None:
358
361
  raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
@@ -360,10 +363,10 @@ class TextLineGenerator:
360
363
  self.make_sub_lines = make_sub_lines
361
364
  self.paragraph_break = paragraph_break
362
365
 
363
- def _make_detect_result(self, box: BoundingBox, relationships: Dict[str, List[str]]) -> DetectionResult:
366
+ def _make_detect_result(self, box: BoundingBox, relationships: dict[str, list[str]]) -> DetectionResult:
364
367
  return DetectionResult(
365
368
  box=box.to_list(mode="xyxy"),
366
- class_name=LayoutType.line,
369
+ class_name=LayoutType.LINE,
367
370
  class_id=self.line_category_id,
368
371
  absolute_coords=box.absolute_coords,
369
372
  relationships=relationships,
@@ -375,6 +378,7 @@ class TextLineGenerator:
375
378
  image_width: float,
376
379
  image_height: float,
377
380
  image_id: Optional[str] = None,
381
+ highest_level: bool = True,
378
382
  ) -> Sequence[DetectionResult]:
379
383
  """
380
384
  Creating detecting result of lines (or sub lines) from given word type `ImageAnnotation`.
@@ -392,6 +396,8 @@ class TextLineGenerator:
392
396
  # list of (word index, text line, word annotation_id)
393
397
  word_order_list = OrderGenerator.group_words_into_lines(word_anns, image_id)
394
398
  number_rows = max(word[1] for word in word_order_list)
399
+ if number_rows == 1 and not highest_level:
400
+ return []
395
401
  detection_result_list = []
396
402
  for number_row in range(1, number_rows + 1):
397
403
  # list of (word index, text line, word annotation_id) for text line equal to number_row
@@ -423,29 +429,139 @@ class TextLineGenerator:
423
429
  if current_box.absolute_coords:
424
430
  current_box = current_box.transform(image_width, image_height)
425
431
 
426
- # If distance between boxes is lower than paragraph break, same sub line
432
+ # If distance between boxes is lower than paragraph break, same sub-line
427
433
  if current_box.ulx - prev_box.lrx < self.paragraph_break: # type: ignore
428
434
  sub_line.append(ann)
429
435
  sub_line_ann_ids.append(ann.annotation_id)
430
436
  else:
431
- boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
432
- merge_box = merge_boxes(*boxes)
433
- detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
434
- detection_result_list.append(detection_result)
435
- sub_line = [ann]
436
- sub_line_ann_ids = [ann.annotation_id]
437
+ # We need to iterate maybe more than one time, because sub-lines may have more than one line
438
+ # if having been split. Take for example a multi-column layout where a sub-line has
439
+ # two lines because of a column break and fonts twice as large as the other column.
440
+ detection_results = self.create_detection_result(
441
+ sub_line, image_width, image_height, image_id, False
442
+ )
443
+ if detection_results:
444
+ detection_result_list.extend(detection_results)
445
+ else:
446
+ boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
447
+ merge_box = merge_boxes(*boxes)
448
+ detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
449
+ detection_result_list.append(detection_result)
450
+ sub_line = [ann]
451
+ sub_line_ann_ids = [ann.annotation_id]
437
452
 
438
453
  if idx == len(anns_per_row) - 1:
439
- boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
440
- merge_box = merge_boxes(*boxes)
441
- detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
442
- detection_result_list.append(detection_result)
454
+ detection_results = self.create_detection_result(
455
+ sub_line, image_width, image_height, image_id, False
456
+ )
457
+ if detection_results:
458
+ detection_result_list.extend(detection_results)
459
+ else:
460
+ boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
461
+ merge_box = merge_boxes(*boxes)
462
+ detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
463
+ detection_result_list.append(detection_result)
443
464
 
444
465
  return detection_result_list
445
466
 
446
467
 
468
+ class TextLineServiceMixin(PipelineComponent, ABC):
469
+ """
470
+ This class is used to create text lines similar to TextOrderService.
471
+ It uses the logic of the TextOrderService but modifies it to suit its needs.
472
+ It specifically uses the _create_lines_for_words method and modifies the serve method.
473
+ """
474
+
475
+ def __init__(
476
+ self,
477
+ name: str,
478
+ line_category_id: int = 1,
479
+ include_residual_text_container: bool = True,
480
+ paragraph_break: Optional[float] = None,
481
+ ):
482
+ """
483
+ Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
484
+ """
485
+ self.line_category_id = line_category_id
486
+ self.include_residual_text_container = include_residual_text_container
487
+ self.text_line_generator = TextLineGenerator(
488
+ self.include_residual_text_container, self.line_category_id, paragraph_break
489
+ )
490
+ super().__init__(name)
491
+
492
+ def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
493
+ """
494
+ This method creates lines for words using the TextLineGenerator instance.
495
+ """
496
+ detection_result_list = self.text_line_generator.create_detection_result(
497
+ word_anns,
498
+ self.dp_manager.datapoint.width,
499
+ self.dp_manager.datapoint.height,
500
+ self.dp_manager.datapoint.image_id,
501
+ )
502
+ line_anns = []
503
+ for detect_result in detection_result_list:
504
+ ann_id = self.dp_manager.set_image_annotation(detect_result)
505
+ if ann_id:
506
+ line_ann = self.dp_manager.get_annotation(ann_id)
507
+ child_ann_id_list = detect_result.relationships["child"] # type: ignore
508
+ for child_ann_id in child_ann_id_list:
509
+ line_ann.dump_relationship(Relationships.CHILD, child_ann_id)
510
+ line_anns.append(line_ann)
511
+ return line_anns
512
+
513
+
514
+ class TextLineService(TextLineServiceMixin):
515
+ """
516
+ Some OCR systems do not identify lines of text but only provide text boxes for words. This is not sufficient
517
+ for certain applications. This service determines rule-based text lines based on word boxes. One difficulty is
518
+ that text lines are not continuous but are interrupted, for example in multi-column layouts.
519
+ These interruptions are taken into account insofar as the gap between two words on almost the same page height
520
+ must not be too large.
521
+
522
+ The service constructs new ImageAnnotation of the category `LayoutType.line` and forms relations between the
523
+ text lines and the words contained in the text lines. The reading order is not arranged.
524
+ """
525
+
526
+ def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
527
+ """
528
+ Initialize `TextLineService`
529
+
530
+ :param line_category_id: category_id to give a text line
531
+ :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
532
+ will be built
533
+ """
534
+ super().__init__(
535
+ name="text_line",
536
+ line_category_id=line_category_id,
537
+ include_residual_text_container=True,
538
+ paragraph_break=paragraph_break,
539
+ )
540
+
541
+ def clone(self) -> TextLineService:
542
+ """
543
+ This method returns a new instance of the class with the same configuration.
544
+ """
545
+ return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
546
+
547
+ def serve(self, dp: Image) -> None:
548
+ text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
549
+ self._create_lines_for_words(text_container_anns)
550
+
551
+ def get_meta_annotation(self) -> MetaAnnotation:
552
+ """
553
+ This method returns metadata about the annotations created by this pipeline component.
554
+ """
555
+ return MetaAnnotation(
556
+ image_annotations=(LayoutType.LINE,),
557
+ sub_categories={LayoutType.LINE: {Relationships.CHILD}},
558
+ relationships={},
559
+ summaries=(),
560
+ )
561
+
562
+
447
563
  @pipeline_component_registry.register("TextOrderService")
448
- class TextOrderService(PipelineComponent):
564
+ class TextOrderService(TextLineServiceMixin):
449
565
  """
450
566
  Reading order of words within floating text blocks as well as reading order of blocks within simple text blocks.
451
567
  To understand the difference between floating text blocks and simple text blocks consider a page containing an
@@ -470,7 +586,8 @@ class TextOrderService(PipelineComponent):
470
586
  A category annotation per word is generated, which fixes the order per word in the block, as well as a category
471
587
  annotation per block, which saves the reading order of the block per page.
472
588
 
473
- The blocks are defined in `_floating_text_block_names` and text blocks in `_floating_text_block_names`.
589
+ The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
590
+ narrative text must be added in `floating_text_block_categories`.
474
591
 
475
592
  order = TextOrderService(text_container="word",
476
593
  text_block_categories=["title", "text", "list", "cell",
@@ -517,23 +634,28 @@ class TextOrderService(PipelineComponent):
517
634
  """
518
635
  self.text_container = get_type(text_container)
519
636
  if isinstance(text_block_categories, (str, ObjectTypes)):
520
- text_block_categories = [text_block_categories]
637
+ text_block_categories = (get_type(text_block_categories),)
521
638
  if text_block_categories is None:
522
639
  text_block_categories = IMAGE_DEFAULTS["text_block_categories"]
523
- self.text_block_categories = [get_type(category) for category in text_block_categories]
640
+ self.text_block_categories = tuple((get_type(category) for category in text_block_categories))
524
641
  if isinstance(floating_text_block_categories, (str, ObjectTypes)):
525
- floating_text_block_categories = [floating_text_block_categories]
642
+ floating_text_block_categories = (get_type(floating_text_block_categories),)
526
643
  if floating_text_block_categories is None:
527
644
  floating_text_block_categories = IMAGE_DEFAULTS["floating_text_block_categories"]
528
- self.floating_text_block_categories = [get_type(category) for category in floating_text_block_categories]
645
+ self.floating_text_block_categories = tuple((get_type(category) for category in floating_text_block_categories))
529
646
  if include_residual_text_container:
530
- self.floating_text_block_categories.append(LayoutType.line)
647
+ self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
531
648
  self.include_residual_text_container = include_residual_text_container
532
649
  self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
533
650
  self.text_line_generator = TextLineGenerator(
534
651
  self.include_residual_text_container, line_category_id, paragraph_break
535
652
  )
536
- super().__init__("text_order")
653
+ super().__init__(
654
+ name="text_order",
655
+ line_category_id=line_category_id,
656
+ include_residual_text_container=include_residual_text_container,
657
+ paragraph_break=paragraph_break,
658
+ )
537
659
  self._init_sanity_checks()
538
660
 
539
661
  def serve(self, dp: Image) -> None:
@@ -541,12 +663,12 @@ class TextOrderService(PipelineComponent):
541
663
  text_block_anns = dp.get_annotation(category_names=self.text_block_categories)
542
664
  if self.include_residual_text_container:
543
665
  mapped_text_container_ids = list(
544
- chain(*[text_block.get_relationship(Relationships.child) for text_block in text_block_anns])
666
+ chain(*[text_block.get_relationship(Relationships.CHILD) for text_block in text_block_anns])
545
667
  )
546
668
  residual_text_container_anns = [
547
669
  ann for ann in text_container_anns if ann.annotation_id not in mapped_text_container_ids
548
670
  ]
549
- if self.text_container == LayoutType.word:
671
+ if self.text_container == LayoutType.WORD:
550
672
  text_block_anns.extend(self._create_lines_for_words(residual_text_container_anns))
551
673
  else:
552
674
  text_block_anns.extend(residual_text_container_anns)
@@ -564,27 +686,9 @@ class TextOrderService(PipelineComponent):
564
686
  annotation_id = self.dp_manager.set_image_annotation(detect_result)
565
687
  if annotation_id:
566
688
  self.dp_manager.set_category_annotation(
567
- Relationships.reading_order, idx, Relationships.reading_order, annotation_id
689
+ Relationships.READING_ORDER, idx, Relationships.READING_ORDER, annotation_id
568
690
  )
569
691
 
570
- def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
571
- detection_result_list = self.text_line_generator.create_detection_result(
572
- word_anns,
573
- self.dp_manager.datapoint.width,
574
- self.dp_manager.datapoint.height,
575
- self.dp_manager.datapoint.image_id,
576
- )
577
- line_anns = []
578
- for detect_result in detection_result_list:
579
- ann_id = self.dp_manager.set_image_annotation(detect_result)
580
- if ann_id:
581
- line_ann = self.dp_manager.get_annotation(ann_id)
582
- child_ann_id_list = detect_result.relationships["child"] # type: ignore
583
- for child_ann_id in child_ann_id_list:
584
- line_ann.dump_relationship(Relationships.child, child_ann_id)
585
- line_anns.append(line_ann)
586
- return line_anns
587
-
588
692
  def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
589
693
  """
590
694
  Order text within a text block. It will take all child-like text containers (determined by a
@@ -592,11 +696,11 @@ class TextOrderService(PipelineComponent):
592
696
 
593
697
  :param text_block_ann: text block annotation (category one of `text_block_categories`).
594
698
  """
595
- text_container_ids = text_block_ann.get_relationship(Relationships.child)
699
+ text_container_ids = text_block_ann.get_relationship(Relationships.CHILD)
596
700
  text_container_ann = self.dp_manager.datapoint.get_annotation(
597
701
  annotation_ids=text_container_ids, category_names=self.text_container
598
702
  )
599
- if self.text_container == LayoutType.word:
703
+ if self.text_container == LayoutType.WORD:
600
704
  word_order_list = self.order_generator.group_words_into_lines(
601
705
  text_container_ann, self.dp_manager.datapoint.image_id
602
706
  )
@@ -606,10 +710,10 @@ class TextOrderService(PipelineComponent):
606
710
  )
607
711
  for word_order in word_order_list:
608
712
  self.dp_manager.set_category_annotation(
609
- Relationships.reading_order, word_order[0], Relationships.reading_order, word_order[2]
713
+ Relationships.READING_ORDER, word_order[0], Relationships.READING_ORDER, word_order[2]
610
714
  )
611
715
 
612
- def order_blocks(self, text_block_anns: List[ImageAnnotation]) -> None:
716
+ def order_blocks(self, text_block_anns: list[ImageAnnotation]) -> None:
613
717
  """
614
718
  Ordering of text blocks. Will use the internal order generator.
615
719
 
@@ -620,42 +724,40 @@ class TextOrderService(PipelineComponent):
620
724
  )
621
725
  for word_order in block_order_list:
622
726
  self.dp_manager.set_category_annotation(
623
- Relationships.reading_order, word_order[0], Relationships.reading_order, word_order[1]
727
+ Relationships.READING_ORDER, word_order[0], Relationships.READING_ORDER, word_order[1]
624
728
  )
625
729
 
626
730
  def _init_sanity_checks(self) -> None:
627
- assert self.text_container in (LayoutType.word, LayoutType.line), (
628
- f"text_container must be either {LayoutType.word} or " f"{LayoutType.line}"
731
+ assert self.text_container in (LayoutType.WORD, LayoutType.LINE), (
732
+ f"text_container must be either {LayoutType.WORD} or " f"{LayoutType.LINE}"
629
733
  )
630
734
  add_category = []
631
735
  if self.include_residual_text_container:
632
- add_category.append(LayoutType.line)
736
+ add_category.append(LayoutType.LINE)
633
737
 
634
738
  assert set(self.floating_text_block_categories) <= set(
635
- self.text_block_categories + add_category # type: ignore
739
+ self.text_block_categories + tuple(add_category)
636
740
  ), "floating_text_block_categories must be a subset of text_block_categories"
637
741
 
638
- def get_meta_annotation(self) -> JsonDict:
742
+ def get_meta_annotation(self) -> MetaAnnotation:
639
743
  add_category = [self.text_container]
640
- image_annotations = []
641
- if self.include_residual_text_container and self.text_container == LayoutType.word:
642
- add_category.append(LayoutType.line)
643
- image_annotations.append(LayoutType.line)
744
+ image_annotations: list[ObjectTypes] = []
745
+ if self.include_residual_text_container and self.text_container == LayoutType.WORD:
746
+ add_category.append(LayoutType.LINE)
747
+ image_annotations.append(LayoutType.LINE)
644
748
  anns_with_reading_order = list(copy(self.floating_text_block_categories)) + add_category
645
- return dict(
646
- [
647
- ("image_annotations", image_annotations),
648
- ("sub_categories", {category: {Relationships.reading_order} for category in anns_with_reading_order}),
649
- ("relationships", {}),
650
- ("summaries", []),
651
- ]
749
+ return MetaAnnotation(
750
+ image_annotations=tuple(image_annotations),
751
+ sub_categories={category: {Relationships.READING_ORDER} for category in anns_with_reading_order},
752
+ relationships={},
753
+ summaries=(),
652
754
  )
653
755
 
654
- def clone(self) -> PipelineComponent:
756
+ def clone(self) -> TextOrderService:
655
757
  return self.__class__(
656
- copy(self.text_container),
657
- copy(self.text_block_categories),
658
- copy(self.floating_text_block_categories),
758
+ self.text_container,
759
+ self.text_block_categories,
760
+ self.floating_text_block_categories,
659
761
  self.include_residual_text_container,
660
762
  self.order_generator.starting_point_tolerance,
661
763
  self.order_generator.broken_line_tolerance,
@@ -663,3 +765,6 @@ class TextOrderService(PipelineComponent):
663
765
  self.text_line_generator.paragraph_break,
664
766
  self.text_line_generator.line_category_id,
665
767
  )
768
+
769
+ def clear_predictor(self) -> None:
770
+ pass