deepdoctection-0.32-py3-none-any.whl → deepdoctection-0.34-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (111):
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -25,7 +25,7 @@ from abc import ABC
25
25
  from copy import copy
26
26
  from itertools import chain
27
27
  from logging import DEBUG
28
- from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
28
+ from typing import Any, Optional, Sequence, Union
29
29
 
30
30
  import numpy as np
31
31
 
@@ -35,9 +35,8 @@ from ..datapoint.image import Image
35
35
  from ..datapoint.view import IMAGE_DEFAULTS
36
36
  from ..extern.base import DetectionResult
37
37
  from ..extern.tp.tpfrcnn.utils.np_box_ops import ioa as np_ioa
38
- from ..pipe.base import PipelineComponent
38
+ from ..pipe.base import MetaAnnotation, PipelineComponent
39
39
  from ..pipe.registry import pipeline_component_registry
40
- from ..utils.detection_types import JsonDict
41
40
  from ..utils.logger import LoggingRecord, logger
42
41
  from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr, get_type
43
42
 
@@ -70,7 +69,7 @@ class OrderGenerator:
70
69
  @staticmethod
71
70
  def group_words_into_lines(
72
71
  word_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
73
- ) -> List[Tuple[int, int, str]]:
72
+ ) -> list[tuple[int, int, str]]:
74
73
  """Arranging words into horizontal text lines and sorting text lines vertically in order to give
75
74
  an enumeration of words that is used for establishing the reading order. Using this reading order arragement
76
75
  makes only sense for words within a rectangle and needs to be revised in more complex appearances.
@@ -78,7 +77,7 @@ class OrderGenerator:
78
77
  id)`.
79
78
  """
80
79
  reading_lines = []
81
- rows: List[Dict[str, float]] = []
80
+ rows: list[dict[str, float]] = []
82
81
  for word in word_anns:
83
82
  bounding_box = word.get_bounding_box(image_id)
84
83
  row_found = False
@@ -117,13 +116,13 @@ class OrderGenerator:
117
116
  @staticmethod
118
117
  def group_lines_into_lines(
119
118
  line_anns: Sequence[ImageAnnotation], image_id: Optional[str] = None
120
- ) -> List[Tuple[int, int, str]]:
119
+ ) -> list[tuple[int, int, str]]:
121
120
  """
122
121
  Sorting reading lines. Returns for a list of `ImageAnnotation` an list of tuples (each tuple containing the
123
122
  reading order and the `annotation_id` for each list element.
124
123
  :param line_anns: text line `ImageAnnotation`
125
124
  :param image_id: image_id of underyling image (to find get the bounding boxes)
126
- :return: `List[(reading_order, reading_order,annotation_id)]`
125
+ :return: `list[(reading_order, reading_order,annotation_id)]`
127
126
  """
128
127
  reading_lines = []
129
128
  for ann in line_anns:
@@ -134,9 +133,9 @@ class OrderGenerator:
134
133
  return [(idx + 1, idx + 1, line[1]) for idx, line in enumerate(reading_lines)]
135
134
 
136
135
  @staticmethod
137
- def _connected_components(columns: List[BoundingBox]) -> List[Dict[str, Any]]:
136
+ def _connected_components(columns: list[BoundingBox]) -> list[dict[str, Any]]:
138
137
  # building connected components of columns
139
- connected_components: List[Dict[str, Any]] = []
138
+ connected_components: list[dict[str, Any]] = []
140
139
  for idx, col in enumerate(columns):
141
140
  col_dict = {"id": idx, "box": col}
142
141
  component_found = False
@@ -171,8 +170,8 @@ class OrderGenerator:
171
170
  return connected_components
172
171
 
173
172
  def order_blocks(
174
- self, anns: List[ImageAnnotation], image_width: float, image_height: float, image_id: Optional[str] = None
175
- ) -> Sequence[Tuple[int, str]]:
173
+ self, anns: list[ImageAnnotation], image_width: float, image_height: float, image_id: Optional[str] = None
174
+ ) -> Sequence[tuple[int, str]]:
176
175
  """
177
176
  Determining a text ordering of text blocks. These text blocks should be larger sections than barely words.
178
177
  It will first try to detect columns, then try to consolidate columns and finally try to detecting connected
@@ -184,12 +183,12 @@ class OrderGenerator:
184
183
  :param image_width: image width (to re-calculate bounding boxes into relative coords)
185
184
  :param image_height: image height (to re-calculate bounding boxes into relative coords)
186
185
  :param image_id: image id
187
- :return: List of tuples with reading order position and `annotation_id`
186
+ :return: list of tuples with reading order position and `annotation_id`
188
187
  """
189
188
  if not anns:
190
189
  return []
191
190
  reading_blocks = []
192
- columns: List[BoundingBox] = []
191
+ columns: list[BoundingBox] = []
193
192
  anns.sort(
194
193
  key=lambda x: (
195
194
  x.bounding_box.transform(image_width, image_height).cy, # type: ignore
@@ -270,7 +269,7 @@ class OrderGenerator:
270
269
  blocks.sort(key=lambda x: x[0]) # type: ignore
271
270
  sorted_blocks = []
272
271
  max_block_number = max(list(columns_dict.values()))
273
- filtered_blocks: Sequence[Tuple[int, str]]
272
+ filtered_blocks: Sequence[tuple[int, str]]
274
273
  for idx in range(max_block_number + 1):
275
274
  filtered_blocks = list(filter(lambda x: x[0] == idx, blocks)) # type: ignore # pylint: disable=W0640
276
275
  sorted_blocks.extend(self._sort_anns_grouped_by_blocks(filtered_blocks, anns, image_width, image_height))
@@ -289,7 +288,7 @@ class OrderGenerator:
289
288
  )
290
289
  return reading_blocks
291
290
 
292
- def _consolidate_columns(self, columns: List[BoundingBox]) -> Dict[int, int]:
291
+ def _consolidate_columns(self, columns: list[BoundingBox]) -> dict[int, int]:
293
292
  if not columns:
294
293
  return {}
295
294
  np_boxes = np.array([col.to_list(mode="xyxy") for col in columns])
@@ -310,8 +309,8 @@ class OrderGenerator:
310
309
 
311
310
  @staticmethod
312
311
  def _sort_anns_grouped_by_blocks(
313
- block: Sequence[Tuple[int, str]], anns: Sequence[ImageAnnotation], image_width: float, image_height: float
314
- ) -> List[Tuple[int, str]]:
312
+ block: Sequence[tuple[int, str]], anns: Sequence[ImageAnnotation], image_width: float, image_height: float
313
+ ) -> list[tuple[int, str]]:
315
314
  if not block:
316
315
  return []
317
316
  anns_and_blocks_numbers = list(zip(*block))
@@ -329,14 +328,14 @@ class OrderGenerator:
329
328
  @staticmethod
330
329
  def _make_column_detect_results(columns: Sequence[BoundingBox]) -> Sequence[DetectionResult]:
331
330
  column_detect_result_list = []
332
- if os.environ.get("LOG_LEVEL") == "DEBUG":
331
+ if os.environ.get("LOG_LEVEL", "INFO") == "DEBUG":
333
332
  for box in columns:
334
333
  column_detect_result_list.append(
335
334
  DetectionResult(
336
335
  box=box.to_list(mode="xyxy"),
337
336
  absolute_coords=box.absolute_coords,
338
337
  class_id=99,
339
- class_name=LayoutType.column,
338
+ class_name=LayoutType.COLUMN,
340
339
  )
341
340
  )
342
341
  return column_detect_result_list
@@ -364,10 +363,10 @@ class TextLineGenerator:
364
363
  self.make_sub_lines = make_sub_lines
365
364
  self.paragraph_break = paragraph_break
366
365
 
367
- def _make_detect_result(self, box: BoundingBox, relationships: Dict[str, List[str]]) -> DetectionResult:
366
+ def _make_detect_result(self, box: BoundingBox, relationships: dict[str, list[str]]) -> DetectionResult:
368
367
  return DetectionResult(
369
368
  box=box.to_list(mode="xyxy"),
370
- class_name=LayoutType.line,
369
+ class_name=LayoutType.LINE,
371
370
  class_id=self.line_category_id,
372
371
  absolute_coords=box.absolute_coords,
373
372
  relationships=relationships,
@@ -507,7 +506,7 @@ class TextLineServiceMixin(PipelineComponent, ABC):
507
506
  line_ann = self.dp_manager.get_annotation(ann_id)
508
507
  child_ann_id_list = detect_result.relationships["child"] # type: ignore
509
508
  for child_ann_id in child_ann_id_list:
510
- line_ann.dump_relationship(Relationships.child, child_ann_id)
509
+ line_ann.dump_relationship(Relationships.CHILD, child_ann_id)
511
510
  line_anns.append(line_ann)
512
511
  return line_anns
513
512
 
@@ -539,27 +538,25 @@ class TextLineService(TextLineServiceMixin):
539
538
  paragraph_break=paragraph_break,
540
539
  )
541
540
 
542
- def clone(self) -> PipelineComponent:
541
+ def clone(self) -> TextLineService:
543
542
  """
544
543
  This method returns a new instance of the class with the same configuration.
545
544
  """
546
545
  return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
547
546
 
548
547
  def serve(self, dp: Image) -> None:
549
- text_container_anns = dp.get_annotation(category_names=LayoutType.word)
548
+ text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
550
549
  self._create_lines_for_words(text_container_anns)
551
550
 
552
- def get_meta_annotation(self) -> JsonDict:
551
+ def get_meta_annotation(self) -> MetaAnnotation:
553
552
  """
554
553
  This method returns metadata about the annotations created by this pipeline component.
555
554
  """
556
- return dict(
557
- [
558
- ("image_annotations", [LayoutType.line]),
559
- ("sub_categories", {LayoutType.line: {Relationships.child}}),
560
- ("relationships", {}),
561
- ("summaries", []),
562
- ]
555
+ return MetaAnnotation(
556
+ image_annotations=(LayoutType.LINE,),
557
+ sub_categories={LayoutType.LINE: {Relationships.CHILD}},
558
+ relationships={},
559
+ summaries=(),
563
560
  )
564
561
 
565
562
 
@@ -637,17 +634,17 @@ class TextOrderService(TextLineServiceMixin):
637
634
  """
638
635
  self.text_container = get_type(text_container)
639
636
  if isinstance(text_block_categories, (str, ObjectTypes)):
640
- text_block_categories = [text_block_categories]
637
+ text_block_categories = (get_type(text_block_categories),)
641
638
  if text_block_categories is None:
642
639
  text_block_categories = IMAGE_DEFAULTS["text_block_categories"]
643
- self.text_block_categories = [get_type(category) for category in text_block_categories]
640
+ self.text_block_categories = tuple((get_type(category) for category in text_block_categories))
644
641
  if isinstance(floating_text_block_categories, (str, ObjectTypes)):
645
- floating_text_block_categories = [floating_text_block_categories]
642
+ floating_text_block_categories = (get_type(floating_text_block_categories),)
646
643
  if floating_text_block_categories is None:
647
644
  floating_text_block_categories = IMAGE_DEFAULTS["floating_text_block_categories"]
648
- self.floating_text_block_categories = [get_type(category) for category in floating_text_block_categories]
645
+ self.floating_text_block_categories = tuple((get_type(category) for category in floating_text_block_categories))
649
646
  if include_residual_text_container:
650
- self.floating_text_block_categories.append(LayoutType.line)
647
+ self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
651
648
  self.include_residual_text_container = include_residual_text_container
652
649
  self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
653
650
  self.text_line_generator = TextLineGenerator(
@@ -666,12 +663,12 @@ class TextOrderService(TextLineServiceMixin):
666
663
  text_block_anns = dp.get_annotation(category_names=self.text_block_categories)
667
664
  if self.include_residual_text_container:
668
665
  mapped_text_container_ids = list(
669
- chain(*[text_block.get_relationship(Relationships.child) for text_block in text_block_anns])
666
+ chain(*[text_block.get_relationship(Relationships.CHILD) for text_block in text_block_anns])
670
667
  )
671
668
  residual_text_container_anns = [
672
669
  ann for ann in text_container_anns if ann.annotation_id not in mapped_text_container_ids
673
670
  ]
674
- if self.text_container == LayoutType.word:
671
+ if self.text_container == LayoutType.WORD:
675
672
  text_block_anns.extend(self._create_lines_for_words(residual_text_container_anns))
676
673
  else:
677
674
  text_block_anns.extend(residual_text_container_anns)
@@ -689,7 +686,7 @@ class TextOrderService(TextLineServiceMixin):
689
686
  annotation_id = self.dp_manager.set_image_annotation(detect_result)
690
687
  if annotation_id:
691
688
  self.dp_manager.set_category_annotation(
692
- Relationships.reading_order, idx, Relationships.reading_order, annotation_id
689
+ Relationships.READING_ORDER, idx, Relationships.READING_ORDER, annotation_id
693
690
  )
694
691
 
695
692
  def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
@@ -699,11 +696,11 @@ class TextOrderService(TextLineServiceMixin):
699
696
 
700
697
  :param text_block_ann: text block annotation (category one of `text_block_categories`).
701
698
  """
702
- text_container_ids = text_block_ann.get_relationship(Relationships.child)
699
+ text_container_ids = text_block_ann.get_relationship(Relationships.CHILD)
703
700
  text_container_ann = self.dp_manager.datapoint.get_annotation(
704
701
  annotation_ids=text_container_ids, category_names=self.text_container
705
702
  )
706
- if self.text_container == LayoutType.word:
703
+ if self.text_container == LayoutType.WORD:
707
704
  word_order_list = self.order_generator.group_words_into_lines(
708
705
  text_container_ann, self.dp_manager.datapoint.image_id
709
706
  )
@@ -713,10 +710,10 @@ class TextOrderService(TextLineServiceMixin):
713
710
  )
714
711
  for word_order in word_order_list:
715
712
  self.dp_manager.set_category_annotation(
716
- Relationships.reading_order, word_order[0], Relationships.reading_order, word_order[2]
713
+ Relationships.READING_ORDER, word_order[0], Relationships.READING_ORDER, word_order[2]
717
714
  )
718
715
 
719
- def order_blocks(self, text_block_anns: List[ImageAnnotation]) -> None:
716
+ def order_blocks(self, text_block_anns: list[ImageAnnotation]) -> None:
720
717
  """
721
718
  Ordering of text blocks. Will use the internal order generator.
722
719
 
@@ -727,42 +724,40 @@ class TextOrderService(TextLineServiceMixin):
727
724
  )
728
725
  for word_order in block_order_list:
729
726
  self.dp_manager.set_category_annotation(
730
- Relationships.reading_order, word_order[0], Relationships.reading_order, word_order[1]
727
+ Relationships.READING_ORDER, word_order[0], Relationships.READING_ORDER, word_order[1]
731
728
  )
732
729
 
733
730
  def _init_sanity_checks(self) -> None:
734
- assert self.text_container in (LayoutType.word, LayoutType.line), (
735
- f"text_container must be either {LayoutType.word} or " f"{LayoutType.line}"
731
+ assert self.text_container in (LayoutType.WORD, LayoutType.LINE), (
732
+ f"text_container must be either {LayoutType.WORD} or " f"{LayoutType.LINE}"
736
733
  )
737
734
  add_category = []
738
735
  if self.include_residual_text_container:
739
- add_category.append(LayoutType.line)
736
+ add_category.append(LayoutType.LINE)
740
737
 
741
738
  assert set(self.floating_text_block_categories) <= set(
742
- self.text_block_categories + add_category # type: ignore
739
+ self.text_block_categories + tuple(add_category)
743
740
  ), "floating_text_block_categories must be a subset of text_block_categories"
744
741
 
745
- def get_meta_annotation(self) -> JsonDict:
742
+ def get_meta_annotation(self) -> MetaAnnotation:
746
743
  add_category = [self.text_container]
747
- image_annotations = []
748
- if self.include_residual_text_container and self.text_container == LayoutType.word:
749
- add_category.append(LayoutType.line)
750
- image_annotations.append(LayoutType.line)
744
+ image_annotations: list[ObjectTypes] = []
745
+ if self.include_residual_text_container and self.text_container == LayoutType.WORD:
746
+ add_category.append(LayoutType.LINE)
747
+ image_annotations.append(LayoutType.LINE)
751
748
  anns_with_reading_order = list(copy(self.floating_text_block_categories)) + add_category
752
- return dict(
753
- [
754
- ("image_annotations", image_annotations),
755
- ("sub_categories", {category: {Relationships.reading_order} for category in anns_with_reading_order}),
756
- ("relationships", {}),
757
- ("summaries", []),
758
- ]
749
+ return MetaAnnotation(
750
+ image_annotations=tuple(image_annotations),
751
+ sub_categories={category: {Relationships.READING_ORDER} for category in anns_with_reading_order},
752
+ relationships={},
753
+ summaries=(),
759
754
  )
760
755
 
761
- def clone(self) -> PipelineComponent:
756
+ def clone(self) -> TextOrderService:
762
757
  return self.__class__(
763
- copy(self.text_container),
764
- copy(self.text_block_categories),
765
- copy(self.floating_text_block_categories),
758
+ self.text_container,
759
+ self.text_block_categories,
760
+ self.floating_text_block_categories,
766
761
  self.include_residual_text_container,
767
762
  self.order_generator.starting_point_tolerance,
768
763
  self.order_generator.broken_line_tolerance,
@@ -770,3 +765,6 @@ class TextOrderService(TextLineServiceMixin):
770
765
  self.text_line_generator.paragraph_break,
771
766
  self.text_line_generator.line_category_id,
772
767
  )
768
+
769
+ def clear_predictor(self) -> None:
770
+ pass