deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -24,7 +24,7 @@ import catalogue # type: ignore
24
24
  from tabulate import tabulate
25
25
  from termcolor import colored
26
26
 
27
- from .base import DatasetBase, CustomDataset
27
+ from .base import CustomDataset, DatasetBase
28
28
 
29
29
  __all__ = ["dataset_registry", "get_dataset", "print_dataset_infos"]
30
30
 
@@ -34,13 +34,15 @@ from ..utils.settings import TypeOrStr
34
34
 
35
35
  def match_anns_by_intersection(
36
36
  dp: Image,
37
- parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
38
- child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
39
37
  matching_rule: Literal["iou", "ioa"],
40
38
  threshold: float,
41
39
  use_weighted_intersections: bool = False,
40
+ parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
41
+ child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
42
42
  parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
43
43
  child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
44
+ parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
45
+ child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
44
46
  max_parent_only: bool = False,
45
47
  ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
46
48
  """
@@ -87,13 +89,19 @@ def match_anns_by_intersection(
87
89
  dates which are not in the list.
88
90
  :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
89
91
  candidates which are not in the list.
92
+ :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
93
+ parent candidates which are not in the list.
94
+ :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
95
+ children candidates which are not in the list.
90
96
  :param max_parent_only: Will assign to each child at most one parent with maximum ioa
91
97
  :return: child indices, parent indices (see Example), list of parent ids and list of children ids.
92
98
  """
93
99
 
94
100
  assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
95
101
 
96
- child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
102
+ child_anns = dp.get_annotation(
103
+ annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
104
+ )
97
105
  child_ann_boxes = np.array(
98
106
  [
99
107
  ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -101,7 +109,9 @@ def match_anns_by_intersection(
101
109
  ]
102
110
  )
103
111
 
104
- parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
112
+ parent_anns = dp.get_annotation(
113
+ annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
114
+ )
105
115
  parent_ann_boxes = np.array(
106
116
  [
107
117
  ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
@@ -147,10 +157,12 @@ def match_anns_by_intersection(
147
157
 
148
158
  def match_anns_by_distance(
149
159
  dp: Image,
150
- parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
151
- child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
160
+ parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
161
+ child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
152
162
  parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
153
163
  child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
164
+ parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
165
+ child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
154
166
  ) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
155
167
  """
156
168
  Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
@@ -164,11 +176,19 @@ def match_anns_by_distance(
164
176
  dates which are not in the list.
165
177
  :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
166
178
  candidates which are not in the list.
179
+ :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
180
+ parent candidates which are not in the list.
181
+ :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
182
+ children candidates which are not in the list.
167
183
  :return:
168
184
  """
169
185
 
170
- parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
171
- child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
186
+ parent_anns = dp.get_annotation(
187
+ annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
188
+ )
189
+ child_anns = dp.get_annotation(
190
+ annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
191
+ )
172
192
  child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
173
193
  parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
174
194
  if child_centers and parent_centers:
@@ -75,27 +75,6 @@ class DatapointManager:
75
75
  """
76
76
  assert self.datapoint_is_passed, "Pass datapoint to DatapointManager before creating anns"
77
77
 
78
- def maybe_map_category_id(self, category_id: Union[str, int]) -> int:
79
- """
80
- Maps categories if a category id mapping is provided in `__init__`.
81
-
82
- :param category_id: category id via integer or string.
83
- :return: mapped category id
84
- """
85
- if self.category_id_mapping is None:
86
- return int(category_id)
87
- return self.category_id_mapping[int(category_id)]
88
-
89
- def set_category_id_mapping(self, category_id_mapping: Mapping[int, int]) -> None:
90
- """
91
- In many cases the category ids sent back from a model have to be modified. Pass a mapping from model
92
- category ids to target annotation category ids.
93
-
94
- :param category_id_mapping: A mapping of model category ids (sent from DetectionResult) to category ids (saved
95
- in annotations)
96
- """
97
- self.category_id_mapping = category_id_mapping
98
-
99
78
  def set_image_annotation(
100
79
  self,
101
80
  detect_result: DetectionResult,
@@ -127,13 +106,10 @@ class DatapointManager:
127
106
  :return: the annotation_id of the generated image annotation
128
107
  """
129
108
  self.assert_datapoint_passed()
130
- if detect_result.class_id is None:
131
- raise ValueError("class_id of detect_result cannot be None")
132
109
  if not isinstance(detect_result.box, (list, np.ndarray)):
133
110
  raise TypeError(
134
111
  f"detect_result.box must be of type list or np.ndarray, but is of type {(type(detect_result.box))}"
135
112
  )
136
- detect_result.class_id = self.maybe_map_category_id(detect_result.class_id)
137
113
  with MappingContextManager(
138
114
  dp_name=self.datapoint.file_name, filter_level="annotation", detect_result=asdict(detect_result)
139
115
  ) as annotation_context:
@@ -155,7 +131,7 @@ class DatapointManager:
155
131
  ann = ImageAnnotation(
156
132
  category_name=detect_result.class_name,
157
133
  bounding_box=box,
158
- category_id=detect_result.class_id,
134
+ category_id=detect_result.class_id if detect_result.class_id is not None else DEFAULT_CATEGORY_ID,
159
135
  score=detect_result.score,
160
136
  service_id=self.service_id,
161
137
  model_id=self.model_id,
@@ -22,6 +22,7 @@ from __future__ import annotations
22
22
 
23
23
  import os
24
24
  from copy import deepcopy
25
+ from dataclasses import dataclass, field
25
26
  from typing import Literal, Mapping, Optional, Sequence, Union
26
27
 
27
28
  import numpy as np
@@ -49,24 +50,30 @@ class ImageCroppingService(PipelineComponent):
49
50
  generally not stored.
50
51
  """
51
52
 
52
- def __init__(self, category_names: Union[TypeOrStr, Sequence[TypeOrStr]]):
53
+ def __init__(
54
+ self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
55
+ service_ids: Optional[Sequence[str]] = None
56
+ ) -> None:
53
57
  """
54
58
  :param category_names: A single name or a list of category names to crop
55
59
  """
56
-
57
- self.category_names = (
58
- (category_names,)
59
- if isinstance(category_names, str)
60
- else tuple(get_type(category_name) for category_name in category_names)
61
- )
60
+ if category_names is None:
61
+ self.category_names = None
62
+ else:
63
+ self.category_names = (
64
+ (category_names,)
65
+ if isinstance(category_names, str)
66
+ else tuple(get_type(category_name) for category_name in category_names)
67
+ )
68
+ self.service_ids = service_ids
62
69
  super().__init__("image_crop")
63
70
 
64
71
  def serve(self, dp: Image) -> None:
65
- for ann in dp.get_annotation(category_names=self.category_names):
72
+ for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
66
73
  dp.image_ann_to_image(ann.annotation_id, crop_image=True)
67
74
 
68
75
  def clone(self) -> ImageCroppingService:
69
- return self.__class__(self.category_names)
76
+ return self.__class__(self.category_names, self.service_ids)
70
77
 
71
78
  def get_meta_annotation(self) -> MetaAnnotation:
72
79
  return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
@@ -124,8 +131,10 @@ class IntersectionMatcher:
124
131
  def match(
125
132
  self,
126
133
  dp: Image,
127
- parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
128
- child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
134
+ parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
135
+ child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
136
+ parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
137
+ child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
129
138
  ) -> list[tuple[str, str]]:
130
139
  """
131
140
  The matching algorithm
@@ -133,6 +142,10 @@ class IntersectionMatcher:
133
142
  :param dp: datapoint image
134
143
  :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
135
144
  :param child_categories: list of categories to be used for a child class.
145
+ :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
146
+ parent candidates which are not in the list.
147
+ :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
148
+ children candidates which are not in the list.
136
149
 
137
150
  :return: A list of tuples with parent and child annotation ids
138
151
  """
@@ -144,6 +157,8 @@ class IntersectionMatcher:
144
157
  threshold=self.threshold,
145
158
  use_weighted_intersections=self.use_weighted_intersections,
146
159
  max_parent_only=self.max_parent_only,
160
+ parent_ann_service_ids=parent_ann_service_ids,
161
+ child_ann_service_ids=child_ann_service_ids,
147
162
  )
148
163
 
149
164
  matched_child_anns = np.take(child_anns, child_index) # type: ignore
@@ -174,8 +189,10 @@ class NeighbourMatcher:
174
189
  def match(
175
190
  self,
176
191
  dp: Image,
177
- parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
178
- child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
192
+ parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
193
+ child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
194
+ parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
195
+ child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
179
196
  ) -> list[tuple[str, str]]:
180
197
  """
181
198
  The matching algorithm
@@ -183,16 +200,54 @@ class NeighbourMatcher:
183
200
  :param dp: datapoint image
184
201
  :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
185
202
  :param child_categories: list of categories to be used for a child class.
203
+ :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
204
+ parent candidates which are not in the list.
205
+ :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
206
+ children candidates which are not in the list.
186
207
 
187
208
  :return: A list of tuples with parent and child annotation ids
188
209
  """
189
210
 
190
211
  return [
191
212
  (pair[0].annotation_id, pair[1].annotation_id)
192
- for pair in match_anns_by_distance(dp, parent_categories, child_categories)
213
+ for pair in match_anns_by_distance(
214
+ dp,
215
+ parent_ann_category_names=parent_categories,
216
+ child_ann_category_names=child_categories,
217
+ parent_ann_service_ids=parent_ann_service_ids,
218
+ child_ann_service_ids=child_ann_service_ids,
219
+ )
193
220
  ]
194
221
 
195
222
 
223
+ @dataclass
224
+ class FamilyCompound:
225
+ """
226
+ A family compound is a set of parent and child categories that are related by a relationship key. The parent
227
+ categories will receive a relationship to the child categories.
228
+ """
229
+
230
+ relationship_key: Relationships
231
+ parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
232
+ child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
233
+ parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
234
+ child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
235
+
236
+ def __post_init__(self) -> None:
237
+ if isinstance(self.parent_categories, str):
238
+ self.parent_categories = (get_type(self.parent_categories),)
239
+ elif self.parent_categories is not None:
240
+ self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
241
+ if isinstance(self.child_categories, str):
242
+ self.child_categories = (get_type(self.child_categories),)
243
+ elif self.child_categories is not None:
244
+ self.child_categories = tuple(get_type(child) for child in self.child_categories)
245
+ if isinstance(self.parent_ann_service_ids, str):
246
+ self.parent_ann_service_ids = (self.parent_ann_service_ids,)
247
+ if isinstance(self.child_ann_service_ids, str):
248
+ self.child_ann_service_ids = (self.child_ann_service_ids,)
249
+
250
+
196
251
  @pipeline_component_registry.register("MatchingService")
197
252
  class MatchingService(PipelineComponent):
198
253
  """
@@ -202,28 +257,15 @@ class MatchingService(PipelineComponent):
202
257
 
203
258
  def __init__(
204
259
  self,
205
- parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
206
- child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
260
+ family_compounds: Sequence[FamilyCompound],
207
261
  matcher: Union[IntersectionMatcher, NeighbourMatcher],
208
- relationship_key: Relationships,
209
262
  ) -> None:
210
263
  """
211
- :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
212
- :param child_categories: list of categories to be used for a child class.
213
-
264
+ :param family_compounds: A list of FamilyCompounds
265
+ :param matcher: A matcher object
214
266
  """
215
- self.parent_categories = (
216
- (get_type(parent_categories),)
217
- if isinstance(parent_categories, str)
218
- else tuple(get_type(category_name) for category_name in parent_categories)
219
- )
220
- self.child_categories = (
221
- (get_type(child_categories),)
222
- if isinstance(child_categories, str)
223
- else (tuple(get_type(category_name) for category_name in child_categories))
224
- )
267
+ self.family_compounds = family_compounds
225
268
  self.matcher = matcher
226
- self.relationship_key = relationship_key
227
269
  super().__init__("matching")
228
270
 
229
271
  def serve(self, dp: Image) -> None:
@@ -233,20 +275,31 @@ class MatchingService(PipelineComponent):
233
275
 
234
276
  :param dp: datapoint image
235
277
  """
236
-
237
- matched_pairs = self.matcher.match(dp, self.parent_categories, self.child_categories)
238
-
239
- for pair in matched_pairs:
240
- self.dp_manager.set_relationship_annotation(self.relationship_key, pair[0], pair[1])
278
+ for family_compound in self.family_compounds:
279
+ matched_pairs = self.matcher.match(
280
+ dp,
281
+ parent_categories=family_compound.parent_categories,
282
+ child_categories=family_compound.child_categories,
283
+ parent_ann_service_ids=family_compound.parent_ann_service_ids,
284
+ child_ann_service_ids=family_compound.child_ann_service_ids,
285
+ )
286
+
287
+ for pair in matched_pairs:
288
+ self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
241
289
 
242
290
  def clone(self) -> PipelineComponent:
243
- return self.__class__(self.parent_categories, self.child_categories, self.matcher, self.relationship_key)
291
+ return self.__class__(self.family_compounds, self.matcher)
244
292
 
245
293
  def get_meta_annotation(self) -> MetaAnnotation:
294
+ relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
295
+ for family_compound in self.family_compounds:
296
+ if family_compound.parent_categories is not None:
297
+ for parent_category in family_compound.parent_categories:
298
+ relationships[parent_category] = {family_compound.relationship_key} # type: ignore
246
299
  return MetaAnnotation(
247
300
  image_annotations=(),
248
301
  sub_categories={},
249
- relationships={parent: {Relationships.CHILD} for parent in self.parent_categories},
302
+ relationships=relationships,
250
303
  summaries=(),
251
304
  )
252
305
 
@@ -20,18 +20,41 @@ Module for layout pipeline component
20
20
  """
21
21
  from __future__ import annotations
22
22
 
23
- from typing import Optional
23
+ from typing import Optional, Sequence, Union
24
24
 
25
25
  import numpy as np
26
26
 
27
27
  from ..datapoint.image import Image
28
28
  from ..extern.base import ObjectDetector, PdfMiner
29
+ from ..mapper.misc import curry
29
30
  from ..utils.error import ImageError
31
+ from ..utils.settings import ObjectTypes
30
32
  from ..utils.transform import PadTransform
31
33
  from .base import MetaAnnotation, PipelineComponent
32
34
  from .registry import pipeline_component_registry
33
35
 
34
36
 
37
+ @curry
38
+ def skip_if_category_or_service_extracted(
39
+ dp: Image,
40
+ category_names: Optional[Union[str, Sequence[ObjectTypes]]] = None,
41
+ service_ids: Optional[Union[str, Sequence[str]]] = None,
42
+ ) -> bool:
43
+ """
44
+ Skip the processing of the pipeline component if the category or service is already extracted.
45
+
46
+ **Example**
47
+
48
+ detector = # some detector
49
+ item_component = ImageLayoutService(detector)
50
+ item_component.set_inbound_filter(skip_if_category_or_service_extracted(detector.get_categories(as_dict=False)))
51
+ """
52
+
53
+ if dp.get_annotation(category_names=category_names, service_ids=service_ids):
54
+ return True
55
+ return False
56
+
57
+
35
58
  @pipeline_component_registry.register("ImageLayoutService")
36
59
  class ImageLayoutService(PipelineComponent):
37
60
  """
@@ -45,7 +68,7 @@ class ImageLayoutService(PipelineComponent):
45
68
 
46
69
  **Example**
47
70
 
48
- d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": "ROW", "2": "COLUMNS"})
71
+ d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: 'row', 2: 'column'})
49
72
  item_component = ImageLayoutService(d_items)
50
73
  """
51
74
 
@@ -55,7 +78,6 @@ class ImageLayoutService(PipelineComponent):
55
78
  to_image: bool = False,
56
79
  crop_image: bool = False,
57
80
  padder: Optional[PadTransform] = None,
58
- skip_if_layout_extracted: bool = False,
59
81
  ):
60
82
  """
61
83
  :param layout_detector: object detector
@@ -65,23 +87,14 @@ class ImageLayoutService(PipelineComponent):
65
87
  to its bounding box and populate the resulting sub image to
66
88
  `ImageAnnotation.image.image`.
67
89
  :param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
68
- :param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
69
- available that will be predicted by the `layout_detector`. If yes, will skip
70
- the prediction process.
71
90
  """
72
91
  self.to_image = to_image
73
92
  self.crop_image = crop_image
74
93
  self.padder = padder
75
- self.skip_if_layout_extracted = skip_if_layout_extracted
76
94
  self.predictor = layout_detector
77
95
  super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
78
96
 
79
97
  def serve(self, dp: Image) -> None:
80
- if self.skip_if_layout_extracted:
81
- categories = self.predictor.get_category_names()
82
- anns = dp.get_annotation(category_names=categories)
83
- if anns:
84
- return
85
98
  if dp.image is None:
86
99
  raise ImageError("image cannot be None")
87
100
  np_image = dp.image
@@ -117,7 +130,7 @@ class ImageLayoutService(PipelineComponent):
117
130
  padder_clone = self.padder.clone()
118
131
  if not isinstance(predictor, ObjectDetector):
119
132
  raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
120
- return self.__class__(predictor, self.to_image, self.crop_image, padder_clone, self.skip_if_layout_extracted)
133
+ return self.__class__(predictor, self.to_image, self.crop_image, padder_clone)
121
134
 
122
135
  def clear_predictor(self) -> None:
123
136
  self.predictor.clear_model()
@@ -347,19 +347,15 @@ class TextLineGenerator:
347
347
  a paragraph break threshold. This allows to detect a multi column structure just by observing sub lines.
348
348
  """
349
349
 
350
- def __init__(
351
- self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
352
- ):
350
+ def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
353
351
  """
354
352
  :param make_sub_lines: Whether to build sub lines from lines.
355
- :param line_category_id: category_id to give a text line
356
353
  :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
357
354
  will be built. We use relative coordinates to calculate the distance between two
358
355
  consecutive words. A reasonable value is 0.035
359
356
  """
360
357
  if make_sub_lines and paragraph_break is None:
361
358
  raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
362
- self.line_category_id = int(line_category_id)
363
359
  self.make_sub_lines = make_sub_lines
364
360
  self.paragraph_break = paragraph_break
365
361
 
@@ -367,7 +363,6 @@ class TextLineGenerator:
367
363
  return DetectionResult(
368
364
  box=box.to_list(mode="xyxy"),
369
365
  class_name=LayoutType.LINE,
370
- class_id=self.line_category_id,
371
366
  absolute_coords=box.absolute_coords,
372
367
  relationships=relationships,
373
368
  )
@@ -475,18 +470,14 @@ class TextLineServiceMixin(PipelineComponent, ABC):
475
470
  def __init__(
476
471
  self,
477
472
  name: str,
478
- line_category_id: int = 1,
479
473
  include_residual_text_container: bool = True,
480
474
  paragraph_break: Optional[float] = None,
481
475
  ):
482
476
  """
483
- Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
477
+ Initialize the TextLineServiceMixin with a TextLineGenerator instance.
484
478
  """
485
- self.line_category_id = line_category_id
486
479
  self.include_residual_text_container = include_residual_text_container
487
- self.text_line_generator = TextLineGenerator(
488
- self.include_residual_text_container, self.line_category_id, paragraph_break
489
- )
480
+ self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
490
481
  super().__init__(name)
491
482
 
492
483
  def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
@@ -523,17 +514,15 @@ class TextLineService(TextLineServiceMixin):
523
514
  text lines and the words contained in the text lines. The reading order is not arranged.
524
515
  """
525
516
 
526
- def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
517
+ def __init__(self, paragraph_break: Optional[float] = None):
527
518
  """
528
519
  Initialize `TextLineService`
529
520
 
530
- :param line_category_id: category_id to give a text line
531
521
  :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
532
522
  will be built
533
523
  """
534
524
  super().__init__(
535
525
  name="text_line",
536
- line_category_id=line_category_id,
537
526
  include_residual_text_container=True,
538
527
  paragraph_break=paragraph_break,
539
528
  )
@@ -542,7 +531,7 @@ class TextLineService(TextLineServiceMixin):
542
531
  """
543
532
  This method returns a new instance of the class with the same configuration.
544
533
  """
545
- return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
534
+ return self.__class__(self.text_line_generator.paragraph_break)
546
535
 
547
536
  def serve(self, dp: Image) -> None:
548
537
  text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
@@ -605,7 +594,6 @@ class TextOrderService(TextLineServiceMixin):
605
594
  broken_line_tolerance: float = 0.003,
606
595
  height_tolerance: float = 2.0,
607
596
  paragraph_break: Optional[float] = 0.035,
608
- line_category_id: int = 1,
609
597
  ):
610
598
  """
611
599
  :param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
@@ -647,12 +635,9 @@ class TextOrderService(TextLineServiceMixin):
647
635
  self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
648
636
  self.include_residual_text_container = include_residual_text_container
649
637
  self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
650
- self.text_line_generator = TextLineGenerator(
651
- self.include_residual_text_container, line_category_id, paragraph_break
652
- )
638
+ self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
653
639
  super().__init__(
654
640
  name="text_order",
655
- line_category_id=line_category_id,
656
641
  include_residual_text_container=include_residual_text_container,
657
642
  paragraph_break=paragraph_break,
658
643
  )
@@ -763,7 +748,6 @@ class TextOrderService(TextLineServiceMixin):
763
748
  self.order_generator.broken_line_tolerance,
764
749
  self.order_generator.height_tolerance,
765
750
  self.text_line_generator.paragraph_break,
766
- self.text_line_generator.line_category_id,
767
751
  )
768
752
 
769
753
  def clear_predictor(self) -> None: