deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/common.py
CHANGED
@@ -18,8 +18,11 @@
 """
 Module for common pipeline components
 """
-from
-
+from __future__ import annotations
+
+import os
+from copy import deepcopy
+from typing import Literal, Mapping, Optional, Sequence, Union
 
 import numpy as np
 
@@ -29,17 +32,14 @@ from ..datapoint.view import IMAGE_DEFAULTS, Page
 from ..mapper.maputils import MappingContextManager
 from ..mapper.match import match_anns_by_intersection
 from ..mapper.misc import to_image
-from ..utils.detection_types import JsonDict
-from ..utils.file_utils import detectron2_available, pytorch_available, tf_available
 from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr, get_type
-from .base import PipelineComponent
+from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
 
-if
-    from ..mapper.tpstruct import tf_nms_image_annotations as nms_image_annotations
-
-elif pytorch_available() and detectron2_available():
+if os.environ.get("DD_USE_TORCH"):
     from ..mapper.d2struct import pt_nms_image_annotations as nms_image_annotations
+elif os.environ.get("DD_USE_TF"):
+    from ..mapper.tpstruct import tf_nms_image_annotations as nms_image_annotations
 
 
 @pipeline_component_registry.register("ImageCroppingService")
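Note for integrators: the backend-specific NMS import above is no longer resolved by probing installed packages (`tf_available()`, `pytorch_available() and detectron2_available()`) but by environment variables. A minimal sketch of the consequence, assuming any truthy value counts and that the flag must be set before the module is first imported (both plausible from the diff, neither stated explicitly):

    import os

    # Assumption: set the flag before the first deepdoctection import, because
    # the nms_image_annotations alias is bound at module import time.
    os.environ["DD_USE_TORCH"] = "1"   # -> pt_nms_image_annotations (PyTorch/Detectron2)
    # os.environ["DD_USE_TF"] = "1"    # -> tf_nms_image_annotations (TensorFlow)

    import deepdoctection.pipe.common  # noqa: E402  import after setting the flag

If neither variable is set, no `nms_image_annotations` name gets bound at all, so code paths relying on it would fail at call time; that edge case is visible in, but not handled by, this hunk.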
@@ -55,20 +55,25 @@ class ImageCroppingService(PipelineComponent):
         :param category_names: A single name or a list of category names to crop
         """
 
-
-        category_names
-
+        self.category_names = (
+            (category_names,)
+            if isinstance(category_names, str)
+            else tuple(get_type(category_name) for category_name in category_names)
+        )
         super().__init__("image_crop")
 
     def serve(self, dp: Image) -> None:
         for ann in dp.get_annotation(category_names=self.category_names):
             dp.image_ann_to_image(ann.annotation_id, crop_image=True)
 
-    def clone(self) ->
+    def clone(self) -> ImageCroppingService:
         return self.__class__(self.category_names)
 
-    def get_meta_annotation(self) ->
-        return
+    def get_meta_annotation(self) -> MetaAnnotation:
+        return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
+
+    def clear_predictor(self) -> None:
+        pass
 
 
 @pipeline_component_registry.register("MatchingService")
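The constructor change above introduces a normalization pattern that recurs throughout this release (`MatchingService`, `PageParsingService`, `LanguageDetectionService`): a bare string becomes a one-element tuple, any other sequence is converted element-wise through `get_type` into a tuple. A standalone sketch of the shape (illustrative names, not deepdoctection API):

    from typing import Sequence, Tuple, Union

    def normalize(names: Union[str, Sequence[str]]) -> Tuple[str, ...]:
        # Single string -> one-element tuple; any other sequence -> tuple.
        # deepdoctection additionally maps each element through get_type()
        # to resolve strings into ObjectTypes enum members.
        return (names,) if isinstance(names, str) else tuple(names)

    assert normalize("table") == ("table",)
    assert normalize(["table", "figure"]) == ("table", "figure")

Note the asymmetry in `ImageCroppingService` itself: the single-string branch keeps the raw string (`(category_names,)`), while the sequence branch resolves each name via `get_type`.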
@@ -113,16 +118,18 @@ class MatchingService(PipelineComponent):
         :param max_parent_only: Will assign to each child at most one parent with maximum ioa
         """
         self.parent_categories = (
-
-            if
-            else
+            (get_type(parent_categories),)
+            if isinstance(parent_categories, str)
+            else tuple(get_type(category_name) for category_name in parent_categories)
         )
         self.child_categories = (
-
-            if
-            else
+            (get_type(child_categories),)
+            if isinstance(child_categories, str)
+            else (tuple(get_type(category_name) for category_name in child_categories))
         )
-
+        if matching_rule not in ("iou", "ioa"):
+            raise ValueError("segment rule must be either iou or ioa")
+
         self.matching_rule = matching_rule
         self.threshold = threshold
         self.use_weighted_intersections = use_weighted_intersections
@@ -150,24 +157,25 @@ class MatchingService(PipelineComponent):
             matched_child_anns = np.take(child_anns, child_index)  # type: ignore
             matched_parent_anns = np.take(parent_anns, parent_index)  # type: ignore
             for idx, parent in enumerate(matched_parent_anns):
-                parent.dump_relationship(Relationships.
+                parent.dump_relationship(Relationships.CHILD, matched_child_anns[idx].annotation_id)
 
     def clone(self) -> PipelineComponent:
         return self.__class__(self.parent_categories, self.child_categories, self.matching_rule, self.threshold)
 
-    def get_meta_annotation(self) ->
-        return
-
-
-
-
-            ("summaries", []),
-        ]
+    def get_meta_annotation(self) -> MetaAnnotation:
+        return MetaAnnotation(
+            image_annotations=(),
+            sub_categories={},
+            relationships={parent: {Relationships.CHILD} for parent in self.parent_categories},
+            summaries=(),
         )
 
+    def clear_predictor(self) -> None:
+        pass
+
 
 @pipeline_component_registry.register("PageParsingService")
-class PageParsingService:
+class PageParsingService(PipelineComponent):
     """
     A "pseudo" pipeline component that can be added to a pipeline to convert `Image`s into `Page` formats. It allows a
     custom parsing depending on customizing options of other pipeline components.
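This hunk shows the shape of the new return type: `get_meta_annotation` now yields a `MetaAnnotation` object (imported from `.base`) instead of a `JsonDict` assembled from key/value pairs, with tuples replacing lists. A sketch of the value a configured `MatchingService` would report, assuming `MetaAnnotation` accepts exactly the four keyword arguments visible here and that `LayoutType.TABLE` is a registered parent category:

    from deepdoctection.pipe.base import MetaAnnotation
    from deepdoctection.utils.settings import LayoutType, Relationships

    # One CHILD relationship per parent category, nothing else; mirrors
    # MatchingService.get_meta_annotation() for parent_categories=(LayoutType.TABLE,)
    meta = MetaAnnotation(
        image_annotations=(),
        sub_categories={},
        relationships={LayoutType.TABLE: {Relationships.CHILD}},
        summaries=(),
    )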
@@ -186,14 +194,20 @@ class PageParsingService:
         """
         self.name = "page_parser"
         if isinstance(floating_text_block_categories, (str, ObjectTypes)):
-            floating_text_block_categories =
+            floating_text_block_categories = (get_type(floating_text_block_categories),)
         if floating_text_block_categories is None:
-            floating_text_block_categories =
+            floating_text_block_categories = IMAGE_DEFAULTS["floating_text_block_categories"]
 
         self.text_container = get_type(text_container)
-        self.floating_text_block_categories =
+        self.floating_text_block_categories = tuple(
+            (get_type(text_block) for text_block in floating_text_block_categories)
+        )
         self.include_residual_text_container = include_residual_text_container
         self._init_sanity_checks()
+        super().__init__(self.name)
+
+    def serve(self, dp: Image) -> None:
+        raise NotImplementedError("PageParsingService is not meant to be used in serve method")
 
     def pass_datapoint(self, dp: Image) -> Page:
         """
@@ -203,29 +217,19 @@ class PageParsingService:
         """
         return Page.from_image(dp, self.text_container, self.floating_text_block_categories)
 
-    def predict_dataflow(self, df: DataFlow) -> DataFlow:
-        """
-        Mapping a datapoint via `pass_datapoint` within a dataflow pipeline
-
-        :param df: An input dataflow
-        :return: A output dataflow
-        """
-        return MapData(df, self.pass_datapoint)
-
     def _init_sanity_checks(self) -> None:
         assert self.text_container in (
-            LayoutType.
-            LayoutType.
-        ), f"text_container must be either {LayoutType.
+            LayoutType.WORD,
+            LayoutType.LINE,
+        ), f"text_container must be either {LayoutType.WORD} or {LayoutType.LINE}"
 
-
-    def get_meta_annotation() -> JsonDict:
+    def get_meta_annotation(self) -> MetaAnnotation:
         """
         meta annotation. We do not generate any new annotations here
         """
-        return
+        return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
 
-    def clone(self) ->
+    def clone(self) -> PageParsingService:
         """clone"""
         return self.__class__(
             deepcopy(self.text_container),
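With the hunks above, `PageParsingService` stops being a free-standing class and becomes a genuine `PipelineComponent`: its own `predict_dataflow` is deleted in favour of the inherited one, `serve` deliberately raises, and parsing still happens through `pass_datapoint`. A usage sketch, assuming the string `"word"` resolves via `get_type` to `LayoutType.WORD` (the sanity check accepts only `WORD` or `LINE`):

    from deepdoctection.pipe.common import PageParsingService

    page_parser = PageParsingService(text_container="word")
    # page = page_parser.pass_datapoint(image)  # Image -> Page, unchanged
    # page_parser.serve(image)                  # raises NotImplementedError by design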
@@ -233,6 +237,9 @@ class PageParsingService:
             self.include_residual_text_container,
         )
 
+    def clear_predictor(self) -> None:
+        pass
+
 
 @pipeline_component_registry.register("AnnotationNmsService")
 class AnnotationNmsService(PipelineComponent):
@@ -257,8 +264,8 @@ class AnnotationNmsService(PipelineComponent):
     def __init__(
         self,
         nms_pairs: Sequence[Sequence[TypeOrStr]],
-        thresholds: Union[float,
-        priority: Optional[
+        thresholds: Union[float, list[float]],
+        priority: Optional[list[Union[Optional[TypeOrStr]]]] = None,
     ):
         """
         :param nms_pairs: Groups of categories, either as string or by `ObjectType`.
@@ -292,11 +299,14 @@
             if ann.annotation_id not in ann_ids_to_keep:
                 self.dp_manager.deactivate_annotation(ann.annotation_id)
 
-    def clone(self) ->
+    def clone(self) -> PipelineComponent:
         return self.__class__(deepcopy(self.nms_pairs), self.threshold)
 
-    def get_meta_annotation(self) ->
-        return
+    def get_meta_annotation(self) -> MetaAnnotation:
+        return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
+
+    def clear_predictor(self) -> None:
+        pass
 
 
 @pipeline_component_registry.register("ImageParsingService")
@@ -326,13 +336,16 @@ class ImageParsingService:
         """
         return MapData(df, self.pass_datapoint)
 
-    def clone(self) ->
+    def clone(self) -> ImageParsingService:
         """clone"""
         return self.__class__(self.dpi)
 
     @staticmethod
-    def get_meta_annotation() ->
+    def get_meta_annotation() -> MetaAnnotation:
         """
         meta annotation. We do not generate any new annotations here
         """
-        return
+        return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
+
+    def clear_predictor(self) -> None:
+        """clear predictor. Will do nothing"""
deepdoctection/pipe/concurrency.py
CHANGED

@@ -18,21 +18,22 @@
 """
 Module for multithreading tasks
 """
+from __future__ import annotations
 
 import itertools
 import queue
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import ExitStack
-from typing import Callable,
+from typing import Callable, Optional, Sequence, Union
 
 import tqdm
 
 from ..dataflow import DataFlow, MapData
 from ..datapoint.image import Image
 from ..utils.context import timed_operation
-from ..utils.detection_types import JsonDict, QueueType, TqdmType
 from ..utils.tqdm import get_tqdm
-from .
+from ..utils.types import QueueType, TqdmType
+from .base import MetaAnnotation, PipelineComponent
 from .common import ImageParsingService, PageParsingService
 from .registry import pipeline_component_registry
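The import shuffle above recurs in every file of this diff: `deepdoctection/utils/detection_types.py` is deleted (-68 lines) and the new `deepdoctection/utils/types.py` (+104 lines) takes over, with `Pathlike` renamed to `PathLikeOrStr` and `JsonDict` dropped from component signatures in favour of concrete types such as `MetaAnnotation`. For downstream code the migration is a one-line change; only the names visible in this diff are certain:

    # 0.31 (module removed in 0.33):
    # from deepdoctection.utils.detection_types import Pathlike, QueueType, TqdmType

    # 0.33:
    from deepdoctection.utils.types import PathLikeOrStr, QueueType, TqdmType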
@@ -99,7 +100,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
 
     def __init__(
         self,
-        pipeline_components: Sequence[Union[PipelineComponent,
+        pipeline_components: Sequence[Union[PipelineComponent, ImageParsingService]],
         pre_proc_func: Optional[Callable[[Image], Image]] = None,
         post_proc_func: Optional[Callable[[Image], Image]] = None,
         max_datapoints: Optional[int] = None,

@@ -122,7 +123,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
         self.timer_on = False
         super().__init__(f"multi_thread_{self.pipe_components[0].name}")
 
-    def put_task(self, df: Union[DataFlow,
+    def put_task(self, df: Union[DataFlow, list[Image]]) -> None:
         """
         Put a dataflow or a list of datapoints to the queue. Note, that the process will not start before `start`
         is called. If you do not know how many datapoints will be cached, use max_datapoint to ensure no oom.

@@ -132,7 +133,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
 
         self._put_datapoints_to_queue(df)
 
-    def start(self) ->
+    def start(self) -> list[Image]:
         """
         Creates a worker for each component and starts processing the data points of the queue. A list of the results
         is returned once all points in the queue have been processed.

@@ -164,7 +165,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
         tqdm_bar: Optional[TqdmType] = None,
         pre_proc_func: Optional[Callable[[Image], Image]] = None,
         post_proc_func: Optional[Callable[[Image], Image]] = None,
-    ) ->
+    ) -> list[Image]:
         outputs = []
 
         with ExitStack() as stack:

@@ -183,7 +184,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
                     tqdm_bar.update(1)
         return outputs
 
-    def _put_datapoints_to_queue(self, df: Union[DataFlow,
+    def _put_datapoints_to_queue(self, df: Union[DataFlow, list[Image]]) -> None:
         if isinstance(df, DataFlow):
             df.reset_state()
             for idx, dp in enumerate(df):

@@ -192,7 +193,7 @@ class MultiThreadPipelineComponent(PipelineComponent):
                     break
                 self.input_queue.put(dp)
 
-    def pass_datapoints(self, dpts:
+    def pass_datapoints(self, dpts: list[Image]) -> list[Image]:
         """
         Putting the list of datapoints into a thread-save queue and start for each pipeline
         component a separate thread. It will return a list of datapoints where the order of appearance

@@ -221,8 +222,12 @@ class MultiThreadPipelineComponent(PipelineComponent):
     def serve(self, dp: Image) -> None:
         raise NotImplementedError("MultiThreadPipelineComponent does not follow the PipelineComponent implementation")
 
-    def clone(self) ->
+    def clone(self) -> MultiThreadPipelineComponent:
         raise NotImplementedError("MultiThreadPipelineComponent does not allow cloning")
 
-    def get_meta_annotation(self) ->
+    def get_meta_annotation(self) -> MetaAnnotation:
         return self.pipe_components[0].get_meta_annotation()
+
+    def clear_predictor(self) -> None:
+        for pipe in self.pipe_components:
+            pipe.clear_predictor()
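`clear_predictor` rounds out the new `PipelineComponent` contract seen throughout this release: every component must implement it, predictor-less components implement it as `pass`, predictor-backed ones call `clear_model()` on their predictor, and the multi-thread wrapper fans the call out as shown above. A toy illustration of the contract (not deepdoctection code):

    class Orchestrator:
        """A component without a model: nothing to release."""
        def clear_predictor(self) -> None:
            pass

    class Wrapper:
        """Delegates, like MultiThreadPipelineComponent.clear_predictor."""
        def __init__(self, components):
            self.pipe_components = list(components)

        def clear_predictor(self) -> None:
            for pipe in self.pipe_components:
                pipe.clear_predictor()

    Wrapper([Orchestrator(), Orchestrator()]).clear_predictor()  # no-op cascade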
deepdoctection/pipe/doctectionpipe.py
CHANGED

@@ -26,18 +26,18 @@ from typing import List, Mapping, Optional, Sequence, Tuple, Union
 from ..dataflow import DataFlow, MapData
 from ..dataflow.custom_serialize import SerializerFiles, SerializerPdfDoc
 from ..datapoint.image import Image
+from ..datapoint.view import IMAGE_DEFAULTS
 from ..mapper.maputils import curry
 from ..mapper.misc import to_image
-from ..utils.detection_types import Pathlike
 from ..utils.fs import maybe_path_or_pdf
 from ..utils.logger import LoggingRecord, logger
-from ..utils.
-from .base import Pipeline, PipelineComponent
+from ..utils.types import PathLikeOrStr
+from .base import Pipeline, PipelineComponent
 from .common import PageParsingService
 
 
 def _collect_from_kwargs(
-    **kwargs: Union[str, DataFlow, bool, int,
+    **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
 ) -> Tuple[Optional[str], Optional[str], bool, int, str, DataFlow]:
     dataset_dataflow = kwargs.get("dataset_dataflow")
     path = kwargs.get("path")

@@ -69,7 +69,7 @@ def _collect_from_kwargs(
 
 @curry
 def _proto_process(
-    dp: Union[str, Mapping[str, str]], path: Optional[
+    dp: Union[str, Mapping[str, str]], path: Optional[PathLikeOrStr], doc_path: Optional[PathLikeOrStr]
 ) -> Union[str, Mapping[str, str]]:
     if isinstance(dp, str):
         file_name = Path(dp).name

@@ -78,10 +78,14 @@ def _proto_process(
     else:
         file_name = dp["file_name"]
     if path is None:
-        path_tmp = doc_path
+        path_tmp = doc_path or ""
     else:
         path_tmp = path
-    logger.info(
+    logger.info(
+        LoggingRecord(
+            f"Processing {file_name}", {"path": os.fspath(path_tmp), "df": os.fspath(path_tmp), "file_name": file_name}
+        )
+    )
     return dp
 

@@ -90,7 +94,7 @@ def _to_image(dp: Union[str, Mapping[str, Union[str, bytes]]], dpi: Optional[int
     return to_image(dp, dpi)
 
 
-def _doc_to_dataflow(path:
+def _doc_to_dataflow(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
     if not os.path.isfile(path):
         raise FileExistsError(f"{path} not a file")
 

@@ -127,19 +131,18 @@ class DoctectionPipe(Pipeline):
 
     def __init__(
         self,
-        pipeline_component_list: List[
+        pipeline_component_list: List[PipelineComponent],
         page_parsing_service: Optional[PageParsingService] = None,
     ):
-
-
-
-
-        assert all(
-            isinstance(element, (PipelineComponent, PredictorPipelineComponent)) for element in pipeline_component_list
+        self.page_parser = (
+            PageParsingService(text_container=IMAGE_DEFAULTS["text_container"])
+            if page_parsing_service is None
+            else page_parsing_service
         )
+
         super().__init__(pipeline_component_list)
 
-    def _entry(self, **kwargs: Union[str, DataFlow, bool, int,
+    def _entry(self, **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) -> DataFlow:
         path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow = _collect_from_kwargs(**kwargs)
 
         df: DataFlow

@@ -147,7 +150,7 @@ class DoctectionPipe(Pipeline):
         if isinstance(path, (str, Path)):
             if not isinstance(file_type, (str, list)):
                 raise TypeError(f"file_type must be of type string or list, but is of type {type(file_type)}")
-            df = DoctectionPipe.path_to_dataflow(path, file_type, shuffle=shuffle)
+            df = DoctectionPipe.path_to_dataflow(path=path, file_type=file_type, shuffle=shuffle)
         elif isinstance(doc_path, (str, Path)):
             df = DoctectionPipe.doc_to_dataflow(
                 path=doc_path, max_datapoints=int(max_datapoints) if max_datapoints is not None else None

@@ -164,7 +167,7 @@ class DoctectionPipe(Pipeline):
 
     @staticmethod
     def path_to_dataflow(
-        path:
+        path: PathLikeOrStr,
         file_type: Union[str, Sequence[str]],
         max_datapoints: Optional[int] = None,
         shuffle: bool = False,

@@ -179,12 +182,12 @@ class DoctectionPipe(Pipeline):
         :return: dataflow
         """
         if not os.path.isdir(path):
-            raise NotADirectoryError(f"{path} not a directory")
+            raise NotADirectoryError(f"{os.fspath(path)} not a directory")
         df = SerializerFiles.load(path, file_type, max_datapoints, shuffle)
         return df
 
     @staticmethod
-    def doc_to_dataflow(path:
+    def doc_to_dataflow(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
         """
         Processing method for documents
 

@@ -203,7 +206,7 @@ class DoctectionPipe(Pipeline):
         """
         return self.page_parser.predict_dataflow(df)
 
-    def analyze(self, **kwargs: Union[str, DataFlow, bool, int,
+    def analyze(self, **kwargs: Union[str, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) -> DataFlow:
         """
         `kwargs key dataset_dataflow:` Transfer a dataflow of a dataset via its dataflow builder
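Net effect of the `DoctectionPipe` changes: the constructor no longer asserts over the component list but builds a default `PageParsingService` from `IMAGE_DEFAULTS` when none is supplied, and all path arguments widen from the removed `Pathlike` to `PathLikeOrStr`. A constructor sketch (the component list is a placeholder, the `file_type` value illustrative):

    from deepdoctection.pipe.doctectionpipe import DoctectionPipe

    # components: a list of configured PipelineComponent instances (placeholder)
    pipe = DoctectionPipe(pipeline_component_list=components)
    # pipe.page_parser defaults to PageParsingService(text_container=IMAGE_DEFAULTS["text_container"])
    # df = pipe.analyze(path="/path/to/scans", file_type=".png")  # returns a DataFlow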
deepdoctection/pipe/language.py
CHANGED
@@ -18,16 +18,14 @@
 """
 Module for language detection pipeline component
 """
-from copy import copy, deepcopy
 from typing import Optional, Sequence
 
 from ..datapoint.image import Image
 from ..datapoint.view import Page
 from ..extern.base import LanguageDetector, ObjectDetector
-from ..utils.detection_types import JsonDict
 from ..utils.error import ImageError
 from ..utils.settings import PageType, TypeOrStr, get_type
-from .base import PipelineComponent
+from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
 

@@ -74,26 +72,27 @@ class LanguageDetectionService(PipelineComponent):
         self.predictor = language_detector
         self.text_detector = text_detector
         self.text_container = get_type(text_container) if text_container is not None else text_container
-
-
-
-
-
-
+        self.floating_text_block_categories = (
+            tuple(get_type(text_block) for text_block in floating_text_block_categories)
+            if (floating_text_block_categories is not None)
+            else ()
+        )
+
+        super().__init__(self._get_name(self.predictor.name))
 
     def serve(self, dp: Image) -> None:
         if self.text_detector is None:
-            page = Page.from_image(dp, self.text_container, self.floating_text_block_categories)
+            page = Page.from_image(dp, self.text_container, self.floating_text_block_categories)
             text = page.text_no_line_break
         else:
             if dp.image is None:
                 raise ImageError("image cannot be None")
             detect_result_list = self.text_detector.predict(dp.image)
             # this is a concatenation of all detection result. No reading order
-            text = " ".join(
+            text = " ".join((result.text for result in detect_result_list if result.text is not None))
         predict_result = self.predictor.predict(text)
         self.dp_manager.set_summary_annotation(
-            PageType.
+            PageType.LANGUAGE, PageType.LANGUAGE, 1, predict_result.text, predict_result.score
         )
 
     def clone(self) -> PipelineComponent:

@@ -101,22 +100,18 @@ class LanguageDetectionService(PipelineComponent):
         if not isinstance(predictor, LanguageDetector):
             raise TypeError(f"Predictor must be of type LanguageDetector, but is of type {type(predictor)}")
         return self.__class__(
-            predictor,
-
-
-
+            language_detector=predictor,
+            text_container=self.text_container,
+            text_detector=self.text_detector.clone() if self.text_detector is not None else None,
+            floating_text_block_categories=self.floating_text_block_categories,
         )
 
-    def get_meta_annotation(self) ->
-        return
-            [
-                ("image_annotations", []),
-                ("sub_categories", {}),
-                ("relationships", {}),
-                ("summaries", [PageType.language]),
-            ]
-        )
+    def get_meta_annotation(self) -> MetaAnnotation:
+        return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=(PageType.LANGUAGE,))
 
     @staticmethod
     def _get_name(predictor_name: str) -> str:
         return f"language_detection_{predictor_name}"
+
+    def clear_predictor(self) -> None:
+        self.predictor.clear_model()
deepdoctection/pipe/layout.py
CHANGED
@@ -18,21 +18,22 @@
 """
 Module for layout pipeline component
 """
+from __future__ import annotations
+
 from typing import Optional
 
 import numpy as np
 
 from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner
-from ..utils.detection_types import JsonDict
 from ..utils.error import ImageError
 from ..utils.transform import PadTransform
-from .base import
+from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
 
 
 @pipeline_component_registry.register("ImageLayoutService")
-class ImageLayoutService(
+class ImageLayoutService(PipelineComponent):
     """
     Pipeline component for determining the layout. Which layout blocks are determined depends on the Detector and thus
     usually on the data set on which the Detector was pre-trained. If the Detector has been trained on Publaynet, these

@@ -63,6 +64,7 @@ class ImageLayoutService(PredictorPipelineComponent):
         :param crop_image: Do not only populate `ImageAnnotation.image` but also crop the detected block according
                            to its bounding box and populate the resulting sub image to
                            `ImageAnnotation.image.image`.
+        :param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
         :param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
                                          available that will be predicted by the `layout_detector`. If yes, will skip
                                          the prediction process.

@@ -71,11 +73,12 @@ class ImageLayoutService(PredictorPipelineComponent):
         self.crop_image = crop_image
         self.padder = padder
         self.skip_if_layout_extracted = skip_if_layout_extracted
-
+        self.predictor = layout_detector
+        super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
 
     def serve(self, dp: Image) -> None:
         if self.skip_if_layout_extracted:
-            categories = self.predictor.
+            categories = self.predictor.get_category_names()
             anns = dp.get_annotation(category_names=categories)
             if anns:
                 return

@@ -84,7 +87,7 @@ class ImageLayoutService(PredictorPipelineComponent):
         np_image = dp.image
         if self.padder:
             np_image = self.padder.apply_image(np_image)
-        detect_result_list = self.predictor.predict(np_image)
+        detect_result_list = self.predictor.predict(np_image)
         if self.padder and detect_result_list:
             boxes = np.array([detect_result.box for detect_result in detect_result_list])
             boxes_orig = self.padder.inverse_apply_coords(boxes)

@@ -94,22 +97,20 @@ class ImageLayoutService(PredictorPipelineComponent):
         for detect_result in detect_result_list:
             self.dp_manager.set_image_annotation(detect_result, to_image=self.to_image, crop_image=self.crop_image)
 
-    def get_meta_annotation(self) ->
-
-
-
-
-
-            ("summaries", []),
-        ]
+    def get_meta_annotation(self) -> MetaAnnotation:
+        if not isinstance(self.predictor, (ObjectDetector, PdfMiner)):
+            raise TypeError(
+                f"self.predictor must be of type ObjectDetector or PdfMiner but is of type " f"{type(self.predictor)}"
+            )
+        return MetaAnnotation(
+            image_annotations=self.predictor.get_category_names(), sub_categories={}, relationships={}, summaries=()
         )
 
     @staticmethod
     def _get_name(predictor_name: str) -> str:
         return f"image_{predictor_name}"
 
-    def clone(self) ->
+    def clone(self) -> ImageLayoutService:
         predictor = self.predictor.clone()
         padder_clone = None
         if self.padder:

@@ -117,3 +118,6 @@ class ImageLayoutService(PredictorPipelineComponent):
         if not isinstance(predictor, ObjectDetector):
             raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
         return self.__class__(predictor, self.to_image, self.crop_image, padder_clone, self.skip_if_layout_extracted)
+
+    def clear_predictor(self) -> None:
+        self.predictor.clear_model()
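The `padder` round trip documented above is: `apply_image` before `predict`, then `inverse_apply_coords` on the stacked boxes. A sketch of the data flow with a dummy image and stand-in detector output; the `PadTransform` constructor arguments are an assumption (only `apply_image` and `inverse_apply_coords` are confirmed by this diff):

    import numpy as np
    from deepdoctection.utils.transform import PadTransform

    padder = PadTransform(top=10, right=10, bottom=10, left=10)  # assumed signature
    np_image = np.zeros((400, 300, 3), dtype=np.uint8)

    padded = padder.apply_image(np_image)            # pad before prediction
    boxes = np.array([[15.0, 20.0, 120.0, 180.0]])   # stand-in for detector boxes (x1, y1, x2, y2)
    boxes_orig = padder.inverse_apply_coords(boxes)  # map back to original image coordinates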