deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
# File:
|
|
2
|
+
# File: sub_layout.py
|
|
3
3
|
|
|
4
4
|
# Copyright 2021 Dr. Janis Meyer. All rights reserved.
|
|
5
5
|
#
|
|
@@ -18,9 +18,11 @@
|
|
|
18
18
|
"""
|
|
19
19
|
Module for cell detection pipeline component
|
|
20
20
|
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
21
23
|
from collections import Counter
|
|
22
|
-
from
|
|
23
|
-
from typing import
|
|
24
|
+
from types import MappingProxyType
|
|
25
|
+
from typing import Mapping, Optional, Sequence, Union
|
|
24
26
|
|
|
25
27
|
import numpy as np
|
|
26
28
|
|
|
@@ -28,10 +30,10 @@ from ..datapoint.annotation import ImageAnnotation
|
|
|
28
30
|
from ..datapoint.box import crop_box_from_image
|
|
29
31
|
from ..datapoint.image import Image
|
|
30
32
|
from ..extern.base import DetectionResult, ObjectDetector, PdfMiner
|
|
31
|
-
from ..utils.
|
|
32
|
-
from ..utils.settings import ObjectTypes, Relationships
|
|
33
|
+
from ..utils.settings import ObjectTypes, Relationships, TypeOrStr, get_type
|
|
33
34
|
from ..utils.transform import PadTransform
|
|
34
|
-
from .
|
|
35
|
+
from ..utils.types import PixelValues
|
|
36
|
+
from .base import MetaAnnotation, PipelineComponent
|
|
35
37
|
from .registry import pipeline_component_registry
|
|
36
38
|
|
|
37
39
|
|
|
@@ -47,9 +49,9 @@ class DetectResultGenerator:
|
|
|
47
49
|
|
|
48
50
|
def __init__(
|
|
49
51
|
self,
|
|
50
|
-
categories: Mapping[
|
|
51
|
-
group_categories: Optional[
|
|
52
|
-
exclude_category_ids: Optional[Sequence[
|
|
52
|
+
categories: Mapping[int, ObjectTypes],
|
|
53
|
+
group_categories: Optional[list[list[int]]] = None,
|
|
54
|
+
exclude_category_ids: Optional[Sequence[int]] = None,
|
|
53
55
|
absolute_coords: bool = True,
|
|
54
56
|
) -> None:
|
|
55
57
|
"""
|
|
@@ -59,7 +61,7 @@ class DetectResultGenerator:
|
|
|
59
61
|
grouping category ids.
|
|
60
62
|
:param absolute_coords: 'absolute_coords' value to be set in 'DetectionResult'
|
|
61
63
|
"""
|
|
62
|
-
self.categories = categories
|
|
64
|
+
self.categories = MappingProxyType(dict(categories.items()))
|
|
63
65
|
self.width: Optional[int] = None
|
|
64
66
|
self.height: Optional[int] = None
|
|
65
67
|
if group_categories is None:
|
|
@@ -71,7 +73,7 @@ class DetectResultGenerator:
|
|
|
71
73
|
self.dummy_for_group_generated = [False for _ in self.group_categories]
|
|
72
74
|
self.absolute_coords = absolute_coords
|
|
73
75
|
|
|
74
|
-
def create_detection_result(self, detect_result_list:
|
|
76
|
+
def create_detection_result(self, detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
|
|
75
77
|
"""
|
|
76
78
|
Adds DetectResults for which no object was detected to the list.
|
|
77
79
|
|
|
@@ -100,8 +102,8 @@ class DetectResultGenerator:
|
|
|
100
102
|
self.dummy_for_group_generated = self._initialize_dummy_for_group_generated()
|
|
101
103
|
return detect_result_list
|
|
102
104
|
|
|
103
|
-
def _create_condition(self, detect_result_list:
|
|
104
|
-
count = Counter([
|
|
105
|
+
def _create_condition(self, detect_result_list: list[DetectionResult]) -> dict[int, int]:
|
|
106
|
+
count = Counter([ann.class_id for ann in detect_result_list])
|
|
105
107
|
cat_to_group_sum = {}
|
|
106
108
|
for group in self.group_categories:
|
|
107
109
|
group_sum = 0
|
|
@@ -111,7 +113,7 @@ class DetectResultGenerator:
|
|
|
111
113
|
cat_to_group_sum[el] = group_sum
|
|
112
114
|
return cat_to_group_sum
|
|
113
115
|
|
|
114
|
-
def _dummy_for_group_generated(self, category_id:
|
|
116
|
+
def _dummy_for_group_generated(self, category_id: int) -> bool:
|
|
115
117
|
for idx, group in enumerate(self.group_categories):
|
|
116
118
|
if category_id in group:
|
|
117
119
|
is_generated = self.dummy_for_group_generated[idx]
|
|
@@ -119,12 +121,12 @@ class DetectResultGenerator:
|
|
|
119
121
|
return is_generated
|
|
120
122
|
return False
|
|
121
123
|
|
|
122
|
-
def _initialize_dummy_for_group_generated(self) ->
|
|
124
|
+
def _initialize_dummy_for_group_generated(self) -> list[bool]:
|
|
123
125
|
return [False for _ in self.group_categories]
|
|
124
126
|
|
|
125
127
|
|
|
126
128
|
@pipeline_component_registry.register("SubImageLayoutService")
|
|
127
|
-
class SubImageLayoutService(
|
|
129
|
+
class SubImageLayoutService(PipelineComponent):
|
|
128
130
|
"""
|
|
129
131
|
Component in which the selected ImageAnnotation can be selected with cropped images and presented to a detector.
|
|
130
132
|
|
|
@@ -144,8 +146,8 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
144
146
|
def __init__(
|
|
145
147
|
self,
|
|
146
148
|
sub_image_detector: ObjectDetector,
|
|
147
|
-
sub_image_names: Union[str,
|
|
148
|
-
category_id_mapping: Optional[
|
|
149
|
+
sub_image_names: Union[str, Sequence[TypeOrStr]],
|
|
150
|
+
category_id_mapping: Optional[dict[int, int]] = None,
|
|
149
151
|
detect_result_generator: Optional[DetectResultGenerator] = None,
|
|
150
152
|
padder: Optional[PadTransform] = None,
|
|
151
153
|
):
|
|
@@ -163,16 +165,23 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
163
165
|
inverse coordinate transformation.
|
|
164
166
|
"""
|
|
165
167
|
|
|
166
|
-
|
|
167
|
-
sub_image_names
|
|
168
|
-
|
|
169
|
-
|
|
168
|
+
self.sub_image_name = (
|
|
169
|
+
(get_type(sub_image_names),)
|
|
170
|
+
if isinstance(sub_image_names, str)
|
|
171
|
+
else tuple((get_type(cat) for cat in sub_image_names))
|
|
172
|
+
)
|
|
170
173
|
self.category_id_mapping = category_id_mapping
|
|
171
174
|
self.detect_result_generator = detect_result_generator
|
|
172
175
|
self.padder = padder
|
|
173
|
-
|
|
176
|
+
self.predictor = sub_image_detector
|
|
177
|
+
super().__init__(self._get_name(sub_image_detector.name), self.predictor.model_id)
|
|
174
178
|
if self.detect_result_generator is not None:
|
|
175
|
-
|
|
179
|
+
if self.detect_result_generator.categories != self.predictor.categories.get_categories():
|
|
180
|
+
raise ValueError(
|
|
181
|
+
f"The categories of the 'detect_result_generator' must be the same as the categories of the "
|
|
182
|
+
f"'sub_image_detector'. Got {self.detect_result_generator.categories} #"
|
|
183
|
+
f"and {self.predictor.categories.get_categories()}."
|
|
184
|
+
)
|
|
176
185
|
|
|
177
186
|
def serve(self, dp: Image) -> None:
|
|
178
187
|
"""
|
|
@@ -184,7 +193,7 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
184
193
|
sub_image_anns = dp.get_annotation_iter(category_names=self.sub_image_name)
|
|
185
194
|
for sub_image_ann in sub_image_anns:
|
|
186
195
|
np_image = self.prepare_np_image(sub_image_ann)
|
|
187
|
-
detect_result_list = self.predictor.predict(np_image)
|
|
196
|
+
detect_result_list = self.predictor.predict(np_image)
|
|
188
197
|
if self.padder and detect_result_list:
|
|
189
198
|
boxes = np.array([detect_result.box for detect_result in detect_result_list])
|
|
190
199
|
boxes_orig = self.padder.inverse_apply_coords(boxes)
|
|
@@ -203,23 +212,21 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
203
212
|
)
|
|
204
213
|
self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)
|
|
205
214
|
|
|
206
|
-
def get_meta_annotation(self) ->
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
("summaries", []),
|
|
215
|
-
]
|
|
215
|
+
def get_meta_annotation(self) -> MetaAnnotation:
|
|
216
|
+
if not isinstance(self.predictor, (ObjectDetector, PdfMiner)):
|
|
217
|
+
raise ValueError(f"predictor must be of type ObjectDetector but is of type {type(self.predictor)}")
|
|
218
|
+
return MetaAnnotation(
|
|
219
|
+
image_annotations=self.predictor.get_category_names(),
|
|
220
|
+
sub_categories={},
|
|
221
|
+
relationships={get_type(parent): {Relationships.CHILD} for parent in self.sub_image_name},
|
|
222
|
+
summaries=(),
|
|
216
223
|
)
|
|
217
224
|
|
|
218
225
|
@staticmethod
|
|
219
226
|
def _get_name(predictor_name: str) -> str:
|
|
220
227
|
return f"sub_image_{predictor_name}"
|
|
221
228
|
|
|
222
|
-
def clone(self) ->
|
|
229
|
+
def clone(self) -> SubImageLayoutService:
|
|
223
230
|
predictor = self.predictor.clone()
|
|
224
231
|
padder_clone = None
|
|
225
232
|
if self.padder:
|
|
@@ -228,13 +235,13 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
228
235
|
raise ValueError(f"predictor must be of type ObjectDetector but is of type {type(predictor)}")
|
|
229
236
|
return self.__class__(
|
|
230
237
|
predictor,
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
238
|
+
self.sub_image_name,
|
|
239
|
+
self.category_id_mapping,
|
|
240
|
+
self.detect_result_generator,
|
|
234
241
|
padder_clone,
|
|
235
242
|
)
|
|
236
243
|
|
|
237
|
-
def prepare_np_image(self, sub_image_ann: ImageAnnotation) ->
|
|
244
|
+
def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> PixelValues:
|
|
238
245
|
"""Maybe crop and pad a np_array before passing it to the predictor.
|
|
239
246
|
|
|
240
247
|
Note that we currently assume a two-level hierarchy of images, e.g. we can crop a sub-image from the base
|
|
@@ -256,3 +263,6 @@ class SubImageLayoutService(PredictorPipelineComponent):
|
|
|
256
263
|
if self.padder:
|
|
257
264
|
np_image = self.padder.apply_image(np_image)
|
|
258
265
|
return np_image
|
|
266
|
+
|
|
267
|
+
def clear_predictor(self) -> None:
|
|
268
|
+
self.predictor.clear_model()
|
deepdoctection/pipe/text.py
CHANGED
|
@@ -18,24 +18,27 @@
|
|
|
18
18
|
"""
|
|
19
19
|
Module for text extraction pipeline component
|
|
20
20
|
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
21
24
|
from copy import deepcopy
|
|
22
|
-
from typing import
|
|
25
|
+
from typing import Optional, Sequence, Union
|
|
23
26
|
|
|
24
27
|
from ..datapoint.annotation import ImageAnnotation
|
|
25
28
|
from ..datapoint.image import Image
|
|
26
29
|
from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
|
|
27
30
|
from ..extern.tessocr import TesseractOcrDetector
|
|
28
|
-
from ..utils.detection_types import ImageType, JsonDict
|
|
29
31
|
from ..utils.error import ImageError
|
|
30
|
-
from ..utils.settings import PageType, TypeOrStr, WordType, get_type
|
|
31
|
-
from .
|
|
32
|
+
from ..utils.settings import ObjectTypes, PageType, TypeOrStr, WordType, get_type
|
|
33
|
+
from ..utils.types import PixelValues
|
|
34
|
+
from .base import MetaAnnotation, PipelineComponent
|
|
32
35
|
from .registry import pipeline_component_registry
|
|
33
36
|
|
|
34
37
|
__all__ = ["TextExtractionService"]
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
@pipeline_component_registry.register("TextExtractionService")
|
|
38
|
-
class TextExtractionService(
|
|
41
|
+
class TextExtractionService(PipelineComponent):
|
|
39
42
|
"""
|
|
40
43
|
Pipeline component for extracting text. Any detector can be selected, provided that it can evaluate a
|
|
41
44
|
numpy array as an image.
|
|
@@ -83,11 +86,13 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
83
86
|
if extract_from_roi is None:
|
|
84
87
|
extract_from_roi = []
|
|
85
88
|
self.extract_from_category = (
|
|
86
|
-
|
|
89
|
+
(get_type(extract_from_roi),)
|
|
87
90
|
if isinstance(extract_from_roi, str)
|
|
88
|
-
else
|
|
91
|
+
else tuple((get_type(roi_category) for roi_category in extract_from_roi))
|
|
89
92
|
)
|
|
90
|
-
|
|
93
|
+
|
|
94
|
+
self.predictor = text_extract_detector
|
|
95
|
+
super().__init__(self._get_name(text_extract_detector.name), self.predictor.model_id)
|
|
91
96
|
if self.extract_from_category:
|
|
92
97
|
if not isinstance(self.predictor, (ObjectDetector, TextRecognizer)):
|
|
93
98
|
raise TypeError(
|
|
@@ -95,9 +100,8 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
95
100
|
f"TextRecognizer. Got {type(self.predictor)}"
|
|
96
101
|
)
|
|
97
102
|
if run_time_ocr_language_selection:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
), "Only TesseractOcrDetector supports multiple languages"
|
|
103
|
+
if not isinstance(self.predictor, TesseractOcrDetector):
|
|
104
|
+
raise TypeError("Only TesseractOcrDetector supports multiple languages")
|
|
101
105
|
|
|
102
106
|
self.run_time_ocr_language_selection = run_time_ocr_language_selection
|
|
103
107
|
self.skip_if_text_extracted = skip_if_text_extracted
|
|
@@ -120,7 +124,7 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
120
124
|
else:
|
|
121
125
|
width, height = None, None
|
|
122
126
|
if self.run_time_ocr_language_selection:
|
|
123
|
-
self.predictor.set_language(dp.summary.get_sub_category(PageType.
|
|
127
|
+
self.predictor.set_language(dp.summary.get_sub_category(PageType.LANGUAGE).value) # type: ignore
|
|
124
128
|
detect_result_list = self.predictor.predict(predictor_input) # type: ignore
|
|
125
129
|
if isinstance(self.predictor, PdfMiner):
|
|
126
130
|
width, height = self.predictor.get_width_height(predictor_input) # type: ignore
|
|
@@ -134,15 +138,15 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
134
138
|
)
|
|
135
139
|
if detect_ann_id is not None:
|
|
136
140
|
self.dp_manager.set_container_annotation(
|
|
137
|
-
WordType.
|
|
141
|
+
WordType.CHARACTERS,
|
|
138
142
|
None,
|
|
139
|
-
WordType.
|
|
143
|
+
WordType.CHARACTERS,
|
|
140
144
|
detect_ann_id,
|
|
141
145
|
detect_result.text if detect_result.text is not None else "",
|
|
142
146
|
detect_result.score,
|
|
143
147
|
)
|
|
144
148
|
|
|
145
|
-
def get_text_rois(self, dp: Image) -> Sequence[Union[Image, ImageAnnotation,
|
|
149
|
+
def get_text_rois(self, dp: Image) -> Sequence[Union[Image, ImageAnnotation, list[ImageAnnotation]]]:
|
|
146
150
|
"""
|
|
147
151
|
Return image rois based on selected categories. As this selection makes only sense for specific text extractors
|
|
148
152
|
(e.g. those that perform proper OCR and do not mine text from native PDFs), it will do some sanity checks.
|
|
@@ -151,7 +155,7 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
151
155
|
:return: list of ImageAnnotation or Image
|
|
152
156
|
"""
|
|
153
157
|
if self.skip_if_text_extracted:
|
|
154
|
-
text_categories = self.predictor.
|
|
158
|
+
text_categories = self.predictor.get_category_names()
|
|
155
159
|
text_anns = dp.get_annotation(category_names=text_categories)
|
|
156
160
|
if text_anns:
|
|
157
161
|
return []
|
|
@@ -163,8 +167,8 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
163
167
|
return [dp]
|
|
164
168
|
|
|
165
169
|
def get_predictor_input(
|
|
166
|
-
self, text_roi: Union[Image, ImageAnnotation,
|
|
167
|
-
) -> Optional[Union[bytes,
|
|
170
|
+
self, text_roi: Union[Image, ImageAnnotation, list[ImageAnnotation]]
|
|
171
|
+
) -> Optional[Union[bytes, PixelValues, list[tuple[str, PixelValues]], int]]:
|
|
168
172
|
"""
|
|
169
173
|
Return raw input for a given `text_roi`. This can be a numpy array or pdf bytes and depends on the chosen
|
|
170
174
|
predictor.
|
|
@@ -191,38 +195,35 @@ class TextExtractionService(PredictorPipelineComponent):
|
|
|
191
195
|
return text_roi.pdf_bytes
|
|
192
196
|
return 1
|
|
193
197
|
|
|
194
|
-
def get_meta_annotation(self) ->
|
|
198
|
+
def get_meta_annotation(self) -> MetaAnnotation:
|
|
199
|
+
sub_cat_dict: dict[ObjectTypes, set[ObjectTypes]]
|
|
195
200
|
if self.extract_from_category:
|
|
196
|
-
sub_cat_dict = {category: {WordType.
|
|
201
|
+
sub_cat_dict = {category: {WordType.CHARACTERS} for category in self.extract_from_category}
|
|
197
202
|
else:
|
|
198
203
|
if not isinstance(self.predictor, (ObjectDetector, PdfMiner)):
|
|
199
204
|
raise TypeError(
|
|
200
205
|
f"self.predictor must be of type ObjectDetector or PdfMiner but is of type "
|
|
201
206
|
f"{type(self.predictor)}"
|
|
202
207
|
)
|
|
203
|
-
sub_cat_dict = {category: {WordType.
|
|
204
|
-
return
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
else []
|
|
212
|
-
),
|
|
213
|
-
),
|
|
214
|
-
("sub_categories", sub_cat_dict),
|
|
215
|
-
("relationships", {}),
|
|
216
|
-
("summaries", []),
|
|
217
|
-
]
|
|
208
|
+
sub_cat_dict = {category: {WordType.CHARACTERS} for category in self.predictor.get_category_names()}
|
|
209
|
+
return MetaAnnotation(
|
|
210
|
+
image_annotations=self.predictor.get_category_names()
|
|
211
|
+
if isinstance(self.predictor, (ObjectDetector, PdfMiner))
|
|
212
|
+
else (),
|
|
213
|
+
sub_categories=sub_cat_dict,
|
|
214
|
+
relationships={},
|
|
215
|
+
summaries=(),
|
|
218
216
|
)
|
|
219
217
|
|
|
220
218
|
@staticmethod
|
|
221
219
|
def _get_name(text_detector_name: str) -> str:
|
|
222
220
|
return f"text_extract_{text_detector_name}"
|
|
223
221
|
|
|
224
|
-
def clone(self) ->
|
|
222
|
+
def clone(self) -> TextExtractionService:
|
|
225
223
|
predictor = self.predictor.clone()
|
|
226
224
|
if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
|
|
227
225
|
raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
|
|
228
226
|
return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
|
|
227
|
+
|
|
228
|
+
def clear_predictor(self) -> None:
|
|
229
|
+
self.predictor.clear_model()
|
deepdoctection/pipe/transform.py
CHANGED
|
@@ -20,15 +20,16 @@ Module for transform style pipeline components. These pipeline components are us
|
|
|
20
20
|
on images (e.g. deskew, de-noising or more general GAN-like operations).
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
23
25
|
from ..datapoint.image import Image
|
|
24
26
|
from ..extern.base import ImageTransformer
|
|
25
|
-
from
|
|
26
|
-
from .base import ImageTransformPipelineComponent
|
|
27
|
+
from .base import MetaAnnotation, PipelineComponent
|
|
27
28
|
from .registry import pipeline_component_registry
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
@pipeline_component_registry.register("SimpleTransformService")
|
|
31
|
-
class SimpleTransformService(
|
|
32
|
+
class SimpleTransformService(PipelineComponent):
|
|
32
33
|
"""
|
|
33
34
|
Pipeline component for transforming an image. The service is designed for applying transform predictors that
|
|
34
35
|
take an image as numpy array as input and return the same. The service itself will change the underlying metadata
|
|
@@ -44,7 +45,8 @@ class SimpleTransformService(ImageTransformPipelineComponent):
|
|
|
44
45
|
|
|
45
46
|
:param transform_predictor: image transformer
|
|
46
47
|
"""
|
|
47
|
-
|
|
48
|
+
self.transform_predictor = transform_predictor
|
|
49
|
+
super().__init__(self._get_name(transform_predictor.name), self.transform_predictor.model_id)
|
|
48
50
|
|
|
49
51
|
def serve(self, dp: Image) -> None:
|
|
50
52
|
if dp.annotations:
|
|
@@ -60,26 +62,27 @@ class SimpleTransformService(ImageTransformPipelineComponent):
|
|
|
60
62
|
self.dp_manager.datapoint.clear_image(True)
|
|
61
63
|
self.dp_manager.datapoint.image = transformed_image
|
|
62
64
|
self.dp_manager.set_summary_annotation(
|
|
63
|
-
summary_key=self.transform_predictor.
|
|
64
|
-
summary_name=self.transform_predictor.
|
|
65
|
+
summary_key=self.transform_predictor.get_category_names()[0],
|
|
66
|
+
summary_name=self.transform_predictor.get_category_names()[0],
|
|
65
67
|
summary_number=None,
|
|
66
|
-
summary_value=getattr(detection_result, self.transform_predictor.
|
|
68
|
+
summary_value=getattr(detection_result, self.transform_predictor.get_category_names()[0].value, None),
|
|
67
69
|
summary_score=detection_result.score,
|
|
68
70
|
)
|
|
69
71
|
|
|
70
|
-
def clone(self) ->
|
|
72
|
+
def clone(self) -> SimpleTransformService:
|
|
71
73
|
return self.__class__(self.transform_predictor)
|
|
72
74
|
|
|
73
|
-
def get_meta_annotation(self) ->
|
|
74
|
-
return
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
("summaries", [self.transform_predictor.possible_category()]),
|
|
80
|
-
]
|
|
75
|
+
def get_meta_annotation(self) -> MetaAnnotation:
|
|
76
|
+
return MetaAnnotation(
|
|
77
|
+
image_annotations=(),
|
|
78
|
+
sub_categories={},
|
|
79
|
+
relationships={},
|
|
80
|
+
summaries=self.transform_predictor.get_category_names(),
|
|
81
81
|
)
|
|
82
82
|
|
|
83
83
|
@staticmethod
|
|
84
84
|
def _get_name(transform_name: str) -> str:
|
|
85
85
|
return f"simple_transform_{transform_name}"
|
|
86
|
+
|
|
87
|
+
def clear_predictor(self) -> None:
|
|
88
|
+
pass
|
deepdoctection/train/__init__.py
CHANGED
|
@@ -19,20 +19,14 @@
|
|
|
19
19
|
Init module for train package
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
from ..utils.file_utils import
|
|
23
|
-
detectron2_available,
|
|
24
|
-
pytorch_available,
|
|
25
|
-
tensorpack_available,
|
|
26
|
-
tf_available,
|
|
27
|
-
transformers_available,
|
|
28
|
-
)
|
|
22
|
+
from ..utils.file_utils import detectron2_available, tensorpack_available, transformers_available
|
|
29
23
|
|
|
30
|
-
if
|
|
31
|
-
from .tp_frcnn_train import train_faster_rcnn
|
|
32
|
-
|
|
33
|
-
if pytorch_available() and detectron2_available():
|
|
24
|
+
if detectron2_available():
|
|
34
25
|
from .d2_frcnn_train import train_d2_faster_rcnn
|
|
35
26
|
|
|
36
|
-
if
|
|
27
|
+
if transformers_available():
|
|
37
28
|
from .hf_detr_train import train_hf_detr
|
|
38
29
|
from .hf_layoutlm_train import train_hf_layoutlm
|
|
30
|
+
|
|
31
|
+
if tensorpack_available():
|
|
32
|
+
from .tp_frcnn_train import train_faster_rcnn
|