deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +919 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +162 -108
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +205 -119
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +26 -17
- deepdoctection/utils/env_info.py +86 -37
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -71
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.1.dist-info/METADATA +376 -0
- deepdoctection-0.43.1.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/sub_layout.py CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
-
+Sub layout detection pipeline component
 """
 from __future__ import annotations

@@ -39,12 +39,12 @@ from .registry import pipeline_component_registry

 class DetectResultGenerator:
     """
-    Use
+    Use `DetectResultGenerator` to refine raw detection results.

     Certain pipeline components depend on, for example, at least one object being detected. If this is not the
-    case, the generator can generate a DetectResult with a default setting. If no object was discovered for a
-    category, a DetectResult with the dimensions of the original image is generated and added to the remaining
-    DetectResults
+    case, the generator can generate a `DetectResult` with a default setting. If no object was discovered for a
+    category, a `DetectResult` with the dimensions of the original image is generated and added to the remaining
+    `DetectResults`.
     """

     def __init__(
@@ -55,11 +55,13 @@ class DetectResultGenerator:
         absolute_coords: bool = True,
     ) -> None:
         """
-        :
-
-
-
-
+        Args:
+            categories_name_as_key: The dict of all possible detection categories.
+            group_categories: If you only want to generate only one `DetectResult` for a group of categories, provided
+                that the sum of the group is less than one, then you can pass a list of list for grouping category ids.
+            exclude_category_names: List of category names to exclude from result generation.
+            absolute_coords: Value to be set in `DetectionResult` for `absolute_coords`.
+
         """
         self.categories_name_as_key = MappingProxyType(dict(categories_name_as_key.items()))
         self.width: Optional[int] = None
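A minimal usage sketch of the generator documented above, assembled from the signatures shown in these hunks; the import paths, `LayoutType` members and the example values are illustrative assumptions rather than part of the release:

```python
# Hedged sketch: driving DetectResultGenerator as described in the docstring
# above. Import paths and example values are assumptions, not from the diff.
from deepdoctection.extern.base import DetectionResult
from deepdoctection.pipe.sub_layout import DetectResultGenerator
from deepdoctection.utils.settings import LayoutType

# The dict of all possible detection categories (name -> category id).
generator = DetectResultGenerator(
    categories_name_as_key={LayoutType.row: 1, LayoutType.column: 2},
    absolute_coords=True,
)
# width and height must be initialized before create_detection_result is
# called, otherwise the ValueError documented below is raised.
generator.width, generator.height = 1654, 2339

# One detected row, no column: a full-image DetectionResult for the missing
# category is appended to the returned list.
detected = [DetectionResult(box=[10.0, 12.0, 1600.0, 48.0], class_id=1, score=0.97)]
refined = generator.create_detection_result(detected)
```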
@@ -75,10 +77,16 @@

     def create_detection_result(self, detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
         """
-        Adds DetectResults for which no object was detected to the list.
+        Adds `DetectResults` for which no object was detected to the list.
+
+        Args:
+            detect_result_list: `DetectResults` of a previously run `ObjectDetector`.
+
+        Returns:
+            Refined list of `DetectionResult`.

-        :
-
+        Raises:
+            ValueError: If `width` and `height` are not initialized.
         """

         if self.width is None and self.height is None:
@@ -115,10 +123,16 @@
     @staticmethod
     def _detection_result_sanity_check(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
         """
-        Go through each detect_result in the list and check if the box argument has sensible coordinates:
-        ulx >= 0 and lrx - ulx >= 0 (same for y coordinate). Remove the detection result if this condition is not
+        Go through each `detect_result` in the list and check if the `box` argument has sensible coordinates:
+        `ulx >= 0` and `lrx - ulx >= 0` (same for y coordinate). Remove the detection result if this condition is not
         satisfied. We need this check because if some detection results are not sane, we might end up with some
-
+        non-existing categories.
+
+        Args:
+            detect_result_list: List of `DetectionResult` to check.
+
+        Returns:
+            List of `DetectionResult` with only valid boxes.
         """
         sane_detect_results = []
         for detect_result in detect_result_list:
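The coordinate condition above is compact enough to misread, so here is a standalone restatement of exactly the documented check in plain Python, with no deepdoctection imports:

```python
# Plain restatement of the documented sanity condition: ulx >= 0 and
# lrx - ulx >= 0, and the same for the y coordinate. A box is (ulx, uly, lrx, lry).
def box_is_sane(box: tuple[float, float, float, float]) -> bool:
    ulx, uly, lrx, lry = box
    return ulx >= 0 and uly >= 0 and lrx - ulx >= 0 and lry - uly >= 0

assert box_is_sane((0, 0, 100, 50))
assert not box_is_sane((-5, 0, 100, 50))   # negative upper-left x
assert not box_is_sane((120, 0, 100, 50))  # lrx < ulx: width would be negative
```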
@@ -143,19 +157,20 @@
 @pipeline_component_registry.register("SubImageLayoutService")
 class SubImageLayoutService(PipelineComponent):
     """
-    Component in which the selected ImageAnnotation can be selected with cropped images and presented to a detector.
+    Component in which the selected `ImageAnnotation` can be selected with cropped images and presented to a detector.

-    The detected DetectResults are transformed into ImageAnnotations and stored both in the cache of the parent
-    and in the cache of the sub image.
+    The detected `DetectResults` are transformed into `ImageAnnotations` and stored both in the cache of the parent
+    image and in the cache of the sub image.

     If no objects are discovered, artificial objects can be added by means of a refinement process.

-
-
-
-
-
-
+    Example:
+        ```python
+        detect_result_generator = DetectResultGenerator(categories_items)
+        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row,
+                                                                        2: LayoutType.column})
+        item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
+        ```
     """

     def __init__(
@@ -167,18 +182,22 @@ class SubImageLayoutService(PipelineComponent):
         padder: Optional[PadTransform] = None,
     ):
         """
-        :
-
-
-
-
-
-
-
-
-
-
+        Args:
+            sub_image_detector: `ObjectDetector`.
+            sub_image_names: Category names of `ImageAnnotations` to be presented to the detector.
+                Attention: The selected `ImageAnnotations` must have `image` and `image.image` not None.
+            service_ids: List of service ids to be used for filtering the `ImageAnnotations`. If None, all
+                `ImageAnnotations` will be used.
+            detect_result_generator: `DetectResultGenerator` instance. `categories` attribute has to be the same as
+                the `categories` attribute of the `sub_image_detector`. The generator will be
+                responsible to create `DetectionResult` for some categories, if they have not
+                been detected by `sub_image_detector`.
+            padder: `PadTransform` to pad an image before passing to a predictor. Will be also responsible for
+                inverse coordinate transformation.
+
+        Raises:
+            ValueError: If the categories of the `detect_result_generator` do not match the categories of the
+                `sub_image_detector`.
         """

         self.sub_image_name = (
@@ -203,10 +222,13 @@

     def serve(self, dp: Image) -> None:
         """
-        - Selection of ImageAnnotation to present to the detector.
-        - Invoke the detector
-        - Optionally invoke the DetectResultGenerator
-        - Generate ImageAnnotations and dump to parent image and sub image.
+        - Selection of `ImageAnnotation` to present to the detector.
+        - Invoke the detector.
+        - Optionally invoke the `DetectResultGenerator`.
+        - Generate `ImageAnnotations` and dump to parent image and sub image.
+
+        Args:
+            dp: `Image` to process.
         """
         sub_image_anns = dp.get_annotation(category_names=self.sub_image_name, service_ids=self.service_ids)
         for sub_image_ann in sub_image_anns:
@@ -255,13 +277,21 @@
         )

     def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> PixelValues:
-        """
+        """
+        Maybe crop and pad a `np_array` before passing it to the predictor.
+
+        Note:
+            We currently assume a two level hierarchy of images, e.g. we can crop a sub-image from the base
+            image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+
+        Args:
+            sub_image_ann: `ImageAnnotation` to be processed.

-
-
+        Returns:
+            Processed `np_image`.

-        :
-
+        Raises:
+            ValueError: If `sub_image_ann.image` is `None`.
         """
         if sub_image_ann.image is None:
             raise ValueError("sub_image_ann.image is None, but must be an datapoint.Image")
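The class docstring's example above keeps the positional style; rewritten against the keyword signature documented in the `__init__` hunk, the same wiring might look as follows. Paths and category values are placeholders carried over from the original example, not additional API from this release:

```python
# Hedged sketch: SubImageLayoutService wired with the keyword arguments
# documented above. Paths are placeholders, as in the docstring example.
from deepdoctection.extern.tpdetect import TPFrcnnDetector
from deepdoctection.pipe.sub_layout import DetectResultGenerator, SubImageLayoutService
from deepdoctection.utils.settings import LayoutType

item_config_path = "path/to/conf_frcnn_item.yaml"             # placeholder
item_weights_path = "path/to/item_model.data-00000-of-00001"  # placeholder

d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row, 2: LayoutType.column})
detect_result_generator = DetectResultGenerator(
    categories_name_as_key={LayoutType.row: 1, LayoutType.column: 2}
)
item_component = SubImageLayoutService(
    sub_image_detector=d_items,
    sub_image_names=LayoutType.table,  # crop table annotations and run the detector on the crops
    detect_result_generator=detect_result_generator,  # same categories as the detector, per the Raises note
)
```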
deepdoctection/pipe/text.py CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
-
+Text extraction pipeline component
 """

 from __future__ import annotations
@@ -40,29 +40,30 @@ __all__ = ["TextExtractionService"]
 @pipeline_component_registry.register("TextExtractionService")
 class TextExtractionService(PipelineComponent):
     """
-
-    numpy array as an image.
+    Text extraction pipeline component.

-
-
-    beforehand. ROI extraction is particularly suitable when an OCR component is selected as the detector and the
-    document has a complex structure. Instead of transferring the entire image, only the ROIs are transferred to
-    the detector. Since the ROI has a simpler structure than the entire document page, it can significantly improve
-    the OCR results.
+    This component is responsible for extracting text from images or selected regions of interest (ROIs) using a
+    specified detector. The detector must be able to evaluate a numpy array as an image.

-    Text
-
-
-
+    Text extraction can be performed on the entire image or on selected ROIs, which are layout components determined by
+    a previously run pipeline component. ROI extraction is particularly useful when using an OCR component as the
+    detector and the document has a complex structure. By transferring only the ROIs to the detector, OCR results can
+    be significantly improved due to the simpler structure of the ROI compared to the entire document page.

-
-
+    Text components (currently only words) are attached to the image as image annotations. A relation is assigned
+    between text and ROI or between text and the entire image. When selecting ROIs, only the selected categories are
+    processed. ROIs that are not selected are not presented to the detector.

-
-
+    Example:
+        ```python
+        textract_predictor = TextractOcrDetector()
+        text_extract = TextExtractionService(textract_predictor)

-
-
+        pipe = DoctectionPipe([text_extract])
+        df = pipe.analyze(path="path/to/document.pdf")
+
+        for dp in df:
+            ...
     """

     def __init__(
@@ -72,12 +73,18 @@ class TextExtractionService(PipelineComponent):
         run_time_ocr_language_selection: bool = False,
     ):
         """
-        :
-
-
-
-
-
+        Args:
+            text_extract_detector: The detector used for text extraction.
+            extract_from_roi: One or more category names for ROI selection.
+            run_time_ocr_language_selection: If True, enables runtime OCR language selection. Only available for
+                `TesseractOcrDetector` as this framework supports multiple languages.
+                Requires a language detection pipeline component to have run before.
+                Selects the expert language OCR model based on the determined language.
+
+        Raises:
+            TypeError: If predicting from a cropped image and the detector is not an `ObjectDetector` or
+                `TextRecognizer`.
+            TypeError: If `run_time_ocr_language_selection` is True and the detector is not a `TesseractOcrDetector`.
         """

         if extract_from_roi is None:
@@ -140,11 +147,17 @@

     def get_text_rois(self, dp: Image) -> Sequence[Union[Image, ImageAnnotation, list[ImageAnnotation]]]:
         """
-
-
-
-
-
+        Returns image ROIs based on selected categories.
+
+        This selection is only meaningful for specific text extractors (e.g., those performing OCR and not mining text
+        from native PDFs). Performs sanity checks. If a preceding text extractor has already dumped text, and the
+        predictor should not extract text as well, returns an empty list.
+
+        Args:
+            dp: The `Image` to process.
+
+        Returns:
+            A list of `ImageAnnotation` or `Image`.
         """

         if self.extract_from_category:
@@ -157,11 +170,17 @@
         self, text_roi: Union[Image, ImageAnnotation, list[ImageAnnotation]]
     ) -> Optional[Union[bytes, PixelValues, list[tuple[str, PixelValues]], int]]:
         """
-
+        Returns raw input for a given `text_roi`. The input can be a numpy array or PDF bytes, depending on the chosen
         predictor.

-        :
-
+        Args:
+            text_roi: The `Image`, `ImageAnnotation`, or list of `ImageAnnotation` to process.
+
+        Returns:
+            PDF bytes, numpy array, or other predictor-specific input.
+
+        Raises:
+            ImageError: If required image data is missing or if `text_roi` is not an `Image` when required.
         """

         if isinstance(text_roi, ImageAnnotation):
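Among the constructor arguments documented above, `run_time_ocr_language_selection` carries the most preconditions; a hedged sketch of a compliant setup follows, where the yaml path is an assumption, not from this diff:

```python
# Hedged sketch: run_time_ocr_language_selection as documented above. The
# flag is only valid with a TesseractOcrDetector (otherwise TypeError) and
# assumes a language detection component ran earlier in the pipeline.
from deepdoctection.extern.tessocr import TesseractOcrDetector
from deepdoctection.pipe.text import TextExtractionService

ocr = TesseractOcrDetector(path_yaml="path/to/conf_tesseract.yaml")  # placeholder path

text_extract = TextExtractionService(
    ocr,
    run_time_ocr_language_selection=True,  # swap in the language-expert Tesseract model per document
)
```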
deepdoctection/pipe/transform.py CHANGED

@@ -16,8 +16,7 @@
 # limitations under the License.

 """
-
-    on images (e.g. deskew, de-noising or more general GAN like operations.
+Transform style pipeline components.
 """

 from __future__ import annotations
@@ -32,9 +31,10 @@ from .registry import pipeline_component_registry
 @pipeline_component_registry.register("SimpleTransformService")
 class SimpleTransformService(PipelineComponent):
     """
-    Pipeline component for transforming an image.
-
-
+    Pipeline component for transforming an image.
+
+    The service is designed for applying transform predictors that take an image as numpy array as input and return
+    the same. The service itself will change the underlying metadata like height and width of the returned transform.

     This component is meant to be used at the very first stage of a pipeline. If components have already returned image
     annotations then this component will currently not re-calculate bounding boxes in terms of the transformed image.
@@ -43,8 +43,10 @@ class SimpleTransformService(PipelineComponent):

     def __init__(self, transform_predictor: ImageTransformer):
         """
+        Initializes a `SimpleTransformService`.

-        :
+        Args:
+            transform_predictor: Image transformer.
         """
         self.transform_predictor = transform_predictor
         super().__init__(self._get_name(transform_predictor.name), self.transform_predictor.model_id)
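The rewritten module docstring drops the old enumeration of use cases ("deskew, de-noising or more general GAN like operations"); a hedged sketch of the most common one, using the deskew predictor from `deepdoctection/extern/deskew.py` (also touched in this release; its default construction is an assumption):

```python
# Hedged sketch: an ImageTransformer plugged into SimpleTransformService.
# Per the docstring above, this belongs at the very first stage of a
# pipeline, before any component produces image annotations.
from deepdoctection.extern.deskew import Jdeskewer
from deepdoctection.pipe.transform import SimpleTransformService

transform_service = SimpleTransformService(Jdeskewer())
```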
deepdoctection/train/d2_frcnn_train.py CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
-
+Training Detectron2 `GeneralizedRCNN`
 """
 from __future__ import annotations

@@ -111,10 +111,12 @@
         **kwargs: Any,
     ):
         """
-        :
-
-
-
+        Args:
+            project: W&B Project name.
+            repo: Repository name.
+            config: The project level configuration object.
+            window_size: The scalars will be median-smoothed by this window size.
+            **kwargs: Other arguments passed to `wandb.init(...)`.
         """
         if config is None:
             config = {}
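A hedged construction sketch for the writer arguments just documented; every value is a placeholder, and `entity` merely illustrates a keyword forwarded to `wandb.init(...)`:

```python
# Hedged sketch: WandbWriter with the arguments documented above.
# Requires wandb (and the detectron2 extras) to be installed and configured.
from deepdoctection.train.d2_frcnn_train import WandbWriter

writer = WandbWriter(
    project="dd-layout-train",  # W&B project name
    repo="deepdoctection",      # repository name
    window_size=20,             # median-smoothing window for scalars
    entity="my-team",           # forwarded to wandb.init(...) via **kwargs
)
```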
@@ -137,8 +139,10 @@

 class D2Trainer(DefaultTrainer):
     """
-    Detectron2 `DefaultTrainer` with some custom method for handling datasets and running evaluation.
-
+    Detectron2 `DefaultTrainer` with some custom method for handling datasets and running evaluation.
+
+    Info:
+        The setting is made to train standard models in Detectron2.
     """

     def __init__(self, cfg: CfgNode, torch_dataset: IterableDataset[Any], mapper: DatasetMapper) -> None:
@@ -150,10 +154,16 @@

     def build_hooks(self) -> list[HookBase]:
         """
-
-
+        Builds the list of hooks for training.
+
+        Note:
+            This ensures that the `EvalHook` is being called before the writer and all metrics are being written to
+            JSON, Tensorboard etc.
+
+        Returns:
+            List of `HookBase` objects.
+

-        :return: list[HookBase]
         """
         cfg = self.cfg.clone()
         cfg.defrost()
@@ -203,10 +213,12 @@
     def build_writers(self) -> list[EventWriter]:
         """
         Build a list of writers to be using `default_writers()`.
-        If you'd like a different list of writers, you can overwrite it in
-        your trainer.

-        :
+        Note:
+            If you'd like a different list of writers, you can overwrite it in your trainer.
+
+        Returns:
+            A list of `EventWriter` objects.
         """
         writers_list = default_writers(self.cfg.OUTPUT_DIR, self.max_iter)
         if self.cfg.WANDB.USE_WANDB:
@@ -220,10 +232,13 @@

     def build_train_loader(self, cfg: CfgNode) -> DataLoader[Any]:  # pylint: disable=W0221
         """
-
+        Builds the data loader for training.
+
+        Args:
+            cfg: Configuration.

-        :
-
+        Returns:
+            The data loader for a given dataset adapter and mapper.
         """
         return build_detection_train_loader(
             dataset=self.dataset, mapper=self.mapper, total_batch_size=cfg.SOLVER.IMS_PER_BATCH
@@ -231,10 +246,13 @@

     def eval_with_dd_evaluator(self, **build_eval_kwargs: str) -> Union[list[dict[str, Any]], dict[str, Any]]:
         """
-
+        Runs the evaluator. This method will be called from the `EvalHook`.

-        :
-
+        Args:
+            **build_eval_kwargs: Dataflow eval config kwargs of the underlying dataset.
+
+        Returns:
+            A dict or list of dicts with evaluation results.
         """
         assert self.evaluator is not None
         assert self.evaluator.pipe_component is not None
@@ -251,13 +269,16 @@
         build_val_dict: Optional[Mapping[str, str]] = None,
     ) -> None:
         """
-        Setup of evaluator before starting training.
-
+        Setup of evaluator before starting training.
+
+        Note:
+            During training, predictors will be replaced by current checkpoints.

-        :
-
-
-
+        Args:
+            dataset_val: Dataset on which to run evaluation.
+            pipeline_component: Pipeline component to plug into the evaluator.
+            metric: A metric class or instance.
+            build_val_dict: Evaluation dataflow build config.
         """
         if wandb_available():
             run = wandb.run if wandb.run is not None else None
@@ -295,50 +316,47 @@ def train_d2_faster_rcnn(
     pipeline_component_name: Optional[str] = None,
 ) -> None:
     """
-    Adaptation of
-    standard models
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    :
-
-
-
-
-
-
-
-
-
-
-
-
-    metric setting, pass a metric explicitly.
-    :param metric: A metric to choose for validation.
-    :param pipeline_component_name: A pipeline component name to use for validation.
+    Adaptation of https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py for training Detectron2
+    standard models.
+
+    Trains Detectron2 from scratch or fine-tunes a model using this API.
+
+    Info:
+        This training script is devoted to the case where one cluster with one GPU is available. To run on several
+        machines with more than one GPU use `detectron2.engine.launch`.
+
+    Example:
+        ```python
+        launch(train_d2_faster_rcnn,
+               num_gpus,
+               num_machines,
+               machine_rank,
+               dist_url,
+               args=(path_config_yaml,
+                     path_weights,
+                     config_overwrite,
+                     log_dir,
+                     build_train_config,
+                     dataset_val,
+                     build_val_config,
+                     metric_name,
+                     metric,
+                     pipeline_component_name),)
+        ```
+
+    Args:
+        path_config_yaml: Path to a Detectron2 config file.
+        dataset_train: The dataset to use for training.
+        path_weights: Path to a checkpoint, if you want to continue training or fine-tune. Will train from scratch if
+            an empty string is passed.
+        config_overwrite: List of arguments if some configs from the .yaml file should be replaced.
+        log_dir: Path to log dir. Will default to `train_log/frcnn`.
+        build_train_config: Dataflow build setting.
+        dataset_val: The dataset to use for validation.
+        build_val_config: Same as `build_train_config` but for validation.
+        metric_name: A metric name to choose for validation.
+        metric: A metric to choose for validation.
+        pipeline_component_name: A pipeline component name to use for validation.
     """

     assert cuda.device_count() > 0, "Has to train with GPU!"