deepdoctection-0.32-py3-none-any.whl → deepdoctection-0.34-py3-none-any.whl
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
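The change that runs through nearly every hunk below is the removal of deepdoctection/utils/detection_types.py (+0 -68) in favour of the new deepdoctection/utils/types.py (+104 -0). Downstream imports move accordingly; a minimal sketch of the rename, using JsonDict as one of the moved aliases visible in the hunks:

# deepdoctection 0.32
from deepdoctection.utils.detection_types import JsonDict

# deepdoctection 0.34
from deepdoctection.utils.types import JsonDict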
deepdoctection/mapper/d2struct.py
CHANGED

@@ -22,17 +22,17 @@ visualising
 from __future__ import annotations
 
 import os.path
-from typing import
+from typing import Mapping, Optional, Sequence, Union
 
 import numpy as np
 from lazy_imports import try_import
 
-from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.annotation import DEFAULT_CATEGORY_ID, ImageAnnotation
 from ..datapoint.image import Image
 from ..extern.pt.nms import batched_nms
 from ..mapper.maputils import curry
-from ..utils.
-from ..utils.
+from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
+from ..utils.types import Detectron2Dict
 
 with try_import() as pt_import_guard:
     import torch
@@ -41,7 +41,7 @@ with try_import() as d2_import_guard:
     from detectron2.structures import BoxMode
 
 with try_import() as wb_import_guard:
-    from wandb import Classes
+    from wandb import Classes  # type: ignore
     from wandb import Image as Wbimage
 
 
@@ -49,8 +49,8 @@ with try_import() as wb_import_guard:
 def image_to_d2_frcnn_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[
-) -> Optional[
+    category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+) -> Optional[Detectron2Dict]:
     """
     Maps an image to a standard dataset dict as described in
     <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>. It further checks if the image is physically
@@ -66,7 +66,7 @@ def image_to_d2_frcnn_training(
     if not os.path.isfile(dp.location) and dp.image is None:
         return None
 
-    output:
+    output: Detectron2Dict = {"file_name": str(dp.location)}
 
     if dp.image is not None:
         output["image"] = dp.image.astype("float32")
@@ -87,10 +87,10 @@ def image_to_d2_frcnn_training(
             box = box.transform(dp.width, dp.height, absolute_coords=True)
 
         # Detectron2 does not fully support BoxMode.XYXY_REL
-        mapped_ann:
+        mapped_ann: dict[str, Union[str, int, list[float]]] = {
             "bbox_mode": BoxMode.XYXY_ABS,
             "bbox": box.to_list(mode="xyxy"),
-            "category_id":
+            "category_id": ann.category_id - 1,
         }
         annotations.append(mapped_ann)
 
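The hunks above type the mapper's output as Detectron2Dict and shift category_id down by one, since deepdoctection numbers categories from 1 while Detectron2 expects zero-based contiguous ids. A minimal sketch of the resulting dataset dict, with illustrative values rather than real mapper output:

from detectron2.structures import BoxMode

dd_category_id = 2  # deepdoctection category ids are 1-based
output = {
    "file_name": "/path/to/page.png",  # illustrative location
    "annotations": [
        {
            "bbox_mode": BoxMode.XYXY_ABS,
            "bbox": [15.0, 20.0, 110.0, 240.0],  # xyxy, absolute coordinates
            "category_id": dd_category_id - 1,  # zero-based for Detectron2
        }
    ],
}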
@@ -149,23 +149,23 @@ def pt_nms_image_annotations(
 
 def _get_category_attributes(
     ann: ImageAnnotation, cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None
-) ->
+) -> tuple[ObjectTypes, int, Optional[float]]:
     if cat_to_sub_cat:
         sub_cat_key = cat_to_sub_cat.get(get_type(ann.category_name))
         if sub_cat_key in ann.sub_categories:
             sub_cat = ann.get_sub_category(sub_cat_key)
-            return sub_cat.category_name, sub_cat.category_id, sub_cat.score
-        return
-    return ann.category_name, ann.category_id, ann.score
+            return get_type(sub_cat.category_name), sub_cat.category_id, sub_cat.score
+        return DefaultType.DEFAULT_TYPE, DEFAULT_CATEGORY_ID, 0.0
+    return get_type(ann.category_name), ann.category_id, ann.score
 
 
 @curry
 def to_wandb_image(
     dp: Image,
-    categories: Mapping[
-    sub_categories: Optional[Mapping[
+    categories: Mapping[int, TypeOrStr],
+    sub_categories: Optional[Mapping[int, TypeOrStr]] = None,
     cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None,
-) ->
+) -> tuple[str, Wbimage]:
     """
     Converting a deepdoctection image into a wandb image
 
@@ -185,11 +185,11 @@ def to_wandb_image(
     anns = dp.get_annotation(category_names=list(categories.values()))
 
     if sub_categories:
-        class_labels =
-        class_set = Classes([{"name": val, "id":
+        class_labels = dict(sub_categories.items())
+        class_set = Classes([{"name": val, "id": key} for key, val in sub_categories.items()])
     else:
-
-
+        class_set = Classes([{"name": val, "id": key} for key, val in categories.items()])
+        class_labels = dict(categories.items())
 
     for ann in anns:
         bounding_box = ann.get_bounding_box(dp.image_id)
@@ -200,7 +200,7 @@ def to_wandb_image(
         box = {
             "position": {"middle": bounding_box.center, "width": bounding_box.width, "height": bounding_box.height},
             "domain": "pixel",
-            "class_id":
+            "class_id": category_id,
             "box_caption": category_name,
         }
         if score:
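The class_set/class_labels pair assembled above is the standard input for W&B bounding-box overlays. A self-contained sketch of how such a payload is consumed, assuming two dummy categories and a blank stand-in image; the key names ("ground_truth", "box_data", "class_labels") follow the wandb boxes API:

import numpy as np
from wandb import Classes
from wandb import Image as Wbimage

class_labels = {1: "table", 2: "text"}  # dummy categories
class_set = Classes([{"name": val, "id": key} for key, val in class_labels.items()])
box_data = [
    {
        "position": {"middle": [60, 120], "width": 100, "height": 200},
        "domain": "pixel",  # absolute pixel coordinates
        "class_id": 1,
        "box_caption": "table",
    }
]
wb_image = Wbimage(
    np.zeros((400, 300, 3), dtype=np.uint8),  # blank page stand-in
    boxes={"ground_truth": {"box_data": box_data, "class_labels": class_labels}},
    classes=class_set,
)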
deepdoctection/mapper/hfstruct.py
CHANGED

@@ -23,7 +23,7 @@ from __future__ import annotations
 
 import os
 from dataclasses import dataclass, field
-from typing import
+from typing import Literal, Optional, Sequence, Union
 
 import numpy as np
 from lazy_imports import try_import
@@ -31,9 +31,9 @@ from lazy_imports import try_import
 from ..datapoint.image import Image
 from ..mapper.maputils import curry
 from ..mapper.misc import get_load_image_func
-from ..utils.
-from ..utils.settings import ObjectTypes
+from ..utils.settings import TypeOrStr
 from ..utils.transform import PadTransform
+from ..utils.types import JsonDict
 
 with try_import() as tr_import_guard:
     from transformers import BatchFeature, DetrFeatureExtractor
@@ -43,7 +43,7 @@ with try_import() as tr_import_guard:
 def image_to_hf_detr_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[
+    category_names: Optional[Union[TypeOrStr, Sequence[Union[TypeOrStr]]]] = None,
 ) -> Optional[JsonDict]:
     """
     Maps an image to a detr input datapoint dict, that, after collating can be used for training.
@@ -76,11 +76,11 @@ def image_to_hf_detr_training(
     for ann in anns:
         box = ann.get_bounding_box(dp.image_id)
 
-        mapped_ann:
+        mapped_ann: dict[str, Union[str, int, float, list[float]]] = {
             "id": "".join([c for c in ann.annotation_id if c.isdigit()])[:8],
             "image_id": "".join([c for c in dp.image_id if c.isdigit()])[:8],
             "bbox": box.to_list(mode="xywh"),
-            "category_id":
+            "category_id": ann.category_id - 1,
             "area": box.area,
         }
         annotations.append(mapped_ann)
@@ -108,7 +108,7 @@ class DetrDataCollator:
     padder: Optional[PadTransform] = None
     return_tensors: Optional[Literal["pt"]] = field(default="pt")
 
-    def __call__(self, raw_features:
+    def __call__(self, raw_features: list[JsonDict]) -> BatchFeature:
         """
         Creating BatchFeature from a list of dict of raw features.
 
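The "id" and "image_id" lines above squeeze deepdoctection's uuid strings down to at most eight digit characters, because the COCO-style input consumed by DETR training wants short numeric ids. The trick in isolation (the uuid value is made up):

annotation_id = "5f3a91bc-4de2-a07d-11ec"  # illustrative uuid-like string
coco_id = "".join([c for c in annotation_id if c.isdigit()])[:8]
print(coco_id)  # "53914207"; the mapper keeps it as a str inside the dict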
deepdoctection/mapper/laylmstruct.py
CHANGED

@@ -24,7 +24,7 @@ from __future__ import annotations
 
 import random
 from dataclasses import dataclass, field
-from typing import Any, Callable,
+from typing import Any, Callable, Literal, NewType, Optional, Sequence, Union
 
 import numpy as np
 import numpy.typing as npt
@@ -34,9 +34,9 @@ from ..datapoint.annotation import ContainerAnnotation
 from ..datapoint.convert import box_to_point4, point4_to_box
 from ..datapoint.image import Image
 from ..datapoint.view import Page
-from ..utils.detection_types import JsonDict
 from ..utils.settings import DatasetType, LayoutType, PageType, Relationships, WordType
 from ..utils.transform import ResizeTransform, normalize_image
+from ..utils.types import JsonDict
 from .maputils import curry
 
 with try_import() as import_guard:
@@ -69,7 +69,7 @@ A DataCollator is a function that takes a list of samples from a Dataset and col
 of PyTorch/TensorFlow tensors or NumPy arrays.
 """
 
-DataCollator = NewType("DataCollator", Callable[[
+DataCollator = NewType("DataCollator", Callable[[list[InputDataClass]], dict[str, Any]])  # type: ignore
 
 _CLS_BOX = [0.0, 0.0, 1000.0, 1000.0]
 _SEP_BOX = [1000.0, 1000.0, 1000.0, 1000.0]
@@ -125,9 +125,9 @@ def image_to_raw_layoutlm_features(
     all_ann_ids = []
     all_words = []
     all_boxes = []
-    all_labels:
+    all_labels: list[int] = []
 
-    anns = dp.
+    anns = dp.get_annotation(category_names=LayoutType.WORD)
 
     word_id_to_segment_box = {}
     if segment_positions:
@@ -139,12 +139,12 @@ def image_to_raw_layoutlm_features(
             if not bounding_box.absolute_coords:
                 bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
             word_id_to_segment_box.update(
-                {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.
+                {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.CHILD)}
             )
 
     for ann in anns:
         all_ann_ids.append(ann.annotation_id)
-        char_cat = ann.get_sub_category(WordType.
+        char_cat = ann.get_sub_category(WordType.CHARACTERS)
         if not isinstance(char_cat, ContainerAnnotation):
             raise TypeError(f"char_cat must be of type ContainerAnnotation but is of type {type(char_cat)}")
         word = char_cat.value
@@ -158,15 +158,15 @@ def image_to_raw_layoutlm_features(
         all_boxes.append(word_id_to_segment_box.get(ann.annotation_id, box).to_list(mode="xyxy"))
 
         if (
-            WordType.
-        ) and dataset_type == DatasetType.
+            WordType.TOKEN_TAG in ann.sub_categories or WordType.TOKEN_CLASS in ann.sub_categories
+        ) and dataset_type == DatasetType.TOKEN_CLASSIFICATION:
             if use_token_tag:
-                all_labels.append(
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_TAG).category_id - 1)
             else:
-                all_labels.append(
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_CLASS).category_id - 1)
 
-    if
-        all_labels.append(
+    if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
+        all_labels.append(dp.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1)
 
     boxes = np.asarray(all_boxes, dtype="float32")
     if boxes.ndim == 1:
@@ -234,7 +234,7 @@ def layoutlm_features_to_pt_tensors(features: LayoutLMFeatures) -> LayoutLMFeatures:
 
 
 def _tokenize_with_sliding_window(
-    raw_features:
+    raw_features: list[Union[RawLayoutLMFeatures, RawLMFeatures]],
     tokenizer: PreTrainedTokenizerFast,
     sliding_window_stride: int,
     max_batch_size: int,
@@ -385,7 +385,7 @@ def _tokenize_with_sliding_window(
         )
     )
 
-    slided_tokenized_inputs:
+    slided_tokenized_inputs: dict[str, Union[list[Union[str, int]], torch.Tensor]] = {}
     if return_tensors == "pt":
         slided_tokenized_inputs["overflow_to_sample_mapping"] = torch.tensor(overflow_to_sample_mapping)
         slided_tokenized_inputs["input_ids"] = torch.tensor(all_input_ids)
@@ -402,7 +402,7 @@
 
 
 def raw_features_to_layoutlm_features(
-    raw_features: Union[RawLayoutLMFeatures, RawLMFeatures,
+    raw_features: Union[RawLayoutLMFeatures, RawLMFeatures, list[Union[RawLayoutLMFeatures, RawLMFeatures]]],
     tokenizer: PreTrainedTokenizerFast,
     padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
     truncation: bool = True,
@@ -447,11 +447,11 @@ def raw_features_to_layoutlm_features(
         raw_features = [raw_features]
 
     _has_token_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.
+        raw_features[0]["dataset_type"] == DatasetType.TOKEN_CLASSIFICATION
         and raw_features[0].get("labels") is not None
     )
     _has_sequence_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.
+        raw_features[0]["dataset_type"] == DatasetType.SEQUENCE_CLASSIFICATION
         and raw_features[0].get("labels") is not None
     )
     _has_labels = bool(_has_token_labels or _has_sequence_labels)
@@ -620,7 +620,7 @@ class LayoutLMDataCollator:
         if self.return_overflowing_tokens:
             assert self.truncation, self.truncation
 
-    def __call__(self, raw_features: Union[RawLayoutLMFeatures,
+    def __call__(self, raw_features: Union[RawLayoutLMFeatures, list[RawLayoutLMFeatures]]) -> LayoutLMFeatures:
         """
         Calling the DataCollator to form model inputs for training and inference. Takes a single raw
         :param raw_features: A dictionary with the following arguments: `image_id, width, height, ann_ids, words,
@@ -741,7 +741,7 @@ def image_to_raw_lm_features(
     dp: Image,
     dataset_type: Optional[Literal["sequence_classification", "token_classification"]] = None,
     use_token_tag: bool = True,
-    text_container: Optional[LayoutType] = LayoutType.
+    text_container: Optional[LayoutType] = LayoutType.WORD,
     floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
     include_residual_text_container: bool = False,
 ) -> Optional[RawLMFeatures]:
@@ -787,9 +787,7 @@ def image_to_raw_lm_features(
     elif text_["token_classes"]:
         raw_features["labels"] = text_["token_classes"]
     elif page.document_type is not None:
-        document_type_id = (
-            int(page.image_orig.summary.get_sub_category(PageType.document_type).category_id) - 1  # type: ignore
-        )
+        document_type_id = page.image_orig.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1
         raw_features["labels"] = [document_type_id]
 
     raw_features["dataset_type"] = dataset_type
@@ -806,7 +804,7 @@ def image_to_lm_features(
     return_overflowing_tokens: bool = False,
     return_tensors: Optional[Literal["pt"]] = "pt",
     sliding_window_stride: int = 0,
-    text_container: Optional[LayoutType] = LayoutType.
+    text_container: Optional[LayoutType] = LayoutType.WORD,
     floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
     include_residual_text_container: bool = False,
 ) -> Optional[LayoutLMFeatures]:
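Several hunks above touch _tokenize_with_sliding_window, which splits over-long token sequences into overlapping windows instead of truncating. A toy re-implementation of the windowing idea, not the library's exact logic; here stride is the overlap between consecutive windows, as in the Hugging Face tokenizers, and stride is assumed smaller than max_length:

def sliding_windows(input_ids: list[int], max_length: int, stride: int) -> list[list[int]]:
    if len(input_ids) <= max_length:
        return [input_ids]
    step = max_length - stride  # advance by window size minus overlap
    return [input_ids[i : i + max_length] for i in range(0, len(input_ids) - stride, step)]

print(sliding_windows(list(range(10)), max_length=4, stride=2))
# [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7], [6, 7, 8, 9]]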
deepdoctection/mapper/maputils.py
CHANGED

@@ -24,16 +24,16 @@ import functools
 import itertools
 import traceback
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Mapping, Optional, Sequence, Union
 
 import numpy as np
 from tabulate import tabulate
 from termcolor import colored
 
-from ..utils.detection_types import DP, BaseExceptionType, S, T
 from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import ObjectTypes
+from ..utils.types import DP, BaseExceptionType, S, T
 
 __all__ = ["MappingContextManager", "DefaultMapper", "maybe_get_fake_score", "LabelSummarizer", "curry"]
 
@@ -45,7 +45,7 @@ class MappingContextManager:
     """
 
     def __init__(
-        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs:
+        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: dict[str, Optional[str]]
     ) -> None:
         """
         :param dp_name: A name for the datapoint to be mapped
@@ -81,6 +81,7 @@ class MappingContextManager:
             AssertionError,
             TypeError,
             FileNotFoundError,
+            AttributeError,
             BoundingBoxError,
             AnnotationError,
             ImageError,
@@ -192,7 +193,7 @@ class LabelSummarizer:
 
     """
 
-    def __init__(self, categories: Mapping[
+    def __init__(self, categories: Mapping[int, ObjectTypes]) -> None:
        """
        :param categories: A dict of categories as given as in categories.get_categories().
        """
@@ -210,11 +211,11 @@ class LabelSummarizer:
         np_item = np.asarray(item, dtype="int8")
         self.summary += np.histogram(np_item, bins=self.hist_bins)[0]
 
-    def get_summary(self) ->
+    def get_summary(self) -> dict[int, int]:
         """
         Get a dictionary with category ids and the number dumped
         """
-        return dict(list(zip(self.categories.keys(), self.summary.
+        return dict(list(zip(self.categories.keys(), self.summary.tolist())))
 
     def print_summary_histogram(self, dd_logic: bool = True) -> None:
         """
@@ -223,11 +224,9 @@ class LabelSummarizer:
         :param dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
         """
         if dd_logic:
-            data = list(itertools.chain(*[[self.categories[
+            data = list(itertools.chain(*[[self.categories[i].value, v] for i, v in enumerate(self.summary, 1)]))
         else:
-            data = list(
-                itertools.chain(*[[self.categories[str(i + 1)].value, v] for i, v in enumerate(self.summary[:-1])])
-            )
+            data = list(itertools.chain(*[[self.categories[i + 1].value, v] for i, v in enumerate(self.summary[:-1])]))
         num_columns = min(6, len(data))
         total_img_anns = sum(data[1::2])
         data.extend([None] * ((num_columns - len(data) % num_columns) % num_columns))
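The LabelSummarizer edits above drop the old string-keyed categories (self.categories[str(i + 1)]) for plain int keys. Underneath, the summarizer is just an np.histogram over 1-based category ids; a compact sketch in which the bin edges are an assumption about how hist_bins is built:

import numpy as np

num_categories = 3
category_ids = [1, 2, 2, 3, 1, 2]  # ids dumped while iterating a dataset
hist_bins = np.arange(0.5, num_categories + 1.5)  # assumed edges: 0.5, 1.5, 2.5, 3.5
summary = np.histogram(np.asarray(category_ids, dtype="int8"), bins=hist_bins)[0]
print(dict(zip(range(1, num_categories + 1), summary.tolist())))  # {1: 2, 2: 3, 3: 1}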
deepdoctection/mapper/match.py
CHANGED

@@ -19,10 +19,11 @@
 Module for matching detections according to various matching rules
 """
 
-from typing import Any, Literal, Optional, Sequence,
+from typing import Any, Literal, Optional, Sequence, Union
 
 import numpy as np
 from numpy.typing import NDArray
+from scipy.spatial import distance
 
 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import iou
@@ -41,7 +42,7 @@ def match_anns_by_intersection(
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
     max_parent_only: bool = False,
-) ->
+) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
     Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
     indices that are above some intersection threshold. It will also return a list of all pre selected parent and child
@@ -164,3 +165,33 @@ def match_anns_by_intersection(
         return [], [], [], []
 
     return child_index, parent_index, child_anns, parent_anns
+
+
+def match_anns_by_distance(
+    dp: Image,
+    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
+    child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
+    """
+    Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
+    parent and child bounding boxes. It will return the closest child for each parent. Note, that a child can be
+    assigned multiple times to different parents.
+
+    :param dp: image datapoint
+    :param parent_ann_category_names: single str or list of category names
+    :param child_ann_category_names: single str or list of category names
+    :param parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent candi-
+        dates which are not in the list.
+    :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+        candidates which are not in the list.
+    :return:
+    """
+
+    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
+    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
+    parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
+    child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
+    return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
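The heart of the new match_anns_by_distance is the distance.cdist(...).argmin(axis=1) call: every parent gets exactly one nearest child, and a child may serve several parents. The same pattern stripped of the Image plumbing, with made-up centers:

import numpy as np
from scipy.spatial import distance

parent_centers = np.array([[10.0, 10.0], [100.0, 50.0]])  # e.g. table centers
child_centers = np.array([[95.0, 55.0], [12.0, 8.0], [400.0, 400.0]])  # e.g. caption centers
child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1).tolist()
print(list(enumerate(child_indices)))  # [(0, 1), (1, 0)]: parent index paired with its nearest child index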
deepdoctection/mapper/misc.py
CHANGED

@@ -23,14 +23,14 @@ from __future__ import annotations
 
 import ast
 import os
-from typing import
+from typing import Mapping, Optional, Sequence, Union
 
 from lazy_imports import try_import
 
 from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import get_load_image_func, load_image_from_file
+from ..utils.types import JsonDict
 from ..utils.utils import is_file_extension
 from .maputils import MappingContextManager, curry
 
@@ -135,7 +135,7 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s
     return dp
 
 
-def image_ann_to_image(dp: Image, category_names: Union[str,
+def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
     """
     Adds `image` to annotations with given category names
 
@@ -145,7 +145,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_image
     :return: Image
     """
 
-    img_anns = dp.
+    img_anns = dp.get_annotation(category_names=category_names)
     for ann in img_anns:
         dp.image_ann_to_image(annotation_id=ann.annotation_id, crop_image=crop_image)
 
@@ -154,7 +154,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_image
 
 @curry
 def maybe_ann_to_sub_image(
-    dp: Image, category_names_sub_image: Union[str,
+    dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
 ) -> Image:
     """
     Assigns to sub image with given category names all annotations with given category names whose bounding box lie
@@ -196,7 +196,6 @@ def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
     """
 
     output = str(xslt_obj(dp["xml"]))
-    output = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     dp.pop("xml")
-    dp["json"] = output
+    dp["json"] = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     return dp
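xml_to_dict only works if the XSLT stylesheet emits a string that ast.literal_eval can parse as a Python literal. A self-contained toy example of that contract; the stylesheet and document here are invented for illustration:

import ast
from lxml import etree

XSLT = b"""<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>
  <xsl:template match="/doc">{"title": "<xsl:value-of select="title"/>"}</xsl:template>
</xsl:stylesheet>"""

xslt_obj = etree.XSLT(etree.fromstring(XSLT))
output = str(xslt_obj(etree.fromstring(b"<doc><title>annual report</title></doc>")))
print(ast.literal_eval(output.replace('<?xml version="1.0"?>', "")))  # {'title': 'annual report'}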
deepdoctection/mapper/pascalstruct.py
CHANGED

@@ -20,25 +20,25 @@ Module for mapping annotations in iiitar13k style structure
 """
 
 import os
-from typing import
+from typing import Optional
 
 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import BoundingBox
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import load_image_from_file
 from ..utils.settings import get_type
+from ..utils.types import JsonDict
 from .maputils import MappingContextManager, curry, maybe_get_fake_score
 
 
 @curry
 def pascal_voc_dict_to_image(
     dp: JsonDict,
-    categories_name_as_key:
+    categories_name_as_key: dict[str, int],
     load_image: bool,
     filter_empty_image: bool,
     fake_score: bool,
-    category_name_mapping: Optional[
+    category_name_mapping: Optional[dict[str, str]] = None,
 ) -> Optional[Image]:
     """
     Map a dataset in a structure equivalent to iiitar13k annotation style to image format
deepdoctection/mapper/prodigystruct.py
CHANGED

@@ -23,8 +23,8 @@ import os
 from typing import Mapping, Optional, Sequence
 
 from ..datapoint import BoundingBox, Image, ImageAnnotation
-from ..utils.
-from ..utils.
+from ..utils.settings import ObjectTypes, get_type
+from ..utils.types import JsonDict, PathLikeOrStr
 from .maputils import MappingContextManager, curry, maybe_get_fake_score
 
 _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
@@ -33,10 +33,10 @@ _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
 @curry
 def prodigy_to_image(
     dp: JsonDict,
-    categories_name_as_key: Mapping[
+    categories_name_as_key: Mapping[ObjectTypes, int],
     load_image: bool,
     fake_score: bool,
-    path_reference_ds: Optional[
+    path_reference_ds: Optional[PathLikeOrStr] = None,
     accept_only_answer: bool = False,
     category_name_mapping: Optional[Mapping[str, str]] = None,
 ) -> Optional[Image]:
@@ -133,7 +133,7 @@ def prodigy_to_image(
         annotation = ImageAnnotation(
             category_name=label,
             bounding_box=bbox,
-            category_id=categories_name_as_key[label],
+            category_id=categories_name_as_key[get_type(label)],
             score=score,
             external_id=external_id,
         )
@@ -163,7 +163,7 @@ def image_to_prodigy(dp: Image, category_names: Optional[Sequence[ObjectTypes]]
     output["image_id"] = dp.image_id
 
     spans = []
-    for ann in dp.
+    for ann in dp.get_annotation(category_names=category_names):
         bounding_box = ann.get_bounding_box(dp.image_id)
         if not bounding_box.absolute_coords:
             bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
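The categories_name_as_key[get_type(label)] fix above is needed because the mapping is now keyed by ObjectTypes enum members instead of raw strings, so the incoming Prodigy label has to be normalized first. A short sketch of the lookup; the concrete members and ids are assumptions:

from deepdoctection.utils.settings import LayoutType, get_type

categories_name_as_key = {LayoutType.TABLE: 1, LayoutType.TEXT: 2}  # assumed mapping
label = "table"  # raw label string as it arrives from a Prodigy span
print(categories_name_as_key[get_type(label)])  # get_type resolves "table" to LayoutType.TABLE -> 1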
|