deepdoctection 0.26-py3-none-any.whl → 0.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +7 -1
- deepdoctection/analyzer/dd.py +15 -3
- deepdoctection/configs/conf_dd_one.yaml +4 -0
- deepdoctection/datapoint/convert.py +5 -10
- deepdoctection/datapoint/image.py +2 -2
- deepdoctection/datapoint/view.py +38 -18
- deepdoctection/datasets/save.py +3 -3
- deepdoctection/extern/d2detect.py +1 -2
- deepdoctection/extern/doctrocr.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
- deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
- deepdoctection/mapper/laylmstruct.py +2 -3
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/file_utils.py +63 -26
- deepdoctection/utils/fs.py +6 -6
- deepdoctection/utils/pdf_utils.py +2 -2
- deepdoctection/utils/settings.py +8 -1
- deepdoctection/utils/transform.py +9 -9
- deepdoctection/utils/viz.py +405 -86
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/METADATA +93 -94
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/RECORD +31 -31
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/WHEEL +1 -1
- tests/analyzer/test_dd.py +6 -57
- tests/conftest.py +2 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/LICENSE +0 -0
- {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/top_level.txt +0 -0
deepdoctection/__init__.py
CHANGED
@@ -14,7 +14,7 @@ from packaging import version
 from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
 from .utils.logger import logger
 
-__version__ = 0.26
+__version__ = 0.27
 
 _IMPORT_STRUCTURE = {
     "analyzer": ["get_dd_analyzer", "build_analyzer"],
@@ -311,6 +311,10 @@ _IMPORT_STRUCTURE = {
         "get_fasttext_requirement",
         "wandb_available",
         "get_wandb_requirement",
+        "opencv_available",
+        "get_opencv_requirement",
+        "pillow_available",
+        "get_pillow_requirement",
         "load_image_from_file",
         "load_bytes_from_pdf_file",
         "get_load_image_func",
@@ -378,6 +382,7 @@ _IMPORT_STRUCTURE = {
         "draw_text",
         "draw_boxes",
         "interactive_imshow",
+        "viz_handler",
     ],
 }
 
@@ -403,6 +408,7 @@ if tf_available():
     except Exception:  # pylint: disable=W0703
         pass
 
+
 # Direct imports for type-checking
 if TYPE_CHECKING:
     from .analyzer import *
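The four availability helpers and the `viz_handler` singleton are new `_IMPORT_STRUCTURE` entries, so they should surface at package level through the `_LazyModule` machinery. A minimal sketch, assuming the lazy re-export behaves as those entries suggest:

    import deepdoctection as dd

    # Both imaging backends are optional; viz_handler (see utils/viz.py, +405 -86)
    # is expected to pick whichever of OpenCV/Pillow is installed.
    print(dd.opencv_available(), dd.pillow_available())
    print(dd.get_opencv_requirement(), dd.get_pillow_requirement())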
deepdoctection/analyzer/dd.py
CHANGED
@@ -36,7 +36,7 @@ from ..extern.tessocr import TesseractOcrDetector
 from ..extern.texocr import TextractOcrDetector
 from ..pipe.base import PipelineComponent
 from ..pipe.cell import DetectResultGenerator, SubImageLayoutService
-from ..pipe.common import MatchingService, PageParsingService
+from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..pipe.layout import ImageLayoutService
 from ..pipe.order import TextOrderService
@@ -206,7 +206,7 @@ def _build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer
         profile = ModelCatalog.get_profile(weights)
         if profile.architecture is None:
             raise ValueError("model profile.architecture must be specified")
-        return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE)
+        return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
     if cfg.OCR.USE_TEXTRACT:
         credentials_kwargs = {
             "aws_access_key_id": environ.get("ACCESS_KEY"),
@@ -225,7 +225,7 @@ def _build_doctr_word(cfg: AttrDict) -> DoctrTextlineDetector:
         raise ValueError("model profile.architecture must be specified")
     if profile.categories is None:
         raise ValueError("model profile.categories must be specified")
-    return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE)
+    return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE, lib=cfg.LIB)
 
 
 def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
@@ -242,6 +242,17 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
     layout = _build_service(d_layout, cfg, "LAYOUT")
     pipe_component_list.append(layout)
 
+    # setup layout nms service
+    if cfg.LAYOUT_NMS_PAIRS.COMBINATIONS and cfg.USE_LAYOUT:
+        if not isinstance(cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
+            cfg.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
+        ):
+            raise ValueError("LAYOUT_NMS_PAIRS mus be a list of lists")
+        layout_nms_serivce = AnnotationNmsService(
+            cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, cfg.LAYOUT_NMS_PAIRS.THRESHOLDS, cfg.LAYOUT_NMS_PAIRS.PRIORITY
+        )
+        pipe_component_list.append(layout_nms_serivce)
+
     # setup tables service
     if cfg.USE_TABLE_SEGMENTATION:
         d_item = _build_detector(cfg, "ITEM")
@@ -302,6 +313,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
         )
         pipe_component_list.append(text)
 
+    if cfg.USE_PDF_MINER or cfg.USE_OCR:
         match = MatchingService(
             parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
             child_categories=LayoutType.word,
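The new NMS step reads three fields from `cfg.LAYOUT_NMS_PAIRS`; the matching conf_dd_one.yaml additions (+4 lines) are not shown in this diff. A sketch of constructing the service directly, mirroring the positional call above — the category pair, threshold, and priority values below are made-up illustrations, not defaults from the release:

    from deepdoctection.pipe.common import AnnotationNmsService

    nms_service = AnnotationNmsService(
        [["table", "title"]],  # COMBINATIONS: pairs of categories competing in NMS
        [0.05],                # THRESHOLDS: one IoU threshold per pair
        [None],                # PRIORITY: optional per-pair category that always wins
    )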
deepdoctection/datapoint/convert.py
CHANGED
@@ -25,7 +25,6 @@ from io import BytesIO
 from shutil import which
 from typing import Any, Optional, Union, no_type_check
 
-import cv2
 import numpy as np
 from numpy import uint8
 from numpy.typing import NDArray
@@ -34,6 +33,7 @@ from PyPDF2 import PdfReader
 from ..utils.detection_types import ImageType
 from ..utils.develop import deprecated
 from ..utils.pdf_utils import pdf_to_np_array
+from ..utils.viz import viz_handler
 
 __all__ = [
     "convert_b64_to_np_array",
@@ -81,9 +81,8 @@ def convert_b64_to_np_array(image: str) -> ImageType:
     :param image: An image as base64 string.
     :return: numpy array.
     """
-    …
-    …
-    return np_array.astype(uint8)
+
+    return viz_handler.convert_b64_to_np(image).astype(uint8)
@@ -93,9 +92,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
     :param np_image: An image as numpy array.
     :return: An image as base64 string.
     """
-    …
-    image = base64.b64encode(np_encode[1]).decode("utf-8")  # type: ignore
-    return image
+    return viz_handler.convert_np_to_b64(np_image)
 
 
 @no_type_check
@@ -106,9 +103,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
     :param np_image: An image as numpy array.
     :return: An image as base64 bytes.
     """
-    …
-    b_image = np_encode[1].tobytes()
-    return b_image
+    return viz_handler.encode(np_image)
 
 
 @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
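All three converters now delegate to `viz_handler` instead of calling cv2 directly. A quick round-trip sketch, which should hold under either backend since PNG encoding preserves the shape of a 3-channel uint8 array:

    import numpy as np
    from deepdoctection.datapoint.convert import convert_b64_to_np_array, convert_np_array_to_b64

    img = np.zeros((32, 32, 3), dtype=np.uint8)
    b64 = convert_np_array_to_b64(img)       # encode via viz_handler
    restored = convert_b64_to_np_array(b64)  # decode via viz_handler
    assert restored.shape == img.shape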
deepdoctection/datapoint/image.py
CHANGED
@@ -626,8 +626,8 @@ class Image:
         self.remove_image_from_lower_hierachy()
         export_dict = self.as_dict()
         export_dict["location"] = str(export_dict["location"])
-        if …
-        export_dict["_image"] = …
+        if not image_to_json:
+            export_dict["_image"] = None
         if dry:
             return export_dict
         with open(path_json, "w", encoding="UTF-8") as file:
deepdoctection/datapoint/view.py
CHANGED
@@ -23,7 +23,6 @@ simplify consumption
 from copy import copy
 from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union, no_type_check
 
-import cv2
 import numpy as np
 
 from ..utils.detection_types import ImageType, JsonDict, Pathlike
@@ -39,7 +38,7 @@ from ..utils.settings import (
     WordType,
     get_type,
 )
-from ..utils.viz import draw_boxes, interactive_imshow
+from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
 from .annotation import ContainerAnnotation, ImageAnnotation, SummaryAnnotation, ann_from_dict
 from .box import BoundingBox
 from .image import Image
@@ -415,6 +414,17 @@ class Page(Image):
     text_container: ObjectTypes
     floating_text_block_categories: List[ObjectTypes]
     image_orig: Image
+    _attribute_names: Set[str] = {
+        "text",
+        "chunks",
+        "tables",
+        "layouts",
+        "words",
+        "file_name",
+        "location",
+        "document_id",
+        "page_number",
+    }
 
     @no_type_check
     def get_annotation(
@@ -734,7 +744,9 @@ class Page(Image):
             )
         else:
             img = draw_boxes(self.image, boxes, category_names_list)
-            …
+            scale_fx, scale_fy = 1.3, 1.3
+            scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
+            img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
         else:
             img = self.image
 
@@ -744,24 +756,32 @@ class Page(Image):
             return img
         return None
 
-    @staticmethod
-    def get_attribute_names() -> Set[str]:
+    @classmethod
+    def get_attribute_names(cls) -> Set[str]:
         """
         :return: A set of registered attributes.
         """
-        return set(PageType).union(
-            {
-                "text",
-                "chunks",
-                "tables",
-                "layouts",
-                "words",
-                "file_name",
-                "location",
-                "document_id",
-                "page_number",
-            }
-        )
+        return set(PageType).union(cls._attribute_names)
+
+    @classmethod
+    def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
+        """
+        Adding a custom attribute name to a Page class.
+
+        **Example:**
+
+            Page.add_attribute_name("foo")
+
+            page = Page.from_image(...)
+            print(page.foo)
+
+        Note, that the attribute must be registered as a valid `ObjectTypes`
+
+        :param attribute_name: attribute name to add
+        """
+        attribute_name = get_type(attribute_name)
+        cls._attribute_names.add(attribute_name.value)
 
     def save(
         self,
deepdoctection/datasets/save.py
CHANGED
@@ -23,13 +23,12 @@ import json
 from pathlib import Path
 from typing import Optional
 
-from cv2 import imwrite
-
 from ..dataflow import DataFlow, MapData, SerializerJsonlines
 from ..datapoint.convert import convert_b64_to_np_array
 from ..datapoint.image import Image
 from ..utils.detection_types import JsonDict, Pathlike
 from ..utils.fs import mkdir_p
+from ..utils.viz import viz_handler
 
 
 def dataflow_to_json(
@@ -84,7 +83,8 @@ def dataflow_to_json(
         target_file_png = path / "image" / (dp["file_name"].split(".")[0] + ".png")
         image = dp.pop("_image")
         image = convert_b64_to_np_array(image)
-        imwrite(str(target_file_png), image)
+
+        viz_handler.write_image(str(target_file_png), image)
 
     with open(target_file, "w", encoding="UTF-8") as file:
         json.dump(dp, file)
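Same substitution as in convert.py: `cv2.imwrite` is replaced by the backend-neutral writer. A sketch of the same call pattern outside the dataflow, with an illustrative path:

    import numpy as np
    from deepdoctection.utils.viz import viz_handler

    img = np.full((64, 64, 3), 255, dtype=np.uint8)  # white dummy image
    viz_handler.write_image("/tmp/sample.png", img)  # path is illustrative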
deepdoctection/extern/d2detect.py
CHANGED
@@ -23,7 +23,6 @@ from copy import copy
 from pathlib import Path
 from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence
 
-import cv2
 import numpy as np
 
 from ..utils.detection_types import ImageType, Requirement
@@ -130,7 +129,7 @@ def d2_jit_predict_image(
     keep = batched_nms(boxes, scores, class_masks, nms_thresh_class_agnostic).cpu()
 
     # The exported model does not contain the final resize step, so we need to add it manually here
-    inverse_resizer = ResizeTransform(new_height, new_width, height, width, …)
+    inverse_resizer = ResizeTransform(new_height, new_width, height, width, "VIZ")
     np_boxes = np.reshape(boxes.cpu().numpy(), (-1, 2))
     np_boxes = inverse_resizer.apply_coords(np_boxes)
     np_boxes = np.reshape(np_boxes, (-1, 4))
deepdoctection/extern/doctrocr.py
CHANGED
@@ -62,14 +62,14 @@ def _set_device_str(device: Optional[str] = None) -> str:
     return device
 
 
-def _load_model(path_weights: str, doctr_predictor: Any, device: str) -> None:
-    if pytorch_available():
+def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
+    if lib == "PT" and pytorch_available():
         state_dict = torch.load(path_weights, map_location=device)
         for key in list(state_dict.keys()):
             state_dict["model." + key] = state_dict.pop(key)
         doctr_predictor.load_state_dict(state_dict)
         doctr_predictor.to(device)
-    elif tf_available():
+    elif lib == "TF" and tf_available():
         # Unzip the archive
         params_path = Path(path_weights).parent
         is_zip_path = path_weights.endswith(".zip")
@@ -99,7 +99,7 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
         DetectionResult(
             box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
         )
-        for box in raw_output[0]
+        for box in raw_output[0]["words"]
     ]
     return detection_results
 
@@ -173,7 +173,9 @@ class DoctrTextlineDetector(ObjectDetector):
         path_weights: str,
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
+        lib: str = "TF",
     ) -> None:
+        self.lib = lib
         self.name = "doctr_text_detector"
         self.architecture = architecture
         self.path_weights = path_weights
@@ -205,14 +207,14 @@ class DoctrTextlineDetector(ObjectDetector):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input)
+        return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
 
     def possible_categories(self) -> List[ObjectTypes]:
        return [LayoutType.word]
 
     def load_model(self) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device)
+        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
 
 
 class DoctrTextRecognizer(TextRecognizer):
@@ -252,7 +254,10 @@ class DoctrTextRecognizer(TextRecognizer):
 
     """
 
-    def __init__(self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None) -> None:
+    def __init__(
+        self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None, lib: str = "TF"
+    ) -> None:
+        self.lib = lib
         self.name = "doctr_text_recognizer"
         self.architecture = architecture
         self.path_weights = path_weights
@@ -281,8 +286,8 @@ class DoctrTextRecognizer(TextRecognizer):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.device_input)
+        return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
 
     def load_model(self) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device)
+        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
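Both doctr wrappers now take a `lib` argument ("TF" by default, per the new signatures) that pins which framework `_load_model` uses, rather than auto-detecting whatever is installed. A sketch with placeholder weights; `crnn_vgg16_bn` is a standard doctr recognition architecture, but treat both values as assumptions here:

    from deepdoctection.extern.doctrocr import DoctrTextRecognizer

    # path is a placeholder; lib="PT" forces the PyTorch branch of _load_model
    # even when TensorFlow is installed alongside it.
    recognizer = DoctrTextRecognizer("crnn_vgg16_bn", "/path/to/weights.pt", device="cpu", lib="PT")
    recognizer.load_model()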
deepdoctection/extern/tp/tpfrcnn/common.py
CHANGED
@@ -10,7 +10,6 @@ This file is modified from
 """
 
-import cv2
 import numpy as np
 from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform  # pylint: disable=E0401
 
@@ -25,11 +24,11 @@ class CustomResize(ImageAugmentor):
     Try resizing the shortest edge to a certain number while avoiding the longest edge to exceed max_size.
     """
 
-    def __init__(self, short_edge_length, max_size, interp=cv2.INTER_LINEAR):
+    def __init__(self, short_edge_length, max_size, interp=1):
         """
         :param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
         :param max_size: maximum allowed longest edge length.
-        :param interp: …
+        :param interp: Interpolation mode. We use Tensorpack's internal `ResizeTransform`, that always requires OpenCV
         """
         super().__init__()
         if isinstance(short_edge_length, int):
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py
CHANGED
@@ -165,7 +165,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
     """
     n_in = l.shape[1]
     if n_in != n_out:  # change dimension when channel is not the same
-        return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation)
+        return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation)  # pylint: disable=E1124
     return l
 
 
@@ -181,12 +181,12 @@ def resnet_bottleneck(l, ch_out, stride, cfg):
     """
     shortcut = l
 
-    l = Conv2D("conv1", l, ch_out, 1, strides=1)
+    l = Conv2D("conv1", l, ch_out, 1, strides=1)  # pylint: disable=E1124
     if stride == 2:
         l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
-        l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID")
+        l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID")  # pylint: disable=E1124
     else:
-        l = Conv2D("conv2", l, ch_out, 3, strides=stride)
+        l = Conv2D("conv2", l, ch_out, 3, strides=stride)  # pylint: disable=E1124
     if cfg.BACKBONE.NORM != "None":
         l = Conv2D("conv3", l, ch_out * 4, 1, activation=get_norm(cfg, zero_init=True))
     else:
@@ -263,9 +263,9 @@ def resnet_fpn_backbone(image, cfg):
         ),
     )
     l.set_shape([None, chan, None, None])
-    l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID")
+    l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID")  # pylint: disable=E1124
     l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
-    l = MaxPooling("pool0", l, 3, strides=2, padding="VALID")
+    l = MaxPooling("pool0", l, 3, strides=2, padding="VALID")  # pylint: disable=E1124
 
     bottleneck = resnet_bottleneck if cfg.BACKBONE.BOTTLENECK == "resnet" else resnext32x4d_bottleneck
     with backbone_scope(cfg=cfg, freeze=freeze_at > 1):
deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py
CHANGED
@@ -98,14 +98,14 @@ class GeneralizedRCNN(ModelDescWithConfig):
 
         image = self.preprocess(inputs["image"])  # 1CHW
 
-        features = self.backbone(image)
+        features = self.backbone(image)  # pylint: disable=E1101
         anchor_inputs = {k: v for k, v in inputs.items() if k.startswith("anchor_")}
-        proposals, rpn_losses = self.rpn(image, features, anchor_inputs)
+        proposals, rpn_losses = self.rpn(image, features, anchor_inputs)  # pylint: disable=E1101
 
         targets = [inputs[k] for k in ["gt_boxes", "gt_labels", "gt_masks"] if k in inputs]
         gt_boxes_area = tf.reduce_mean(tf_area(inputs["gt_boxes"]), name="mean_gt_box_area")
         add_moving_summary(gt_boxes_area)
-        head_losses = self.roi_heads(image, features, proposals, targets)
+        head_losses = self.roi_heads(image, features, proposals, targets)  # pylint: disable=E1101
 
         if self.training:
             wd_cost = regularize_cost(".*/W", l2_regularizer(self.cfg.TRAIN.WEIGHT_DECAY), name="wd_cost")
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py
CHANGED
@@ -63,7 +63,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
             x = tf.transpose(x, [0, 3, 1, 2])
             return x
         except AttributeError:
-            return FixedUnPooling(name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first")
+            return FixedUnPooling(
+                name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first"
+            )  # pylint: disable=E1124
 
     with argscope(
         Conv2D,
@@ -85,7 +87,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
     p2345 = [Conv2D(f"posthoc_3x3_p{i + 2}", c, num_channel, 3) for i, c in enumerate(lat_sum_5432[::-1])]
     if use_gn:
         p2345 = [GroupNorm(f"gn_p{i + 2}", c) for i, c in enumerate(p2345)]
-    p6 = MaxPooling("maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID")
+    p6 = MaxPooling(
+        "maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID"
+    )  # pylint: disable=E1124
     return p2345 + [p6]
 
 
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py
CHANGED
@@ -267,8 +267,10 @@ def fastrcnn_2fc_head(feature, cfg):
 
     dim = cfg.FPN.FRCNN_FC_HEAD_DIM
     init = tfv1.variance_scaling_initializer()
-    hidden = FullyConnected("fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu)
-    hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
+    hidden = FullyConnected(
+        "fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu
+    )  # pylint: disable=E1124
+    hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu)  # pylint: disable=E1124
     return hidden
 
 
@@ -298,7 +300,7 @@ def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None, **kwargs):  # pylint:
         l = Conv2D(f"conv{k}", l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
         if norm is not None:
             l = GroupNorm(f"gn{k}", l)
-    l = FullyConnected(
+    l = FullyConnected(  # pylint: disable=E1124
         "fc",
         l,
         cfg.FPN.FRCNN_FC_HEAD_DIM,
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py
CHANGED
@@ -88,7 +88,9 @@ def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None, **kwargs)
         l = Conv2D(f"fcn{k}", l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
         if norm is not None:
             l = GroupNorm(f"gn{k}", l)
-    l = Conv2DTranspose("deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
+    l = Conv2DTranspose(
+        "deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu
+    )  # pylint: disable=E1124
     l = Conv2D("conv", l, num_category, 1, kernel_initializer=tf.random_normal_initializer(stddev=0.001))
     return l
 
deepdoctection/extern/tp/tpfrcnn/predict.py
CHANGED
@@ -79,6 +79,7 @@ def _paste_mask(box, mask, shape, mrcnn_accurate_paste):
 
     # rounding errors could happen here, because masks were not originally computed for this shape.
    # but it's hard to do better, because the network does not know the "original" scale
+
     mask = (cv2.resize(mask, (w, h)) > 0.5).astype("uint8")
     ret = np.zeros(shape, dtype="uint8")
     ret[y_0 : y_1 + 1, x_0 : x_1 + 1] = mask
deepdoctection/mapper/laylmstruct.py
CHANGED
@@ -26,7 +26,6 @@ from typing import Any, Callable, Dict, List, Literal, NewType, Optional, Sequen
 
 import numpy as np
 import numpy.typing as npt
-from cv2 import INTER_LINEAR
 
 from ..datapoint.annotation import ContainerAnnotation
 from ..datapoint.convert import box_to_point4, point4_to_box
@@ -179,11 +178,11 @@ def image_to_raw_layoutlm_features(
 
     boxes = box_to_point4(boxes)
 
-    resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, INTER_LINEAR)
+    resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, "VIZ")
 
     if dp.image is not None:
         if image_width != input_width or image_height != input_height:
-            image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, INTER_LINEAR)
+            image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, "VIZ")
             image = image_only_resizer.apply_image(dp.image)
         else:
            image = resizer.apply_image(dp.image)
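Callers now pass the string "VIZ" where a cv2 interpolation flag used to go. A sketch, assuming `ResizeTransform` lives in `deepdoctection/utils/transform.py` (also touched in this release, +9 -9) and keeps the (h, w, new_h, new_w, mode) argument order seen above:

    import numpy as np
    from deepdoctection.utils.transform import ResizeTransform

    img = np.zeros((600, 800, 3), dtype=np.uint8)
    resizer = ResizeTransform(600, 800, 300, 400, "VIZ")  # (h, w, new_h, new_w, mode)
    small = resizer.apply_image(img)  # expected shape: (300, 400, 3)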
deepdoctection/utils/context.py
CHANGED
@@ -29,10 +29,10 @@ from time import perf_counter as timer
 from typing import Any, Generator, Iterator, Optional, Tuple, Union
 
 import numpy as np
-from cv2 import imwrite
 
 from .detection_types import ImageType
 from .logger import logger
+from .viz import viz_handler
 
 __all__ = ["timeout_manager", "save_tmp_file", "timed_operation"]
 
@@ -89,7 +89,7 @@ def save_tmp_file(image: Union[str, ImageType, bytes], prefix: str) -> Iterator[
         return
     if isinstance(image, (np.ndarray, np.generic)):
         input_file_name = file.name + ".PNG"
-        imwrite(input_file_name, image)
+        viz_handler.write_image(input_file_name, image)
         yield file.name, input_file_name
     if isinstance(image, bytes):
        input_file_name = file.name