deepdoctection 0.26__py3-none-any.whl → 0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (31) hide show
  1. deepdoctection/__init__.py +7 -1
  2. deepdoctection/analyzer/dd.py +15 -3
  3. deepdoctection/configs/conf_dd_one.yaml +4 -0
  4. deepdoctection/datapoint/convert.py +5 -10
  5. deepdoctection/datapoint/image.py +2 -2
  6. deepdoctection/datapoint/view.py +38 -18
  7. deepdoctection/datasets/save.py +3 -3
  8. deepdoctection/extern/d2detect.py +1 -2
  9. deepdoctection/extern/doctrocr.py +14 -9
  10. deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
  11. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
  12. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
  13. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
  14. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
  15. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
  16. deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
  17. deepdoctection/mapper/laylmstruct.py +2 -3
  18. deepdoctection/utils/context.py +2 -2
  19. deepdoctection/utils/file_utils.py +63 -26
  20. deepdoctection/utils/fs.py +6 -6
  21. deepdoctection/utils/pdf_utils.py +2 -2
  22. deepdoctection/utils/settings.py +8 -1
  23. deepdoctection/utils/transform.py +9 -9
  24. deepdoctection/utils/viz.py +405 -86
  25. {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/METADATA +93 -94
  26. {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/RECORD +31 -31
  27. {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/WHEEL +1 -1
  28. tests/analyzer/test_dd.py +6 -57
  29. tests/conftest.py +2 -0
  30. {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/LICENSE +0 -0
  31. {deepdoctection-0.26.dist-info → deepdoctection-0.27.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ from packaging import version
14
14
  from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
15
15
  from .utils.logger import logger
16
16
 
17
- __version__ = 0.26
17
+ __version__ = 0.27
18
18
 
19
19
  _IMPORT_STRUCTURE = {
20
20
  "analyzer": ["get_dd_analyzer", "build_analyzer"],
@@ -311,6 +311,10 @@ _IMPORT_STRUCTURE = {
311
311
  "get_fasttext_requirement",
312
312
  "wandb_available",
313
313
  "get_wandb_requirement",
314
+ "opencv_available",
315
+ "get_opencv_requirement",
316
+ "pillow_available",
317
+ "get_pillow_requirement",
314
318
  "load_image_from_file",
315
319
  "load_bytes_from_pdf_file",
316
320
  "get_load_image_func",
@@ -378,6 +382,7 @@ _IMPORT_STRUCTURE = {
378
382
  "draw_text",
379
383
  "draw_boxes",
380
384
  "interactive_imshow",
385
+ "viz_handler",
381
386
  ],
382
387
  }
383
388
 
@@ -403,6 +408,7 @@ if tf_available():
403
408
  except Exception: # pylint: disable=W0703
404
409
  pass
405
410
 
411
+
406
412
  # Direct imports for type-checking
407
413
  if TYPE_CHECKING:
408
414
  from .analyzer import *
@@ -36,7 +36,7 @@ from ..extern.tessocr import TesseractOcrDetector
36
36
  from ..extern.texocr import TextractOcrDetector
37
37
  from ..pipe.base import PipelineComponent
38
38
  from ..pipe.cell import DetectResultGenerator, SubImageLayoutService
39
- from ..pipe.common import MatchingService, PageParsingService
39
+ from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
40
40
  from ..pipe.doctectionpipe import DoctectionPipe
41
41
  from ..pipe.layout import ImageLayoutService
42
42
  from ..pipe.order import TextOrderService
@@ -206,7 +206,7 @@ def _build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer
206
206
  profile = ModelCatalog.get_profile(weights)
207
207
  if profile.architecture is None:
208
208
  raise ValueError("model profile.architecture must be specified")
209
- return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE)
209
+ return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
210
210
  if cfg.OCR.USE_TEXTRACT:
211
211
  credentials_kwargs = {
212
212
  "aws_access_key_id": environ.get("ACCESS_KEY"),
@@ -225,7 +225,7 @@ def _build_doctr_word(cfg: AttrDict) -> DoctrTextlineDetector:
225
225
  raise ValueError("model profile.architecture must be specified")
226
226
  if profile.categories is None:
227
227
  raise ValueError("model profile.categories must be specified")
228
- return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE)
228
+ return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE, lib=cfg.LIB)
229
229
 
230
230
 
231
231
  def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
@@ -242,6 +242,17 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
242
242
  layout = _build_service(d_layout, cfg, "LAYOUT")
243
243
  pipe_component_list.append(layout)
244
244
 
245
+ # setup layout nms service
246
+ if cfg.LAYOUT_NMS_PAIRS.COMBINATIONS and cfg.USE_LAYOUT:
247
+ if not isinstance(cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
248
+ cfg.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
249
+ ):
250
+ raise ValueError("LAYOUT_NMS_PAIRS must be a list of lists")
251
+ layout_nms_service = AnnotationNmsService(
252
+ cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, cfg.LAYOUT_NMS_PAIRS.THRESHOLDS, cfg.LAYOUT_NMS_PAIRS.PRIORITY
253
+ )
254
+ pipe_component_list.append(layout_nms_service)
255
+
245
256
  # setup tables service
246
257
  if cfg.USE_TABLE_SEGMENTATION:
247
258
  d_item = _build_detector(cfg, "ITEM")
@@ -302,6 +313,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
302
313
  )
303
314
  pipe_component_list.append(text)
304
315
 
316
+ if cfg.USE_PDF_MINER or cfg.USE_OCR:
305
317
  match = MatchingService(
306
318
  parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
307
319
  child_categories=LayoutType.word,
@@ -33,6 +33,10 @@ PT:
33
33
  WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
34
34
  WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
35
35
  FILTER:
36
+ LAYOUT_NMS_PAIRS:
37
+ COMBINATIONS:
38
+ THRESHOLDS:
39
+ PRIORITY:
36
40
  SEGMENTATION:
37
41
  ASSIGNMENT_RULE: ioa
38
42
  THRESHOLD_ROWS: 0.4
@@ -25,7 +25,6 @@ from io import BytesIO
25
25
  from shutil import which
26
26
  from typing import Any, Optional, Union, no_type_check
27
27
 
28
- import cv2
29
28
  import numpy as np
30
29
  from numpy import uint8
31
30
  from numpy.typing import NDArray
@@ -34,6 +33,7 @@ from PyPDF2 import PdfReader
34
33
  from ..utils.detection_types import ImageType
35
34
  from ..utils.develop import deprecated
36
35
  from ..utils.pdf_utils import pdf_to_np_array
36
+ from ..utils.viz import viz_handler
37
37
 
38
38
  __all__ = [
39
39
  "convert_b64_to_np_array",
@@ -81,9 +81,8 @@ def convert_b64_to_np_array(image: str) -> ImageType:
81
81
  :param image: An image as base64 string.
82
82
  :return: numpy array.
83
83
  """
84
- np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
85
- np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
86
- return np_array.astype(uint8)
84
+
85
+ return viz_handler.convert_b64_to_np(image).astype(uint8)
87
86
 
88
87
 
89
88
  def convert_np_array_to_b64(np_image: ImageType) -> str:
@@ -93,9 +92,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
93
92
  :param np_image: An image as numpy array.
94
93
  :return: An image as base64 string.
95
94
  """
96
- np_encode = cv2.imencode(".png", np_image)
97
- image = base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
98
- return image
95
+ return viz_handler.convert_np_to_b64(np_image)
99
96
 
100
97
 
101
98
  @no_type_check
@@ -106,9 +103,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
106
103
  :param np_image: An image as numpy array.
107
104
  :return: An image as base64 bytes.
108
105
  """
109
- np_encode = cv2.imencode(".png", np_image)
110
- b_image = np_encode[1].tobytes()
111
- return b_image
106
+ return viz_handler.encode(np_image)
112
107
 
113
108
 
114
109
  @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
@@ -626,8 +626,8 @@ class Image:
626
626
  self.remove_image_from_lower_hierachy()
627
627
  export_dict = self.as_dict()
628
628
  export_dict["location"] = str(export_dict["location"])
629
- if image_to_json and self.image is not None:
630
- export_dict["_image"] = convert_np_array_to_b64(self.image)
629
+ if not image_to_json:
630
+ export_dict["_image"] = None
631
631
  if dry:
632
632
  return export_dict
633
633
  with open(path_json, "w", encoding="UTF-8") as file:
@@ -23,7 +23,6 @@ simplify consumption
23
23
  from copy import copy
24
24
  from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union, no_type_check
25
25
 
26
- import cv2
27
26
  import numpy as np
28
27
 
29
28
  from ..utils.detection_types import ImageType, JsonDict, Pathlike
@@ -39,7 +38,7 @@ from ..utils.settings import (
39
38
  WordType,
40
39
  get_type,
41
40
  )
42
- from ..utils.viz import draw_boxes, interactive_imshow
41
+ from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
43
42
  from .annotation import ContainerAnnotation, ImageAnnotation, SummaryAnnotation, ann_from_dict
44
43
  from .box import BoundingBox
45
44
  from .image import Image
@@ -415,6 +414,17 @@ class Page(Image):
415
414
  text_container: ObjectTypes
416
415
  floating_text_block_categories: List[ObjectTypes]
417
416
  image_orig: Image
417
+ _attribute_names: Set[str] = {
418
+ "text",
419
+ "chunks",
420
+ "tables",
421
+ "layouts",
422
+ "words",
423
+ "file_name",
424
+ "location",
425
+ "document_id",
426
+ "page_number",
427
+ }
418
428
 
419
429
  @no_type_check
420
430
  def get_annotation(
@@ -734,7 +744,9 @@ class Page(Image):
734
744
  )
735
745
  else:
736
746
  img = draw_boxes(self.image, boxes, category_names_list)
737
- img = cv2.resize(img, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC)
747
+ scale_fx, scale_fy = 1.3, 1.3
748
+ scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
749
+ img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
738
750
  else:
739
751
  img = self.image
740
752
 
@@ -744,24 +756,32 @@ class Page(Image):
744
756
  return img
745
757
  return None
746
758
 
747
- @staticmethod
748
- def get_attribute_names() -> Set[str]:
759
+ @classmethod
760
+ def get_attribute_names(cls) -> Set[str]:
749
761
  """
750
762
  :return: A set of registered attributes.
751
763
  """
752
- return set(PageType).union(
753
- {
754
- "text",
755
- "chunks",
756
- "tables",
757
- "layouts",
758
- "words",
759
- "file_name",
760
- "location",
761
- "document_id",
762
- "page_number",
763
- }
764
- )
764
+ return set(PageType).union(cls._attribute_names)
765
+
766
+ @classmethod
767
+ def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
768
+ """
769
+ Adding a custom attribute name to a Page class.
770
+
771
+ **Example:**
772
+
773
+ Page.add_attribute_name("foo")
774
+
775
+ page = Page.from_image(...)
776
+ print(page.foo)
777
+
778
+ Note that the attribute must be registered as a valid `ObjectTypes`
779
+
780
+ :param attribute_name: attribute name to add
781
+ """
782
+
783
+ attribute_name = get_type(attribute_name)
784
+ cls._attribute_names.add(attribute_name.value)
765
785
 
766
786
  def save(
767
787
  self,
@@ -23,13 +23,12 @@ import json
23
23
  from pathlib import Path
24
24
  from typing import Optional
25
25
 
26
- from cv2 import imwrite
27
-
28
26
  from ..dataflow import DataFlow, MapData, SerializerJsonlines
29
27
  from ..datapoint.convert import convert_b64_to_np_array
30
28
  from ..datapoint.image import Image
31
29
  from ..utils.detection_types import JsonDict, Pathlike
32
30
  from ..utils.fs import mkdir_p
31
+ from ..utils.viz import viz_handler
33
32
 
34
33
 
35
34
  def dataflow_to_json(
@@ -84,7 +83,8 @@ def dataflow_to_json(
84
83
  target_file_png = path / "image" / (dp["file_name"].split(".")[0] + ".png")
85
84
  image = dp.pop("_image")
86
85
  image = convert_b64_to_np_array(image)
87
- imwrite(str(target_file_png), image)
86
+
87
+ viz_handler.write_image(str(target_file_png), image)
88
88
 
89
89
  with open(target_file, "w", encoding="UTF-8") as file:
90
90
  json.dump(dp, file)
@@ -23,7 +23,6 @@ from copy import copy
23
23
  from pathlib import Path
24
24
  from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence
25
25
 
26
- import cv2
27
26
  import numpy as np
28
27
 
29
28
  from ..utils.detection_types import ImageType, Requirement
@@ -130,7 +129,7 @@ def d2_jit_predict_image(
130
129
  keep = batched_nms(boxes, scores, class_masks, nms_thresh_class_agnostic).cpu()
131
130
 
132
131
  # The exported model does not contain the final resize step, so we need to add it manually here
133
- inverse_resizer = ResizeTransform(new_height, new_width, height, width, cv2.INTER_LINEAR)
132
+ inverse_resizer = ResizeTransform(new_height, new_width, height, width, "VIZ")
134
133
  np_boxes = np.reshape(boxes.cpu().numpy(), (-1, 2))
135
134
  np_boxes = inverse_resizer.apply_coords(np_boxes)
136
135
  np_boxes = np.reshape(np_boxes, (-1, 4))
@@ -62,14 +62,14 @@ def _set_device_str(device: Optional[str] = None) -> str:
62
62
  return device
63
63
 
64
64
 
65
- def _load_model(path_weights: str, doctr_predictor: Any, device: str) -> None:
66
- if pytorch_available():
65
+ def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
66
+ if lib == "PT" and pytorch_available():
67
67
  state_dict = torch.load(path_weights, map_location=device)
68
68
  for key in list(state_dict.keys()):
69
69
  state_dict["model." + key] = state_dict.pop(key)
70
70
  doctr_predictor.load_state_dict(state_dict)
71
71
  doctr_predictor.to(device)
72
- elif tf_available():
72
+ elif lib == "TF" and tf_available():
73
73
  # Unzip the archive
74
74
  params_path = Path(path_weights).parent
75
75
  is_zip_path = path_weights.endswith(".zip")
@@ -99,7 +99,7 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
99
99
  DetectionResult(
100
100
  box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
101
101
  )
102
- for box in raw_output[0]
102
+ for box in raw_output[0]["words"]
103
103
  ]
104
104
  return detection_results
105
105
 
@@ -173,7 +173,9 @@ class DoctrTextlineDetector(ObjectDetector):
173
173
  path_weights: str,
174
174
  categories: Mapping[str, TypeOrStr],
175
175
  device: Optional[Literal["cpu", "cuda"]] = None,
176
+ lib: str = "TF",
176
177
  ) -> None:
178
+ self.lib = lib
177
179
  self.name = "doctr_text_detector"
178
180
  self.architecture = architecture
179
181
  self.path_weights = path_weights
@@ -205,14 +207,14 @@ class DoctrTextlineDetector(ObjectDetector):
205
207
  raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
206
208
 
207
209
  def clone(self) -> PredictorBase:
208
- return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input)
210
+ return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
209
211
 
210
212
  def possible_categories(self) -> List[ObjectTypes]:
211
213
  return [LayoutType.word]
212
214
 
213
215
  def load_model(self) -> None:
214
216
  """Loading model weights"""
215
- _load_model(self.path_weights, self.doctr_predictor, self.device)
217
+ _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
216
218
 
217
219
 
218
220
  class DoctrTextRecognizer(TextRecognizer):
@@ -252,7 +254,10 @@ class DoctrTextRecognizer(TextRecognizer):
252
254
 
253
255
  """
254
256
 
255
- def __init__(self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None) -> None:
257
+ def __init__(
258
+ self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None, lib: str = "TF"
259
+ ) -> None:
260
+ self.lib = lib
256
261
  self.name = "doctr_text_recognizer"
257
262
  self.architecture = architecture
258
263
  self.path_weights = path_weights
@@ -281,8 +286,8 @@ class DoctrTextRecognizer(TextRecognizer):
281
286
  raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
282
287
 
283
288
  def clone(self) -> PredictorBase:
284
- return self.__class__(self.architecture, self.path_weights, self.device_input)
289
+ return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
285
290
 
286
291
  def load_model(self) -> None:
287
292
  """Loading model weights"""
288
- _load_model(self.path_weights, self.doctr_predictor, self.device)
293
+ _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
@@ -10,7 +10,6 @@ This file is modified from
10
10
  """
11
11
 
12
12
 
13
- import cv2
14
13
  import numpy as np
15
14
  from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform # pylint: disable=E0401
16
15
 
@@ -25,11 +24,11 @@ class CustomResize(ImageAugmentor):
25
24
  Try resizing the shortest edge to a certain number while avoiding the longest edge to exceed max_size.
26
25
  """
27
26
 
28
- def __init__(self, short_edge_length, max_size, interp=cv2.INTER_LINEAR):
27
+ def __init__(self, short_edge_length, max_size, interp=1):
29
28
  """
30
29
  :param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
31
30
  :param max_size: maximum allowed longest edge length.
32
- :param interp: cv2 interpolation mode
31
+ :param interp: Interpolation mode. We use Tensorpack's internal `ResizeTransform`, that always requires OpenCV
33
32
  """
34
33
  super().__init__()
35
34
  if isinstance(short_edge_length, int):
@@ -165,7 +165,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
165
165
  """
166
166
  n_in = l.shape[1]
167
167
  if n_in != n_out: # change dimension when channel is not the same
168
- return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation)
168
+ return Conv2D("convshortcut", l, n_out, 1, strides=stride, activation=activation) # pylint: disable=E1124
169
169
  return l
170
170
 
171
171
 
@@ -181,12 +181,12 @@ def resnet_bottleneck(l, ch_out, stride, cfg):
181
181
  """
182
182
  shortcut = l
183
183
 
184
- l = Conv2D("conv1", l, ch_out, 1, strides=1)
184
+ l = Conv2D("conv1", l, ch_out, 1, strides=1) # pylint: disable=E1124
185
185
  if stride == 2:
186
186
  l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
187
- l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID")
187
+ l = Conv2D("conv2", l, ch_out, 3, strides=2, padding="VALID") # pylint: disable=E1124
188
188
  else:
189
- l = Conv2D("conv2", l, ch_out, 3, strides=stride)
189
+ l = Conv2D("conv2", l, ch_out, 3, strides=stride) # pylint: disable=E1124
190
190
  if cfg.BACKBONE.NORM != "None":
191
191
  l = Conv2D("conv3", l, ch_out * 4, 1, activation=get_norm(cfg, zero_init=True))
192
192
  else:
@@ -263,9 +263,9 @@ def resnet_fpn_backbone(image, cfg):
263
263
  ),
264
264
  )
265
265
  l.set_shape([None, chan, None, None])
266
- l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID")
266
+ l = Conv2D("conv0", l, 64, 7, strides=2, padding="VALID") # pylint: disable=E1124
267
267
  l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(cfg, 0, 1), maybe_reverse_pad(cfg, 0, 1)])
268
- l = MaxPooling("pool0", l, 3, strides=2, padding="VALID")
268
+ l = MaxPooling("pool0", l, 3, strides=2, padding="VALID") # pylint: disable=E1124
269
269
 
270
270
  bottleneck = resnet_bottleneck if cfg.BACKBONE.BOTTLENECK == "resnet" else resnext32x4d_bottleneck
271
271
  with backbone_scope(cfg=cfg, freeze=freeze_at > 1):
@@ -98,14 +98,14 @@ class GeneralizedRCNN(ModelDescWithConfig):
98
98
 
99
99
  image = self.preprocess(inputs["image"]) # 1CHW
100
100
 
101
- features = self.backbone(image)
101
+ features = self.backbone(image) # pylint: disable=E1101
102
102
  anchor_inputs = {k: v for k, v in inputs.items() if k.startswith("anchor_")}
103
- proposals, rpn_losses = self.rpn(image, features, anchor_inputs)
103
+ proposals, rpn_losses = self.rpn(image, features, anchor_inputs) # pylint: disable=E1101
104
104
 
105
105
  targets = [inputs[k] for k in ["gt_boxes", "gt_labels", "gt_masks"] if k in inputs]
106
106
  gt_boxes_area = tf.reduce_mean(tf_area(inputs["gt_boxes"]), name="mean_gt_box_area")
107
107
  add_moving_summary(gt_boxes_area)
108
- head_losses = self.roi_heads(image, features, proposals, targets)
108
+ head_losses = self.roi_heads(image, features, proposals, targets) # pylint: disable=E1101
109
109
 
110
110
  if self.training:
111
111
  wd_cost = regularize_cost(".*/W", l2_regularizer(self.cfg.TRAIN.WEIGHT_DECAY), name="wd_cost")
@@ -63,7 +63,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
63
63
  x = tf.transpose(x, [0, 3, 1, 2])
64
64
  return x
65
65
  except AttributeError:
66
- return FixedUnPooling(name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first")
66
+ return FixedUnPooling(
67
+ name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first"
68
+ ) # pylint: disable=E1124
67
69
 
68
70
  with argscope(
69
71
  Conv2D,
@@ -85,7 +87,9 @@ def fpn_model(features, fpn_num_channels, fpn_norm):
85
87
  p2345 = [Conv2D(f"posthoc_3x3_p{i + 2}", c, num_channel, 3) for i, c in enumerate(lat_sum_5432[::-1])]
86
88
  if use_gn:
87
89
  p2345 = [GroupNorm(f"gn_p{i + 2}", c) for i, c in enumerate(p2345)]
88
- p6 = MaxPooling("maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID")
90
+ p6 = MaxPooling(
91
+ "maxpool_p6", p2345[-1], pool_size=1, strides=2, data_format="channels_first", padding="VALID"
92
+ ) # pylint: disable=E1124
89
93
  return p2345 + [p6]
90
94
 
91
95
 
@@ -267,8 +267,10 @@ def fastrcnn_2fc_head(feature, cfg):
267
267
 
268
268
  dim = cfg.FPN.FRCNN_FC_HEAD_DIM
269
269
  init = tfv1.variance_scaling_initializer()
270
- hidden = FullyConnected("fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu)
271
- hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
270
+ hidden = FullyConnected(
271
+ "fc6", feature, dim, kernel_initializer=init, activation=tf.nn.relu
272
+ ) # pylint: disable=E1124
273
+ hidden = FullyConnected("fc7", hidden, dim, kernel_initializer=init, activation=tf.nn.relu) # pylint: disable=E1124
272
274
  return hidden
273
275
 
274
276
 
@@ -298,7 +300,7 @@ def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None, **kwargs): # pylint:
298
300
  l = Conv2D(f"conv{k}", l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
299
301
  if norm is not None:
300
302
  l = GroupNorm(f"gn{k}", l)
301
- l = FullyConnected(
303
+ l = FullyConnected( # pylint: disable=E1124
302
304
  "fc",
303
305
  l,
304
306
  cfg.FPN.FRCNN_FC_HEAD_DIM,
@@ -88,7 +88,9 @@ def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None, **kwargs)
88
88
  l = Conv2D(f"fcn{k}", l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
89
89
  if norm is not None:
90
90
  l = GroupNorm(f"gn{k}", l)
91
- l = Conv2DTranspose("deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
91
+ l = Conv2DTranspose(
92
+ "deconv", l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu
93
+ ) # pylint: disable=E1124
92
94
  l = Conv2D("conv", l, num_category, 1, kernel_initializer=tf.random_normal_initializer(stddev=0.001))
93
95
  return l
94
96
 
@@ -79,6 +79,7 @@ def _paste_mask(box, mask, shape, mrcnn_accurate_paste):
79
79
 
80
80
  # rounding errors could happen here, because masks were not originally computed for this shape.
81
81
  # but it's hard to do better, because the network does not know the "original" scale
82
+
82
83
  mask = (cv2.resize(mask, (w, h)) > 0.5).astype("uint8")
83
84
  ret = np.zeros(shape, dtype="uint8")
84
85
  ret[y_0 : y_1 + 1, x_0 : x_1 + 1] = mask
@@ -26,7 +26,6 @@ from typing import Any, Callable, Dict, List, Literal, NewType, Optional, Sequen
26
26
 
27
27
  import numpy as np
28
28
  import numpy.typing as npt
29
- from cv2 import INTER_LINEAR
30
29
 
31
30
  from ..datapoint.annotation import ContainerAnnotation
32
31
  from ..datapoint.convert import box_to_point4, point4_to_box
@@ -179,11 +178,11 @@ def image_to_raw_layoutlm_features(
179
178
 
180
179
  boxes = box_to_point4(boxes)
181
180
 
182
- resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, INTER_LINEAR)
181
+ resizer = ResizeTransform(dp.height, dp.width, input_height, input_width, "VIZ")
183
182
 
184
183
  if dp.image is not None:
185
184
  if image_width != input_width or image_height != input_height:
186
- image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, INTER_LINEAR)
185
+ image_only_resizer = ResizeTransform(dp.height, dp.width, image_height, image_width, "VIZ")
187
186
  image = image_only_resizer.apply_image(dp.image)
188
187
  else:
189
188
  image = resizer.apply_image(dp.image)
@@ -29,10 +29,10 @@ from time import perf_counter as timer
29
29
  from typing import Any, Generator, Iterator, Optional, Tuple, Union
30
30
 
31
31
  import numpy as np
32
- from cv2 import imwrite
33
32
 
34
33
  from .detection_types import ImageType
35
34
  from .logger import logger
35
+ from .viz import viz_handler
36
36
 
37
37
  __all__ = ["timeout_manager", "save_tmp_file", "timed_operation"]
38
38
 
@@ -89,7 +89,7 @@ def save_tmp_file(image: Union[str, ImageType, bytes], prefix: str) -> Iterator[
89
89
  return
90
90
  if isinstance(image, (np.ndarray, np.generic)):
91
91
  input_file_name = file.name + ".PNG"
92
- imwrite(input_file_name, image)
92
+ viz_handler.write_image(input_file_name, image)
93
93
  yield file.name, input_file_name
94
94
  if isinstance(image, bytes):
95
95
  input_file_name = file.name