python-doctr 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
  from .differentiable_binarization import *
  from .linknet import *
+ from .fast import *
  from .zoo import *
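The hunk above (presumably the detection package `__init__.py`) re-exports the new `fast` submodule alongside the existing detection architectures. A minimal sketch of what the wildcard import makes visible; the `fast_*` builder names are an assumption about the submodule's `__all__`, not something shown in this hunk:

```python
# Hedged sketch: after this change, whatever the new `fast` submodule lists in its
# __all__ becomes available directly under doctr.models.detection, next to the
# DBNet/LinkNet exports. The fast_* names below are assumed, not shown in this diff.
from doctr.models import detection

print(sorted(name for name in dir(detection) if name.startswith("fast")))
# e.g. ['fast_base', 'fast_small', 'fast_tiny'] if those are the builders exposed
```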
@@ -147,24 +147,20 @@ class DBNet(_DBNet, keras.Model, NestedObject):
          _inputs = [layers.Input(shape=in_shape[1:]) for in_shape in self.feat_extractor.output_shape]
          output_shape = tuple(self.fpn(_inputs).shape)

-         self.probability_head = keras.Sequential(
-             [
-                 *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
-                 layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
-                 layers.BatchNormalization(),
-                 layers.Activation("relu"),
-                 layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
-             ]
-         )
-         self.threshold_head = keras.Sequential(
-             [
-                 *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
-                 layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
-                 layers.BatchNormalization(),
-                 layers.Activation("relu"),
-                 layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
-             ]
-         )
+         self.probability_head = keras.Sequential([
+             *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
+             layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
+             layers.BatchNormalization(),
+             layers.Activation("relu"),
+             layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
+         ])
+         self.threshold_head = keras.Sequential([
+             *conv_sequence(64, "relu", True, kernel_size=3, input_shape=output_shape[1:]),
+             layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
+             layers.BatchNormalization(),
+             layers.Activation("relu"),
+             layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
+         ])

          self.postprocessor = DBPostProcessor(
              assume_straight_pages=assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
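The DBNet hunk above is purely stylistic: the list argument is inlined into keras.Sequential(...), so both heads keep the same layer stack, a conv block followed by two stride-2 transposed convolutions that upsample 4x. A standalone sketch of that stack, assuming conv_sequence(64, "relu", True, kernel_size=3) expands to Conv2D + BatchNormalization + ReLU, with an illustrative input shape and num_classes=1:

```python
# Minimal sketch of the head built above, with doctr's conv_sequence helper
# replaced by an explicit Conv2D + BatchNormalization + ReLU (an assumption about
# what conv_sequence(64, "relu", True, kernel_size=3) expands to). The input
# shape (64, 64, 256) and num_classes=1 are illustrative values only.
from tensorflow.keras import Sequential, layers

num_classes = 1
head = Sequential([
    layers.Conv2D(64, 3, padding="same", use_bias=False, input_shape=(64, 64, 256)),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Conv2DTranspose(64, 2, strides=2, use_bias=False, kernel_initializer="he_normal"),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Conv2DTranspose(num_classes, 2, strides=2, kernel_initializer="he_normal"),
])
print(head.output_shape)  # (None, 256, 256, 1): the two stride-2 transposed convs upsample 4x
```

Both the old keras.Sequential([...]) form and the new inlined one build this exact stack; only the formatting of the constructor call changed.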
@@ -0,0 +1,6 @@
+ from doctr.file_utils import is_tf_available, is_torch_available
+
+ if is_tf_available():
+     from .tensorflow import *
+ elif is_torch_available():
+     from .pytorch import *  # type: ignore[assignment]
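The new package `__init__` above selects the backend implementation at import time via doctr.file_utils. A small sketch showing which branch would run in a given environment (it only reads the two flags already imported in the hunk):

```python
# Sketch of how the dispatch above resolves: doctr.file_utils exposes
# is_tf_available() / is_torch_available(), and the package pulls in the matching
# backend implementation when it is imported.
from doctr.file_utils import is_tf_available, is_torch_available

print("TensorFlow backend:", is_tf_available())
print("PyTorch backend:", is_torch_available())
# With a TensorFlow install the package behaves like `from .tensorflow import *`;
# otherwise, with torch installed, like `from .pytorch import *`.
```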
@@ -0,0 +1,256 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ # Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization
+
+ from typing import Dict, List, Tuple, Union
+
+ import cv2
+ import numpy as np
+ import pyclipper
+ from shapely.geometry import Polygon
+
+ from doctr.models.core import BaseModel
+
+ from ..core import DetectionPostProcessor
+
+ __all__ = ["_FAST", "FASTPostProcessor"]
+
+
+ class FASTPostProcessor(DetectionPostProcessor):
+     """Implements a post processor for the FAST model.
+
+     Args:
+     ----
+         bin_thresh: threshold used to binarize the p_map at inference time
+         box_thresh: minimal objectness score to consider a box
+         assume_straight_pages: whether the inputs were expected to have horizontal text elements
+     """
+
+     def __init__(
+         self,
+         bin_thresh: float = 0.3,
+         box_thresh: float = 0.1,
+         assume_straight_pages: bool = True,
+     ) -> None:
+         super().__init__(box_thresh, bin_thresh, assume_straight_pages)
+         self.unclip_ratio = 1.0
+
+     def polygon_to_box(
+         self,
+         points: np.ndarray,
+     ) -> np.ndarray:
+         """Expand a polygon (points) by a factor unclip_ratio and return the resulting box
+
+         Args:
+         ----
+             points: the polygon vertices to expand, in absolute coordinates
+
+         Returns:
+         -------
+             a box in absolute coordinates (xmin, ymin, xmax, ymax) or (4, 2) array (quadrangle)
+         """
+         if not self.assume_straight_pages:
+             # Compute the rectangle polygon enclosing the raw polygon
+             rect = cv2.minAreaRect(points)
+             points = cv2.boxPoints(rect)
+             # Add 1 pixel to correct cv2 approx
+             area = (rect[1][0] + 1) * (1 + rect[1][1])
+             length = 2 * (rect[1][0] + rect[1][1]) + 2
+         else:
+             poly = Polygon(points)
+             area = poly.area
+             length = poly.length
+         distance = area * self.unclip_ratio / length  # compute distance to expand polygon
+         offset = pyclipper.PyclipperOffset()
+         offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+         _points = offset.Execute(distance)
+         # Take biggest stack of points
+         idx = 0
+         if len(_points) > 1:
+             max_size = 0
+             for _idx, p in enumerate(_points):
+                 if len(p) > max_size:
+                     idx = _idx
+                     max_size = len(p)
+             # We ensure that _points can be correctly casted to a ndarray
+             _points = [_points[idx]]
+         expanded_points: np.ndarray = np.asarray(_points)  # expand polygon
+         if len(expanded_points) < 1:
+             return None  # type: ignore[return-value]
+         return (
+             cv2.boundingRect(expanded_points)  # type: ignore[return-value]
+             if self.assume_straight_pages
+             else np.roll(cv2.boxPoints(cv2.minAreaRect(expanded_points)), -1, axis=0)
+         )
+
+     def bitmap_to_boxes(
+         self,
+         pred: np.ndarray,
+         bitmap: np.ndarray,
+     ) -> np.ndarray:
+         """Compute boxes from a bitmap/pred_map: find connected components then filter boxes
+
+         Args:
+         ----
+             pred: Pred map from the FAST model output
+             bitmap: Bitmap map computed from pred (binarized)
+
+         Returns:
+         -------
+             np tensor boxes for the bitmap, each box is a 5-element list containing
+                 xmin, ymin, xmax, ymax, score (or a (4, 2) polygon when assume_straight_pages is False)
+         """
+         height, width = bitmap.shape[:2]
+         boxes: List[Union[np.ndarray, List[float]]] = []
+         # get contours from connected components on the bitmap
+         contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+         for contour in contours:
+             # Check whether smallest enclosing bounding box is not too small
+             if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
+                 continue
+             # Compute objectness
+             if self.assume_straight_pages:
+                 x, y, w, h = cv2.boundingRect(contour)
+                 points: np.ndarray = np.array([[x, y], [x, y + h], [x + w, y + h], [x + w, y]])
+                 score = self.box_score(pred, points, assume_straight_pages=True)
+             else:
+                 score = self.box_score(pred, contour, assume_straight_pages=False)
+
+             if score < self.box_thresh:  # remove polygons with a weak objectness
+                 continue
+
+             if self.assume_straight_pages:
+                 _box = self.polygon_to_box(points)
+             else:
+                 _box = self.polygon_to_box(np.squeeze(contour))
+
+             if self.assume_straight_pages:
+                 # compute relative polygon to get rid of img shape
+                 x, y, w, h = _box
+                 xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
+                 boxes.append([xmin, ymin, xmax, ymax, score])
+             else:
+                 # compute relative box to get rid of img shape
+                 _box[:, 0] /= width
+                 _box[:, 1] /= height
+                 boxes.append(_box)
+
+         if not self.assume_straight_pages:
+             return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 4, 2), dtype=pred.dtype)
+         else:
+             return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)
+
+
+ class _FAST(BaseModel):
+     """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+     <https://arxiv.org/pdf/2111.02394.pdf>`_.
+     """
+
+     min_size_box: int = 3
+     assume_straight_pages: bool = True
+     shrink_ratio = 0.1
+
+     def build_target(
+         self,
+         target: List[Dict[str, np.ndarray]],
+         output_shape: Tuple[int, int, int],
+         channels_last: bool = True,
+     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+         """Build the target and its mask to be used for loss computation.
+
+         Args:
+         ----
+             target: target coming from dataset
+             output_shape: shape of the output of the model without batch_size
+             channels_last: whether channels are last or not
+
+         Returns:
+         -------
+             the new formatted target, mask and shrunken text kernel
+         """
+         if any(t.dtype != np.float32 for tgt in target for t in tgt.values()):
+             raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
+         if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for tgt in target for t in tgt.values()):
+             raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")
+
+         h: int
+         w: int
+         if channels_last:
+             h, w, num_classes = output_shape
+         else:
+             num_classes, h, w = output_shape
+         target_shape = (len(target), num_classes, h, w)
+
+         seg_target: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
+         seg_mask: np.ndarray = np.ones(target_shape, dtype=bool)
+         shrunken_kernel: np.ndarray = np.zeros(target_shape, dtype=np.uint8)
+
+         for idx, tgt in enumerate(target):
+             for class_idx, _tgt in enumerate(tgt.values()):
+                 # Draw each polygon on gt
+                 if _tgt.shape[0] == 0:
+                     # Empty image, full masked
+                     seg_mask[idx, class_idx] = False
+
+                 # Absolute bounding boxes
+                 abs_boxes = _tgt.copy()
+
+                 if abs_boxes.ndim == 3:
+                     abs_boxes[:, :, 0] *= w
+                     abs_boxes[:, :, 1] *= h
+                     polys = abs_boxes
+                     boxes_size = np.linalg.norm(abs_boxes[:, 2, :] - abs_boxes[:, 0, :], axis=-1)
+                     abs_boxes = np.concatenate((abs_boxes.min(1), abs_boxes.max(1)), -1).round().astype(np.int32)
+                 else:
+                     abs_boxes[:, [0, 2]] *= w
+                     abs_boxes[:, [1, 3]] *= h
+                     abs_boxes = abs_boxes.round().astype(np.int32)
+                     polys = np.stack(
+                         [
+                             abs_boxes[:, [0, 1]],
+                             abs_boxes[:, [0, 3]],
+                             abs_boxes[:, [2, 3]],
+                             abs_boxes[:, [2, 1]],
+                         ],
+                         axis=1,
+                     )
+                     boxes_size = np.minimum(abs_boxes[:, 2] - abs_boxes[:, 0], abs_boxes[:, 3] - abs_boxes[:, 1])
+
+                 for poly, box, box_size in zip(polys, abs_boxes, boxes_size):
+                     # Mask boxes that are too small
+                     if box_size < self.min_size_box:
+                         seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                         continue
+
+                     # Negative shrink for gt, as described in paper
+                     polygon = Polygon(poly)
+                     distance = polygon.area * (1 - np.power(self.shrink_ratio, 2)) / polygon.length
+                     subject = [tuple(coor) for coor in poly]
+                     padding = pyclipper.PyclipperOffset()
+                     padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+                     shrunken = padding.Execute(-distance)
+
+                     # Draw polygon on gt if it is valid
+                     if len(shrunken) == 0:
+                         seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                         continue
+                     shrunken = np.array(shrunken[0]).reshape(-1, 2)
+                     if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
+                         seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
+                         continue
+                     cv2.fillPoly(shrunken_kernel[idx, class_idx], [shrunken.astype(np.int32)], 1.0)  # type: ignore[call-overload]
+                     # draw the original polygon on the segmentation target
+                     cv2.fillPoly(seg_target[idx, class_idx], [poly.astype(np.int32)], 1.0)  # type: ignore[call-overload]
+
+         # Don't forget to switch back to channel last if Tensorflow is used
+         if channels_last:
+             seg_target = seg_target.transpose((0, 2, 3, 1))
+             seg_mask = seg_mask.transpose((0, 2, 3, 1))
+             shrunken_kernel = shrunken_kernel.transpose((0, 2, 3, 1))
+
+         return seg_target, seg_mask, shrunken_kernel
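Both halves of the new base module lean on the same polygon-offset trick: build_target shrinks each ground-truth polygon by a distance derived from its area and perimeter to obtain the text kernel, while FASTPostProcessor.polygon_to_box applies a positive offset at inference to grow detected regions back. A self-contained sketch of that round trip on a toy rectangle, using the same pyclipper/shapely calls as the code above (the rectangle and the printed magnitudes are illustrative only):

```python
# Standalone sketch of the shrink/expand used above: a negative offset yields the
# shrunken kernel drawn by build_target, a positive offset mimics the expansion
# performed by polygon_to_box. Values are illustrative.
import numpy as np
import pyclipper
from shapely.geometry import Polygon

poly = np.array([[10, 10], [110, 10], [110, 40], [10, 40]], dtype=np.int32)
shrink_ratio = 0.1  # same default as _FAST.shrink_ratio

polygon = Polygon(poly)
distance = polygon.area * (1 - shrink_ratio**2) / polygon.length  # ~11.4 px for this rectangle

offset = pyclipper.PyclipperOffset()
offset.AddPath([tuple(pt) for pt in poly], pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)

shrunken = np.array(offset.Execute(-distance)[0])  # kernel polygon drawn by build_target
print(shrunken.min(0), shrunken.max(0))            # roughly 11 px inside each edge

expanded = np.array(offset.Execute(distance)[0])   # expansion applied at inference time
print(expanded.min(0), expanded.max(0))            # roughly 11 px outside each edge
```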