python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +1 -1
- doctr/contrib/__init__.py +0 -0
- doctr/contrib/artefacts.py +131 -0
- doctr/contrib/base.py +105 -0
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/generator/base.py +6 -5
- doctr/datasets/imgur5k.py +1 -1
- doctr/datasets/loader.py +1 -6
- doctr/datasets/utils.py +2 -1
- doctr/datasets/vocabs.py +9 -2
- doctr/file_utils.py +26 -12
- doctr/io/elements.py +40 -6
- doctr/io/html.py +2 -2
- doctr/io/image/pytorch.py +6 -8
- doctr/io/image/tensorflow.py +1 -1
- doctr/io/pdf.py +5 -2
- doctr/io/reader.py +6 -0
- doctr/models/__init__.py +0 -1
- doctr/models/_utils.py +57 -20
- doctr/models/builder.py +71 -13
- doctr/models/classification/mobilenet/pytorch.py +45 -9
- doctr/models/classification/mobilenet/tensorflow.py +38 -7
- doctr/models/classification/predictor/pytorch.py +18 -11
- doctr/models/classification/predictor/tensorflow.py +16 -10
- doctr/models/classification/textnet/pytorch.py +3 -3
- doctr/models/classification/textnet/tensorflow.py +3 -3
- doctr/models/classification/zoo.py +39 -15
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/__init__.py +1 -0
- doctr/models/detection/_utils/base.py +66 -0
- doctr/models/detection/differentiable_binarization/base.py +4 -3
- doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
- doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +257 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +4 -3
- doctr/models/detection/predictor/pytorch.py +15 -1
- doctr/models/detection/predictor/tensorflow.py +15 -1
- doctr/models/detection/zoo.py +21 -4
- doctr/models/factory/hub.py +3 -12
- doctr/models/kie_predictor/base.py +9 -3
- doctr/models/kie_predictor/pytorch.py +41 -20
- doctr/models/kie_predictor/tensorflow.py +36 -16
- doctr/models/modules/layers/pytorch.py +89 -10
- doctr/models/modules/layers/tensorflow.py +88 -10
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/predictor/base.py +77 -50
- doctr/models/predictor/pytorch.py +31 -20
- doctr/models/predictor/tensorflow.py +27 -17
- doctr/models/preprocessor/pytorch.py +4 -4
- doctr/models/preprocessor/tensorflow.py +3 -2
- doctr/models/recognition/master/pytorch.py +2 -2
- doctr/models/recognition/parseq/pytorch.py +4 -3
- doctr/models/recognition/parseq/tensorflow.py +4 -3
- doctr/models/recognition/sar/pytorch.py +7 -6
- doctr/models/recognition/sar/tensorflow.py +3 -9
- doctr/models/recognition/vitstr/pytorch.py +1 -1
- doctr/models/recognition/zoo.py +1 -1
- doctr/models/zoo.py +2 -2
- doctr/py.typed +0 -0
- doctr/transforms/functional/base.py +1 -1
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/base.py +37 -15
- doctr/transforms/modules/pytorch.py +66 -8
- doctr/transforms/modules/tensorflow.py +63 -7
- doctr/utils/fonts.py +7 -5
- doctr/utils/geometry.py +35 -12
- doctr/utils/metrics.py +33 -174
- doctr/utils/reconstitution.py +126 -0
- doctr/utils/visualization.py +5 -118
- doctr/version.py +1 -1
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
- doctr/models/artefacts/__init__.py +0 -2
- doctr/models/artefacts/barcode.py +0 -74
- doctr/models/artefacts/face.py +0 -63
- doctr/models/obj_detection/__init__.py +0 -1
- doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
doctr/models/artefacts/barcode.py
DELETED
@@ -1,74 +0,0 @@
-# Copyright (C) 2021-2024, Mindee.
-
-# This program is licensed under the Apache License 2.0.
-# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
-from typing import List, Tuple
-
-import cv2
-import numpy as np
-
-__all__ = ["BarCodeDetector"]
-
-
-class BarCodeDetector:
-    """Implements a Bar-code detector.
-    For now, only horizontal (or with a small angle) bar-codes are supported
-
-    Args:
-    ----
-        min_size: minimum relative size of a barcode on the page
-        canny_minval: lower bound for canny hysteresis
-        canny_maxval: upper-bound for canny hysteresis
-    """
-
-    def __init__(self, min_size: float = 1 / 6, canny_minval: int = 50, canny_maxval: int = 150) -> None:
-        self.min_size = min_size
-        self.canny_minval = canny_minval
-        self.canny_maxval = canny_maxval
-
-    def __call__(
-        self,
-        img: np.ndarray,
-    ) -> List[Tuple[float, float, float, float]]:
-        """Detect Barcodes on the image
-
-        Args:
-            img: np image
-
-        Returns:
-        -------
-            A list of tuples: [(xmin, ymin, xmax, ymax), ...] containing barcodes rel. coordinates
-        """
-        # get image size and define parameters
-        height, width = img.shape[:2]
-        k = (1 + int(width / 512)) * 10  # spatial extension of kernels, 512 -> 20, 1024 -> 30, ...
-        min_w = int(width * self.min_size)  # minimal size of a possible barcode
-
-        # Detect edges
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, self.canny_minval, self.canny_maxval, apertureSize=3)
-
-        # Horizontal dilation to aggregate bars of the potential barcode
-        # without aggregating text lines of the page vertically
-        edges = cv2.dilate(edges, np.ones((1, k), np.uint8))
-
-        # Instantiate a barcode-shaped kernel and erode to keep only vertical-bar structures
-        bar_code_kernel: np.ndarray = np.zeros((k, 3), np.uint8)
-        bar_code_kernel[..., [0, 2]] = 1
-        edges = cv2.erode(edges, bar_code_kernel, iterations=1)
-
-        # Opening to remove noise
-        edges = cv2.morphologyEx(edges, cv2.MORPH_OPEN, np.ones((k, k), np.uint8))
-
-        # Dilation to retrieve vertical length (lost at the first dilation)
-        edges = cv2.dilate(edges, np.ones((k, 1), np.uint8))
-
-        # Find contours, and keep the widest as barcodes
-        contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-        barcodes = []
-        for contour in contours:
-            x, y, w, h = cv2.boundingRect(contour)
-            if w >= min_w:
-                barcodes.append((x / width, y / height, (x + w) / width, (y + h) / height))
-
-        return barcodes
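As context for this removal: `BarCodeDetector` was a pure-OpenCV heuristic with a single `__call__` entry point taking a NumPy image and returning boxes in relative coordinates. Below is a minimal usage sketch against the removed 0.8.x API; the image path is a placeholder, and the `doctr.models.artefacts` import assumes the re-export from the deleted `doctr/models/artefacts/__init__.py`.

```python
# Usage sketch for the removed 0.8.x BarCodeDetector (gone in 0.9.0).
# "document.jpg" is a placeholder path, not shipped with the package.
import cv2

from doctr.models.artefacts import BarCodeDetector  # assumed 0.8.x re-export

detector = BarCodeDetector(min_size=1 / 6, canny_minval=50, canny_maxval=150)
img = cv2.imread("document.jpg")  # BGR uint8 image, matching the cv2.COLOR_BGR2GRAY conversion above
rel_boxes = detector(img)  # [(xmin, ymin, xmax, ymax), ...] relative to the image size

# Scale back to absolute pixel coordinates
h, w = img.shape[:2]
abs_boxes = [(int(x0 * w), int(y0 * h), int(x1 * w), int(y1 * h)) for x0, y0, x1, y1 in rel_boxes]
```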
doctr/models/artefacts/face.py
DELETED
@@ -1,63 +0,0 @@
-# Copyright (C) 2021-2024, Mindee.
-
-# This program is licensed under the Apache License 2.0.
-# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
-from typing import List, Tuple
-
-import cv2
-import numpy as np
-
-from doctr.utils.repr import NestedObject
-
-__all__ = ["FaceDetector"]
-
-
-class FaceDetector(NestedObject):
-    """Implements a face detector to detect profile pictures on resumes, IDS, driving licenses, passports...
-    Based on open CV CascadeClassifier (haarcascades)
-
-    Args:
-    ----
-        n_faces: maximal number of faces to detect on a single image, default = 1
-    """
-
-    def __init__(
-        self,
-        n_faces: int = 1,
-    ) -> None:
-        self.n_faces = n_faces
-        # Instantiate classifier
-        self.detector = cv2.CascadeClassifier(
-            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"  # type: ignore[attr-defined]
-        )
-
-    def extra_repr(self) -> str:
-        return f"n_faces={self.n_faces}"
-
-    def __call__(
-        self,
-        img: np.ndarray,
-    ) -> List[Tuple[float, float, float, float]]:
-        """Detect n_faces on the img
-
-        Args:
-        ----
-            img: image to detect faces on
-
-        Returns:
-        -------
-            A list of size n_faces, each face is a tuple of relative xmin, ymin, xmax, ymax
-        """
-        height, width = img.shape[:2]
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-        faces = self.detector.detectMultiScale(gray, 1.5, 3)
-        # If faces are detected, keep only the biggest ones
-        rel_faces = []
-        if len(faces) > 0:
-            x, y, w, h = sorted(faces, key=lambda x: x[2] + x[3])[-min(self.n_faces, len(faces))]
-            xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
-            rel_faces.append((xmin, ymin, xmax, ymax))
-
-        return rel_faces
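`FaceDetector` followed the same calling convention, wrapping OpenCV's pretrained frontal-face Haar cascade. Note that the `sorted(...)[-min(self.n_faces, len(faces))]` indexing above selects a single element, so the detector returned at most one box regardless of `n_faces`. A minimal usage sketch of the removed 0.8.x API (placeholder path, same assumed re-export):

```python
# Usage sketch for the removed 0.8.x FaceDetector (gone in 0.9.0).
# "id_card.jpg" is a placeholder path.
import cv2

from doctr.models.artefacts import FaceDetector  # assumed 0.8.x re-export

detector = FaceDetector(n_faces=1)
img = cv2.imread("id_card.jpg")  # BGR uint8 image
faces = detector(img)  # at most one tuple of relative (xmin, ymin, xmax, ymax)
```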
doctr/models/obj_detection/__init__.py
DELETED
@@ -1 +0,0 @@
-from .faster_rcnn import *
doctr/models/obj_detection/faster_rcnn/pytorch.py
DELETED
@@ -1,81 +0,0 @@
-# Copyright (C) 2021-2024, Mindee.
-
-# This program is licensed under the Apache License 2.0.
-# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
-from typing import Any, Dict
-
-from torchvision.models.detection import FasterRCNN, FasterRCNN_MobileNet_V3_Large_FPN_Weights, faster_rcnn
-
-from ...utils import load_pretrained_params
-
-__all__ = ["fasterrcnn_mobilenet_v3_large_fpn"]
-
-
-default_cfgs: Dict[str, Dict[str, Any]] = {
-    "fasterrcnn_mobilenet_v3_large_fpn": {
-        "input_shape": (3, 1024, 1024),
-        "mean": (0.485, 0.456, 0.406),
-        "std": (0.229, 0.224, 0.225),
-        "classes": ["background", "qr_code", "bar_code", "logo", "photo"],
-        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/fasterrcnn_mobilenet_v3_large_fpn-d5b2490d.pt&src=0",
-    },
-}
-
-
-def _fasterrcnn(arch: str, pretrained: bool, **kwargs: Any) -> FasterRCNN:
-    _kwargs = {
-        "image_mean": default_cfgs[arch]["mean"],
-        "image_std": default_cfgs[arch]["std"],
-        "box_detections_per_img": 150,
-        "box_score_thresh": 0.5,
-        "box_positive_fraction": 0.35,
-        "box_nms_thresh": 0.2,
-        "rpn_nms_thresh": 0.2,
-        "num_classes": len(default_cfgs[arch]["classes"]),
-    }
-
-    # Build the model
-    _kwargs.update(kwargs)
-    model = faster_rcnn.__dict__[arch](weights=None, weights_backbone=None, **_kwargs)
-    model.cfg = default_cfgs[arch]
-
-    if pretrained:
-        # Load pretrained parameters
-        load_pretrained_params(model, default_cfgs[arch]["url"])
-    else:
-        # Filter keys
-        state_dict = {
-            k: v
-            for k, v in faster_rcnn.__dict__[arch](weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
-            .state_dict()
-            .items()
-            if not k.startswith("roi_heads.")
-        }
-
-        # Load state dict
-        model.load_state_dict(state_dict, strict=False)
-
-    return model
-
-
-def fasterrcnn_mobilenet_v3_large_fpn(pretrained: bool = False, **kwargs: Any) -> FasterRCNN:
-    """Faster-RCNN architecture with a MobileNet V3 backbone as described in `"Faster R-CNN: Towards Real-Time
-    Object Detection with Region Proposal Networks" <https://arxiv.org/pdf/1506.01497.pdf>`_.
-
-    >>> import torch
-    >>> from doctr.models.obj_detection import fasterrcnn_mobilenet_v3_large_fpn
-    >>> model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
-    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
-    >>> out = model(input_tensor)
-
-    Args:
-    ----
-        pretrained (bool): If True, returns a model pre-trained on our object detection dataset
-        **kwargs: keyword arguments of the FasterRCNN architecture
-
-    Returns:
-    -------
-        object detection architecture
-    """
-    return _fasterrcnn("fasterrcnn_mobilenet_v3_large_fpn", pretrained, **kwargs)
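The removed entry point was a thin wrapper around torchvision's Faster R-CNN; in eval mode, a torchvision detection model returns one dict per image with "boxes", "labels" and "scores". A sketch of decoding those outputs against the class list stored on `model.cfg` (0.8.x API; random input for illustration, and `pretrained=True` assumes the hosted weights above are still reachable):

```python
# Inference sketch for the removed 0.8.x object detector (gone in 0.9.0).
import torch

from doctr.models.obj_detection import fasterrcnn_mobilenet_v3_large_fpn

model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True).eval()  # eval mode: forward without targets
input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)

with torch.no_grad():
    out = model(input_tensor)  # list with one dict per image: "boxes", "labels", "scores"

classes = model.cfg["classes"]  # ["background", "qr_code", "bar_code", "logo", "photo"]
for box, label, score in zip(out[0]["boxes"], out[0]["labels"], out[0]["scores"]):
    print(classes[label.item()], round(score.item(), 3), [round(c, 1) for c in box.tolist()])
```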
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE
File without changes
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt
File without changes
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe
File without changes