python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/datasets/pytorch.py +2 -2
  6. doctr/datasets/generator/base.py +6 -5
  7. doctr/datasets/imgur5k.py +1 -1
  8. doctr/datasets/loader.py +1 -6
  9. doctr/datasets/utils.py +2 -1
  10. doctr/datasets/vocabs.py +9 -2
  11. doctr/file_utils.py +26 -12
  12. doctr/io/elements.py +40 -6
  13. doctr/io/html.py +2 -2
  14. doctr/io/image/pytorch.py +6 -8
  15. doctr/io/image/tensorflow.py +1 -1
  16. doctr/io/pdf.py +5 -2
  17. doctr/io/reader.py +6 -0
  18. doctr/models/__init__.py +0 -1
  19. doctr/models/_utils.py +57 -20
  20. doctr/models/builder.py +71 -13
  21. doctr/models/classification/mobilenet/pytorch.py +45 -9
  22. doctr/models/classification/mobilenet/tensorflow.py +38 -7
  23. doctr/models/classification/predictor/pytorch.py +18 -11
  24. doctr/models/classification/predictor/tensorflow.py +16 -10
  25. doctr/models/classification/textnet/pytorch.py +3 -3
  26. doctr/models/classification/textnet/tensorflow.py +3 -3
  27. doctr/models/classification/zoo.py +39 -15
  28. doctr/models/detection/__init__.py +1 -0
  29. doctr/models/detection/_utils/__init__.py +1 -0
  30. doctr/models/detection/_utils/base.py +66 -0
  31. doctr/models/detection/differentiable_binarization/base.py +4 -3
  32. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  33. doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
  34. doctr/models/detection/fast/__init__.py +6 -0
  35. doctr/models/detection/fast/base.py +257 -0
  36. doctr/models/detection/fast/pytorch.py +442 -0
  37. doctr/models/detection/fast/tensorflow.py +428 -0
  38. doctr/models/detection/linknet/base.py +4 -3
  39. doctr/models/detection/predictor/pytorch.py +15 -1
  40. doctr/models/detection/predictor/tensorflow.py +15 -1
  41. doctr/models/detection/zoo.py +21 -4
  42. doctr/models/factory/hub.py +3 -12
  43. doctr/models/kie_predictor/base.py +9 -3
  44. doctr/models/kie_predictor/pytorch.py +41 -20
  45. doctr/models/kie_predictor/tensorflow.py +36 -16
  46. doctr/models/modules/layers/pytorch.py +89 -10
  47. doctr/models/modules/layers/tensorflow.py +88 -10
  48. doctr/models/modules/transformer/pytorch.py +2 -2
  49. doctr/models/predictor/base.py +77 -50
  50. doctr/models/predictor/pytorch.py +31 -20
  51. doctr/models/predictor/tensorflow.py +27 -17
  52. doctr/models/preprocessor/pytorch.py +4 -4
  53. doctr/models/preprocessor/tensorflow.py +3 -2
  54. doctr/models/recognition/master/pytorch.py +2 -2
  55. doctr/models/recognition/parseq/pytorch.py +4 -3
  56. doctr/models/recognition/parseq/tensorflow.py +4 -3
  57. doctr/models/recognition/sar/pytorch.py +7 -6
  58. doctr/models/recognition/sar/tensorflow.py +3 -9
  59. doctr/models/recognition/vitstr/pytorch.py +1 -1
  60. doctr/models/recognition/zoo.py +1 -1
  61. doctr/models/zoo.py +2 -2
  62. doctr/py.typed +0 -0
  63. doctr/transforms/functional/base.py +1 -1
  64. doctr/transforms/functional/pytorch.py +4 -4
  65. doctr/transforms/modules/base.py +37 -15
  66. doctr/transforms/modules/pytorch.py +66 -8
  67. doctr/transforms/modules/tensorflow.py +63 -7
  68. doctr/utils/fonts.py +7 -5
  69. doctr/utils/geometry.py +35 -12
  70. doctr/utils/metrics.py +33 -174
  71. doctr/utils/reconstitution.py +126 -0
  72. doctr/utils/visualization.py +5 -118
  73. doctr/version.py +1 -1
  74. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
  75. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
  76. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
  77. doctr/models/artefacts/__init__.py +0 -2
  78. doctr/models/artefacts/barcode.py +0 -74
  79. doctr/models/artefacts/face.py +0 -63
  80. doctr/models/obj_detection/__init__.py +0 -1
  81. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  82. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  83. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
  84. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
  85. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
@@ -1,74 +0,0 @@
- # Copyright (C) 2021-2024, Mindee.
-
- # This program is licensed under the Apache License 2.0.
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
- from typing import List, Tuple
-
- import cv2
- import numpy as np
-
- __all__ = ["BarCodeDetector"]
-
-
- class BarCodeDetector:
-     """Implements a Bar-code detector.
-     For now, only horizontal (or with a small angle) bar-codes are supported
-
-     Args:
-     ----
-         min_size: minimum relative size of a barcode on the page
-         canny_minval: lower bound for canny hysteresis
-         canny_maxval: upper-bound for canny hysteresis
-     """
-
-     def __init__(self, min_size: float = 1 / 6, canny_minval: int = 50, canny_maxval: int = 150) -> None:
-         self.min_size = min_size
-         self.canny_minval = canny_minval
-         self.canny_maxval = canny_maxval
-
-     def __call__(
-         self,
-         img: np.ndarray,
-     ) -> List[Tuple[float, float, float, float]]:
-         """Detect Barcodes on the image
-         Args:
-             img: np image
-
-         Returns
-         -------
-             A list of tuples: [(xmin, ymin, xmax, ymax), ...] containing barcodes rel. coordinates
-         """
-         # get image size and define parameters
-         height, width = img.shape[:2]
-         k = (1 + int(width / 512)) * 10  # spatial extension of kernels, 512 -> 20, 1024 -> 30, ...
-         min_w = int(width * self.min_size)  # minimal size of a possible barcode
-
-         # Detect edges
-         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-         edges = cv2.Canny(gray, self.canny_minval, self.canny_maxval, apertureSize=3)
-
-         # Horizontal dilation to aggregate bars of the potential barcode
-         # without aggregating text lines of the page vertically
-         edges = cv2.dilate(edges, np.ones((1, k), np.uint8))
-
-         # Instantiate a barcode-shaped kernel and erode to keep only vertical-bar structures
-         bar_code_kernel: np.ndarray = np.zeros((k, 3), np.uint8)
-         bar_code_kernel[..., [0, 2]] = 1
-         edges = cv2.erode(edges, bar_code_kernel, iterations=1)
-
-         # Opening to remove noise
-         edges = cv2.morphologyEx(edges, cv2.MORPH_OPEN, np.ones((k, k), np.uint8))
-
-         # Dilation to retrieve vertical length (lost at the first dilation)
-         edges = cv2.dilate(edges, np.ones((k, 1), np.uint8))
-
-         # Find contours, and keep the widest as barcodes
-         contours, _ = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-         barcodes = []
-         for contour in contours:
-             x, y, w, h = cv2.boundingRect(contour)
-             if w >= min_w:
-                 barcodes.append((x / width, y / height, (x + w) / width, (y + h) / height))
-
-         return barcodes
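
The removed BarCodeDetector was a pure OpenCV pipeline (Canny edges followed by directional morphology). For reference, a minimal usage sketch against doctr==0.8.0, where the class still shipped; the import path is inferred from the removed `doctr/models/artefacts/__init__.py` and the image file name is a placeholder:

```python
# Usage sketch for the removed BarCodeDetector (doctr==0.8.0 only).
# Import path inferred from the 0.8.0 package layout; "page.jpg" is a placeholder.
import cv2

from doctr.models.artefacts import BarCodeDetector

img = cv2.imread("page.jpg")  # BGR image, as expected by the detector
detector = BarCodeDetector(min_size=1 / 6, canny_minval=50, canny_maxval=150)

# Each detection is a tuple of relative coordinates (xmin, ymin, xmax, ymax)
height, width = img.shape[:2]
for xmin, ymin, xmax, ymax in detector(img):
    print(int(xmin * width), int(ymin * height), int(xmax * width), int(ymax * height))
```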
@@ -1,63 +0,0 @@
- # Copyright (C) 2021-2024, Mindee.
-
- # This program is licensed under the Apache License 2.0.
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
- from typing import List, Tuple
-
- import cv2
- import numpy as np
-
- from doctr.utils.repr import NestedObject
-
- __all__ = ["FaceDetector"]
-
-
- class FaceDetector(NestedObject):
-     """Implements a face detector to detect profile pictures on resumes, IDS, driving licenses, passports...
-     Based on open CV CascadeClassifier (haarcascades)
-
-     Args:
-     ----
-         n_faces: maximal number of faces to detect on a single image, default = 1
-     """
-
-     def __init__(
-         self,
-         n_faces: int = 1,
-     ) -> None:
-         self.n_faces = n_faces
-         # Instantiate classifier
-         self.detector = cv2.CascadeClassifier(
-             cv2.data.haarcascades + "haarcascade_frontalface_default.xml"  # type: ignore[attr-defined]
-         )
-
-     def extra_repr(self) -> str:
-         return f"n_faces={self.n_faces}"
-
-     def __call__(
-         self,
-         img: np.ndarray,
-     ) -> List[Tuple[float, float, float, float]]:
-         """Detect n_faces on the img
-
-         Args:
-         ----
-             img: image to detect faces on
-
-         Returns:
-         -------
-             A list of size n_faces, each face is a tuple of relative xmin, ymin, xmax, ymax
-         """
-         height, width = img.shape[:2]
-         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-         faces = self.detector.detectMultiScale(gray, 1.5, 3)
-         # If faces are detected, keep only the biggest ones
-         rel_faces = []
-         if len(faces) > 0:
-             x, y, w, h = sorted(faces, key=lambda x: x[2] + x[3])[-min(self.n_faces, len(faces))]
-             xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
-             rel_faces.append((xmin, ymin, xmax, ymax))
-
-         return rel_faces
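
Similarly, the removed FaceDetector wrapped OpenCV's Haar-cascade classifier. A minimal sketch, again assuming doctr==0.8.0 and the same inferred import path, with a placeholder file name:

```python
# Usage sketch for the removed FaceDetector (doctr==0.8.0 only); import path inferred.
import cv2

from doctr.models.artefacts import FaceDetector

detector = FaceDetector(n_faces=1)
# Returns at most n_faces boxes as relative (xmin, ymin, xmax, ymax) tuples
print(detector(cv2.imread("id_card.jpg")))
```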
@@ -1 +0,0 @@
- from .faster_rcnn import *
@@ -1,4 +0,0 @@
- from doctr.file_utils import is_tf_available, is_torch_available
-
- if not is_tf_available() and is_torch_available():
-     from .pytorch import *
@@ -1,81 +0,0 @@
- # Copyright (C) 2021-2024, Mindee.
-
- # This program is licensed under the Apache License 2.0.
- # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-
- from typing import Any, Dict
-
- from torchvision.models.detection import FasterRCNN, FasterRCNN_MobileNet_V3_Large_FPN_Weights, faster_rcnn
-
- from ...utils import load_pretrained_params
-
- __all__ = ["fasterrcnn_mobilenet_v3_large_fpn"]
-
-
- default_cfgs: Dict[str, Dict[str, Any]] = {
-     "fasterrcnn_mobilenet_v3_large_fpn": {
-         "input_shape": (3, 1024, 1024),
-         "mean": (0.485, 0.456, 0.406),
-         "std": (0.229, 0.224, 0.225),
-         "classes": ["background", "qr_code", "bar_code", "logo", "photo"],
-         "url": "https://doctr-static.mindee.com/models?id=v0.4.1/fasterrcnn_mobilenet_v3_large_fpn-d5b2490d.pt&src=0",
-     },
- }
-
-
- def _fasterrcnn(arch: str, pretrained: bool, **kwargs: Any) -> FasterRCNN:
-     _kwargs = {
-         "image_mean": default_cfgs[arch]["mean"],
-         "image_std": default_cfgs[arch]["std"],
-         "box_detections_per_img": 150,
-         "box_score_thresh": 0.5,
-         "box_positive_fraction": 0.35,
-         "box_nms_thresh": 0.2,
-         "rpn_nms_thresh": 0.2,
-         "num_classes": len(default_cfgs[arch]["classes"]),
-     }
-
-     # Build the model
-     _kwargs.update(kwargs)
-     model = faster_rcnn.__dict__[arch](weights=None, weights_backbone=None, **_kwargs)
-     model.cfg = default_cfgs[arch]
-
-     if pretrained:
-         # Load pretrained parameters
-         load_pretrained_params(model, default_cfgs[arch]["url"])
-     else:
-         # Filter keys
-         state_dict = {
-             k: v
-             for k, v in faster_rcnn.__dict__[arch](weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
-             .state_dict()
-             .items()
-             if not k.startswith("roi_heads.")
-         }
-
-         # Load state dict
-         model.load_state_dict(state_dict, strict=False)
-
-     return model
-
-
- def fasterrcnn_mobilenet_v3_large_fpn(pretrained: bool = False, **kwargs: Any) -> FasterRCNN:
-     """Faster-RCNN architecture with a MobileNet V3 backbone as described in `"Faster R-CNN: Towards Real-Time
-     Object Detection with Region Proposal Networks" <https://arxiv.org/pdf/1506.01497.pdf>`_.
-
-     >>> import torch
-     >>> from doctr.models.obj_detection import fasterrcnn_mobilenet_v3_large_fpn
-     >>> model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
-     >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
-     >>> out = model(input_tensor)
-
-     Args:
-     ----
-         pretrained (bool): If True, returns a model pre-trained on our object detection dataset
-         **kwargs: keyword arguments of the FasterRCNN architecture
-
-     Returns:
-     -------
-         object detection architecture
-     """
-     return _fasterrcnn("fasterrcnn_mobilenet_v3_large_fpn", pretrained, **kwargs)