python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. doctr/datasets/__init__.py +2 -0
  2. doctr/datasets/cord.py +6 -4
  3. doctr/datasets/datasets/base.py +3 -2
  4. doctr/datasets/datasets/pytorch.py +4 -2
  5. doctr/datasets/datasets/tensorflow.py +4 -2
  6. doctr/datasets/detection.py +6 -3
  7. doctr/datasets/doc_artefacts.py +2 -1
  8. doctr/datasets/funsd.py +7 -8
  9. doctr/datasets/generator/base.py +3 -2
  10. doctr/datasets/generator/pytorch.py +3 -1
  11. doctr/datasets/generator/tensorflow.py +3 -1
  12. doctr/datasets/ic03.py +3 -2
  13. doctr/datasets/ic13.py +2 -1
  14. doctr/datasets/iiit5k.py +6 -4
  15. doctr/datasets/iiithws.py +2 -1
  16. doctr/datasets/imgur5k.py +3 -2
  17. doctr/datasets/loader.py +4 -2
  18. doctr/datasets/mjsynth.py +2 -1
  19. doctr/datasets/ocr.py +2 -1
  20. doctr/datasets/orientation.py +40 -0
  21. doctr/datasets/recognition.py +3 -2
  22. doctr/datasets/sroie.py +2 -1
  23. doctr/datasets/svhn.py +2 -1
  24. doctr/datasets/svt.py +3 -2
  25. doctr/datasets/synthtext.py +2 -1
  26. doctr/datasets/utils.py +27 -11
  27. doctr/datasets/vocabs.py +26 -1
  28. doctr/datasets/wildreceipt.py +111 -0
  29. doctr/file_utils.py +3 -1
  30. doctr/io/elements.py +52 -35
  31. doctr/io/html.py +5 -3
  32. doctr/io/image/base.py +5 -4
  33. doctr/io/image/pytorch.py +12 -7
  34. doctr/io/image/tensorflow.py +11 -6
  35. doctr/io/pdf.py +5 -4
  36. doctr/io/reader.py +13 -5
  37. doctr/models/_utils.py +30 -53
  38. doctr/models/artefacts/barcode.py +4 -3
  39. doctr/models/artefacts/face.py +4 -2
  40. doctr/models/builder.py +58 -43
  41. doctr/models/classification/__init__.py +1 -0
  42. doctr/models/classification/magc_resnet/pytorch.py +5 -2
  43. doctr/models/classification/magc_resnet/tensorflow.py +5 -2
  44. doctr/models/classification/mobilenet/pytorch.py +16 -4
  45. doctr/models/classification/mobilenet/tensorflow.py +29 -20
  46. doctr/models/classification/predictor/pytorch.py +3 -2
  47. doctr/models/classification/predictor/tensorflow.py +2 -1
  48. doctr/models/classification/resnet/pytorch.py +23 -13
  49. doctr/models/classification/resnet/tensorflow.py +33 -26
  50. doctr/models/classification/textnet/__init__.py +6 -0
  51. doctr/models/classification/textnet/pytorch.py +275 -0
  52. doctr/models/classification/textnet/tensorflow.py +267 -0
  53. doctr/models/classification/vgg/pytorch.py +4 -2
  54. doctr/models/classification/vgg/tensorflow.py +5 -2
  55. doctr/models/classification/vit/pytorch.py +9 -3
  56. doctr/models/classification/vit/tensorflow.py +9 -3
  57. doctr/models/classification/zoo.py +7 -2
  58. doctr/models/core.py +1 -1
  59. doctr/models/detection/__init__.py +1 -0
  60. doctr/models/detection/_utils/pytorch.py +7 -1
  61. doctr/models/detection/_utils/tensorflow.py +7 -3
  62. doctr/models/detection/core.py +9 -3
  63. doctr/models/detection/differentiable_binarization/base.py +37 -25
  64. doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
  65. doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
  66. doctr/models/detection/fast/__init__.py +6 -0
  67. doctr/models/detection/fast/base.py +256 -0
  68. doctr/models/detection/fast/pytorch.py +442 -0
  69. doctr/models/detection/fast/tensorflow.py +428 -0
  70. doctr/models/detection/linknet/base.py +12 -5
  71. doctr/models/detection/linknet/pytorch.py +28 -15
  72. doctr/models/detection/linknet/tensorflow.py +68 -88
  73. doctr/models/detection/predictor/pytorch.py +16 -6
  74. doctr/models/detection/predictor/tensorflow.py +13 -5
  75. doctr/models/detection/zoo.py +19 -16
  76. doctr/models/factory/hub.py +20 -10
  77. doctr/models/kie_predictor/base.py +2 -1
  78. doctr/models/kie_predictor/pytorch.py +28 -36
  79. doctr/models/kie_predictor/tensorflow.py +27 -27
  80. doctr/models/modules/__init__.py +1 -0
  81. doctr/models/modules/layers/__init__.py +6 -0
  82. doctr/models/modules/layers/pytorch.py +166 -0
  83. doctr/models/modules/layers/tensorflow.py +175 -0
  84. doctr/models/modules/transformer/pytorch.py +24 -22
  85. doctr/models/modules/transformer/tensorflow.py +6 -4
  86. doctr/models/modules/vision_transformer/pytorch.py +2 -4
  87. doctr/models/modules/vision_transformer/tensorflow.py +2 -4
  88. doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
  89. doctr/models/predictor/base.py +14 -3
  90. doctr/models/predictor/pytorch.py +26 -29
  91. doctr/models/predictor/tensorflow.py +25 -22
  92. doctr/models/preprocessor/pytorch.py +14 -9
  93. doctr/models/preprocessor/tensorflow.py +10 -5
  94. doctr/models/recognition/core.py +4 -1
  95. doctr/models/recognition/crnn/pytorch.py +23 -16
  96. doctr/models/recognition/crnn/tensorflow.py +25 -17
  97. doctr/models/recognition/master/base.py +4 -1
  98. doctr/models/recognition/master/pytorch.py +20 -9
  99. doctr/models/recognition/master/tensorflow.py +20 -8
  100. doctr/models/recognition/parseq/base.py +4 -1
  101. doctr/models/recognition/parseq/pytorch.py +28 -22
  102. doctr/models/recognition/parseq/tensorflow.py +22 -11
  103. doctr/models/recognition/predictor/_utils.py +3 -2
  104. doctr/models/recognition/predictor/pytorch.py +3 -2
  105. doctr/models/recognition/predictor/tensorflow.py +2 -1
  106. doctr/models/recognition/sar/pytorch.py +14 -7
  107. doctr/models/recognition/sar/tensorflow.py +23 -14
  108. doctr/models/recognition/utils.py +5 -1
  109. doctr/models/recognition/vitstr/base.py +4 -1
  110. doctr/models/recognition/vitstr/pytorch.py +22 -13
  111. doctr/models/recognition/vitstr/tensorflow.py +21 -10
  112. doctr/models/recognition/zoo.py +4 -2
  113. doctr/models/utils/pytorch.py +24 -6
  114. doctr/models/utils/tensorflow.py +22 -3
  115. doctr/models/zoo.py +21 -3
  116. doctr/transforms/functional/base.py +8 -3
  117. doctr/transforms/functional/pytorch.py +23 -6
  118. doctr/transforms/functional/tensorflow.py +25 -5
  119. doctr/transforms/modules/base.py +12 -5
  120. doctr/transforms/modules/pytorch.py +10 -12
  121. doctr/transforms/modules/tensorflow.py +17 -9
  122. doctr/utils/common_types.py +1 -1
  123. doctr/utils/data.py +4 -2
  124. doctr/utils/fonts.py +3 -2
  125. doctr/utils/geometry.py +95 -26
  126. doctr/utils/metrics.py +36 -22
  127. doctr/utils/multithreading.py +5 -3
  128. doctr/utils/repr.py +3 -1
  129. doctr/utils/visualization.py +31 -8
  130. doctr/version.py +1 -1
  131. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
  132. python_doctr-0.8.1.dist-info/RECORD +173 -0
  133. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
  134. python_doctr-0.7.0.dist-info/RECORD +0 -161
  135. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
  136. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
  137. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/io/image/pytorch.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -20,13 +20,14 @@ def tensor_from_pil(pil_img: Image, dtype: torch.dtype = torch.float32) -> torch
20
20
  """Convert a PIL Image to a PyTorch tensor
21
21
 
22
22
  Args:
23
+ ----
23
24
  pil_img: a PIL image
24
25
  dtype: the output tensor data type
25
26
 
26
27
  Returns:
28
+ -------
27
29
  decoded image as tensor
28
30
  """
29
-
30
31
  if dtype == torch.float32:
31
32
  img = to_tensor(pil_img)
32
33
  else:
@@ -39,13 +40,14 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float3
39
40
  """Read an image file as a PyTorch tensor
40
41
 
41
42
  Args:
43
+ ----
42
44
  img_path: location of the image file
43
45
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
44
46
 
45
47
  Returns:
48
+ -------
46
49
  decoded image as a tensor
47
50
  """
48
-
49
51
  if dtype not in (torch.uint8, torch.float16, torch.float32):
50
52
  raise ValueError("insupported value for dtype")
51
53
 
@@ -58,13 +60,14 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32)
58
60
  """Read a byte stream as a PyTorch tensor
59
61
 
60
62
  Args:
63
+ ----
61
64
  img_content: bytes of a decoded image
62
65
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
63
66
 
64
67
  Returns:
68
+ -------
65
69
  decoded image as a tensor
66
70
  """
67
-
68
71
  if dtype not in (torch.uint8, torch.float16, torch.float32):
69
72
  raise ValueError("insupported value for dtype")
70
73
 
@@ -77,13 +80,14 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
77
80
  """Read an image file as a PyTorch tensor
78
81
 
79
82
  Args:
80
- img: image encoded as a numpy array of shape (H, W, C) in np.uint8
83
+ ----
84
+ npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
81
85
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
82
86
 
83
87
  Returns:
88
+ -------
84
89
  same image as a tensor of shape (C, H, W)
85
90
  """
86
-
87
91
  if dtype not in (torch.uint8, torch.float16, torch.float32):
88
92
  raise ValueError("insupported value for dtype")
89
93
 
@@ -101,4 +105,5 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
101
105
 
102
106
 
103
107
  def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
104
- return img.shape[-2:] # type: ignore[return-value]
108
+ """Get the shape of an image"""
109
+ return img.shape[-2:]
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -19,13 +19,14 @@ def tensor_from_pil(pil_img: Image, dtype: tf.dtypes.DType = tf.float32) -> tf.T
19
19
  """Convert a PIL Image to a TensorFlow tensor
20
20
 
21
21
  Args:
22
+ ----
22
23
  pil_img: a PIL image
23
24
  dtype: the output tensor data type
24
25
 
25
26
  Returns:
27
+ -------
26
28
  decoded image as tensor
27
29
  """
28
-
29
30
  npy_img = img_to_array(pil_img)
30
31
 
31
32
  return tensor_from_numpy(npy_img, dtype)
@@ -35,13 +36,14 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: tf.dtypes.DType = tf.float
35
36
  """Read an image file as a TensorFlow tensor
36
37
 
37
38
  Args:
39
+ ----
38
40
  img_path: location of the image file
39
41
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
40
42
 
41
43
  Returns:
44
+ -------
42
45
  decoded image as a tensor
43
46
  """
44
-
45
47
  if dtype not in (tf.uint8, tf.float16, tf.float32):
46
48
  raise ValueError("insupported value for dtype")
47
49
 
@@ -59,13 +61,14 @@ def decode_img_as_tensor(img_content: bytes, dtype: tf.dtypes.DType = tf.float32
59
61
  """Read a byte stream as a TensorFlow tensor
60
62
 
61
63
  Args:
64
+ ----
62
65
  img_content: bytes of a decoded image
63
66
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
64
67
 
65
68
  Returns:
69
+ -------
66
70
  decoded image as a tensor
67
71
  """
68
-
69
72
  if dtype not in (tf.uint8, tf.float16, tf.float32):
70
73
  raise ValueError("insupported value for dtype")
71
74
 
@@ -82,13 +85,14 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
82
85
  """Read an image file as a TensorFlow tensor
83
86
 
84
87
  Args:
85
- img: image encoded as a numpy array of shape (H, W, C) in np.uint8
88
+ ----
89
+ npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
86
90
  dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
87
91
 
88
92
  Returns:
93
+ -------
89
94
  same image as a tensor of shape (H, W, C)
90
95
  """
91
-
92
96
  if dtype not in (tf.uint8, tf.float16, tf.float32):
93
97
  raise ValueError("insupported value for dtype")
94
98
 
@@ -102,4 +106,5 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
102
106
 
103
107
 
104
108
  def get_img_shape(img: tf.Tensor) -> Tuple[int, int]:
109
+ """Get the shape of an image"""
105
110
  return img.shape[:2]
doctr/io/pdf.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -22,20 +22,21 @@ def read_pdf(
22
22
  ) -> List[np.ndarray]:
23
23
  """Read a PDF file and convert it into an image in numpy format
24
24
 
25
- >>> from doctr.documents import read_pdf
25
+ >>> from doctr.io import read_pdf
26
26
  >>> doc = read_pdf("path/to/your/doc.pdf")
27
27
 
28
28
  Args:
29
+ ----
29
30
  file: the path to the PDF file
30
31
  scale: rendering scale (1 corresponds to 72dpi)
31
32
  rgb_mode: if True, the output will be RGB, otherwise BGR
32
33
  password: a password to unlock the document, if encrypted
33
- kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
34
+ **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
34
35
 
35
36
  Returns:
37
+ -------
36
38
  the list of pages decoded as numpy ndarray of shape H x W x C
37
39
  """
38
-
39
40
  # Rasterise pages to numpy ndarrays with pypdfium2
40
41
  pdf = pdfium.PdfDocument(file, password=password, autoclose=True)
41
42
  return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]
doctr/io/reader.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -24,29 +24,34 @@ class DocumentFile:
24
24
  def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
25
25
  """Read a PDF file
26
26
 
27
- >>> from doctr.documents import DocumentFile
27
+ >>> from doctr.io import DocumentFile
28
28
  >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
29
29
 
30
30
  Args:
31
+ ----
31
32
  file: the path to the PDF file or a binary stream
33
+ **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
32
34
 
33
35
  Returns:
36
+ -------
34
37
  the list of pages decoded as numpy ndarray of shape H x W x 3
35
38
  """
36
-
37
39
  return read_pdf(file, **kwargs)
38
40
 
39
41
  @classmethod
40
42
  def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
41
43
  """Interpret a web page as a PDF document
42
44
 
43
- >>> from doctr.documents import DocumentFile
45
+ >>> from doctr.io import DocumentFile
44
46
  >>> doc = DocumentFile.from_url("https://www.yoursite.com")
45
47
 
46
48
  Args:
49
+ ----
47
50
  url: the URL of the target web page
51
+ **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
48
52
 
49
53
  Returns:
54
+ -------
50
55
  the list of pages decoded as numpy ndarray of shape H x W x 3
51
56
  """
52
57
  pdf_stream = read_html(url)
@@ -56,13 +61,16 @@ class DocumentFile:
56
61
  def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]:
57
62
  """Read an image file (or a collection of image files) and convert it into an image in numpy format
58
63
 
59
- >>> from doctr.documents import DocumentFile
64
+ >>> from doctr.io import DocumentFile
60
65
  >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
61
66
 
62
67
  Args:
68
+ ----
63
69
  files: the path to the image file or a binary stream, or a collection of those
70
+ **kwargs: additional parameters to :meth:`doctr.io.image.read_img_as_numpy`
64
71
 
65
72
  Returns:
73
+ -------
66
74
  the list of pages decoded as numpy ndarray of shape H x W x 3
67
75
  """
68
76
  if isinstance(files, (str, Path, bytes)):
doctr/models/_utils.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -11,43 +11,54 @@ import cv2
11
11
  import numpy as np
12
12
  from langdetect import LangDetectException, detect_langs
13
13
 
14
- __all__ = ["estimate_orientation", "get_bitmap_angle", "get_language", "invert_data_structure"]
14
+ __all__ = ["estimate_orientation", "get_language", "invert_data_structure"]
15
15
 
16
16
 
17
17
  def get_max_width_length_ratio(contour: np.ndarray) -> float:
18
18
  """Get the maximum shape ratio of a contour.
19
19
 
20
20
  Args:
21
+ ----
21
22
  contour: the contour from cv2.findContour
22
23
 
23
- Returns: the maximum shape ratio
24
+ Returns:
25
+ -------
26
+ the maximum shape ratio
24
27
  """
25
28
  _, (w, h), _ = cv2.minAreaRect(contour)
26
29
  return max(w / h, h / w)
27
30
 
28
31
 
29
- def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
32
+ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> int:
30
33
  """Estimate the angle of the general document orientation based on the
31
34
  lines of the document and the assumption that they should be horizontal.
32
35
 
33
36
  Args:
34
- img: the img to analyze
37
+ ----
38
+ img: the img or bitmap to analyze (H, W, C)
35
39
  n_ct: the number of contours used for the orientation estimation
36
40
  ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
37
41
 
38
42
  Returns:
43
+ -------
39
44
  the angle of the general document orientation
40
45
  """
41
- gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
42
- gray_img = cv2.medianBlur(gray_img, 5)
43
- thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
46
+ assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
47
+ max_value = np.max(img)
48
+ min_value = np.min(img)
49
+ if max_value <= 1 and min_value >= 0 or (max_value <= 255 and min_value >= 0 and img.shape[-1] == 1):
50
+ thresh = img.astype(np.uint8)
51
+ if max_value <= 255 and min_value >= 0 and img.shape[-1] == 3:
52
+ gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
53
+ gray_img = cv2.medianBlur(gray_img, 5)
54
+ thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] # type: ignore[assignment]
44
55
 
45
56
  # try to merge words in lines
46
57
  (h, w) = img.shape[:2]
47
58
  k_x = max(1, (floor(w / 100)))
48
59
  k_y = max(1, (floor(h / 100)))
49
60
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
50
- thresh = cv2.dilate(thresh, kernel, iterations=1)
61
+ thresh = cv2.dilate(thresh, kernel, iterations=1) # type: ignore[assignment]
51
62
 
52
63
  # extract contours
53
64
  contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
@@ -66,45 +77,8 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
66
77
  if len(angles) == 0:
67
78
  return 0 # in case no angles is found
68
79
  else:
69
- return -median_low(angles)
70
-
71
-
72
- def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.0) -> float:
73
- """From a binarized segmentation map, find contours and fit min area rectangles to determine page angle
74
-
75
- Args:
76
- bitmap: binarized segmentation map
77
- n_ct: number of contours to use to fit page angle
78
- std_max: maximum deviation of the angle distribution to consider the mean angle reliable
79
-
80
- Returns:
81
- The angle of the page
82
- """
83
- # Find all contours on binarized seg map
84
- contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
85
- # Sort contours
86
- contours = sorted(contours, key=cv2.contourArea, reverse=True)
87
-
88
- # Find largest contours and fit angles
89
- # Track heights and widths to find aspect ratio (determine is rotation is clockwise)
90
- angles, heights, widths = [], [], []
91
- for ct in contours[:n_ct]:
92
- _, (w, h), alpha = cv2.minAreaRect(ct)
93
- widths.append(w)
94
- heights.append(h)
95
- angles.append(alpha)
96
-
97
- if np.std(angles) > std_max:
98
- # Edge case with angles of both 0 and 90°, or multi_oriented docs
99
- angle = 0.0
100
- else:
101
- angle = -np.mean(angles)
102
- # Determine rotation direction (clockwise/counterclockwise)
103
- # Angle coverage: [-90°, +90°], half of the quadrant
104
- if np.sum(widths) < np.sum(heights): # CounterClockwise
105
- angle = 90 + angle
106
-
107
- return angle
80
+ median = -median_low(angles)
81
+ return round(median) if abs(median) != 0 else 0
108
82
 
109
83
 
110
84
  def rectify_crops(
@@ -149,9 +123,13 @@ def rectify_loc_preds(
149
123
  def get_language(text: str) -> Tuple[str, float]:
150
124
  """Get languages of a text using langdetect model.
151
125
  Get the language with the highest probability or no language if only a few words or a low probability
126
+
152
127
  Args:
128
+ ----
153
129
  text (str): text
130
+
154
131
  Returns:
132
+ -------
155
133
  The detected language in ISO 639 code and confidence score
156
134
  """
157
135
  try:
@@ -164,21 +142,20 @@ def get_language(text: str) -> Tuple[str, float]:
164
142
 
165
143
 
166
144
  def invert_data_structure(
167
- x: Union[List[Dict[str, Any]], Dict[str, List[Any]]]
145
+ x: Union[List[Dict[str, Any]], Dict[str, List[Any]]],
168
146
  ) -> Union[List[Dict[str, Any]], Dict[str, List[Any]]]:
169
147
  """Invert a List of Dict of elements to a Dict of list of elements and the other way around
170
148
 
171
149
  Args:
150
+ ----
172
151
  x: a list of dictionaries with the same keys or a dictionary of lists of the same length
173
152
 
174
153
  Returns:
154
+ -------
175
155
  dictionary of list when x is a list of dictionaries or a list of dictionaries when x is dictionary of lists
176
156
  """
177
-
178
157
  if isinstance(x, dict):
179
- assert (
180
- len(set([len(v) for v in x.values()])) == 1
181
- ), "All the lists in the dictionnary should have the same length."
158
+ assert len({len(v) for v in x.values()}) == 1, "All the lists in the dictionnary should have the same length."
182
159
  return [dict(zip(x, t)) for t in zip(*x.values())]
183
160
  elif isinstance(x, list):
184
161
  return {k: [dic[k] for dic in x] for k in x[0]}
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -12,11 +12,11 @@ __all__ = ["BarCodeDetector"]
12
12
 
13
13
 
14
14
  class BarCodeDetector:
15
-
16
15
  """Implements a Bar-code detector.
17
16
  For now, only horizontal (or with a small angle) bar-codes are supported
18
17
 
19
18
  Args:
19
+ ----
20
20
  min_size: minimum relative size of a barcode on the page
21
21
  canny_minval: lower bound for canny hysteresis
22
22
  canny_maxval: upper-bound for canny hysteresis
@@ -35,7 +35,8 @@ class BarCodeDetector:
35
35
  Args:
36
36
  img: np image
37
37
 
38
- Returns:
38
+ Returns
39
+ -------
39
40
  A list of tuples: [(xmin, ymin, xmax, ymax), ...] containing barcodes rel. coordinates
40
41
  """
41
42
  # get image size and define parameters
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -14,11 +14,11 @@ __all__ = ["FaceDetector"]
14
14
 
15
15
 
16
16
  class FaceDetector(NestedObject):
17
-
18
17
  """Implements a face detector to detect profile pictures on resumes, IDS, driving licenses, passports...
19
18
  Based on open CV CascadeClassifier (haarcascades)
20
19
 
21
20
  Args:
21
+ ----
22
22
  n_faces: maximal number of faces to detect on a single image, default = 1
23
23
  """
24
24
 
@@ -42,9 +42,11 @@ class FaceDetector(NestedObject):
42
42
  """Detect n_faces on the img
43
43
 
44
44
  Args:
45
+ ----
45
46
  img: image to detect faces on
46
47
 
47
48
  Returns:
49
+ -------
48
50
  A list of size n_faces, each face is a tuple of relative xmin, ymin, xmax, ymax
49
51
  """
50
52
  height, width = img.shape[:2]