python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/datasets/__init__.py +2 -0
- doctr/datasets/cord.py +6 -4
- doctr/datasets/datasets/base.py +3 -2
- doctr/datasets/datasets/pytorch.py +4 -2
- doctr/datasets/datasets/tensorflow.py +4 -2
- doctr/datasets/detection.py +6 -3
- doctr/datasets/doc_artefacts.py +2 -1
- doctr/datasets/funsd.py +7 -8
- doctr/datasets/generator/base.py +3 -2
- doctr/datasets/generator/pytorch.py +3 -1
- doctr/datasets/generator/tensorflow.py +3 -1
- doctr/datasets/ic03.py +3 -2
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +6 -4
- doctr/datasets/iiithws.py +2 -1
- doctr/datasets/imgur5k.py +3 -2
- doctr/datasets/loader.py +4 -2
- doctr/datasets/mjsynth.py +2 -1
- doctr/datasets/ocr.py +2 -1
- doctr/datasets/orientation.py +40 -0
- doctr/datasets/recognition.py +3 -2
- doctr/datasets/sroie.py +2 -1
- doctr/datasets/svhn.py +2 -1
- doctr/datasets/svt.py +3 -2
- doctr/datasets/synthtext.py +2 -1
- doctr/datasets/utils.py +27 -11
- doctr/datasets/vocabs.py +26 -1
- doctr/datasets/wildreceipt.py +111 -0
- doctr/file_utils.py +3 -1
- doctr/io/elements.py +52 -35
- doctr/io/html.py +5 -3
- doctr/io/image/base.py +5 -4
- doctr/io/image/pytorch.py +12 -7
- doctr/io/image/tensorflow.py +11 -6
- doctr/io/pdf.py +5 -4
- doctr/io/reader.py +13 -5
- doctr/models/_utils.py +30 -53
- doctr/models/artefacts/barcode.py +4 -3
- doctr/models/artefacts/face.py +4 -2
- doctr/models/builder.py +58 -43
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/pytorch.py +5 -2
- doctr/models/classification/magc_resnet/tensorflow.py +5 -2
- doctr/models/classification/mobilenet/pytorch.py +16 -4
- doctr/models/classification/mobilenet/tensorflow.py +29 -20
- doctr/models/classification/predictor/pytorch.py +3 -2
- doctr/models/classification/predictor/tensorflow.py +2 -1
- doctr/models/classification/resnet/pytorch.py +23 -13
- doctr/models/classification/resnet/tensorflow.py +33 -26
- doctr/models/classification/textnet/__init__.py +6 -0
- doctr/models/classification/textnet/pytorch.py +275 -0
- doctr/models/classification/textnet/tensorflow.py +267 -0
- doctr/models/classification/vgg/pytorch.py +4 -2
- doctr/models/classification/vgg/tensorflow.py +5 -2
- doctr/models/classification/vit/pytorch.py +9 -3
- doctr/models/classification/vit/tensorflow.py +9 -3
- doctr/models/classification/zoo.py +7 -2
- doctr/models/core.py +1 -1
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/pytorch.py +7 -1
- doctr/models/detection/_utils/tensorflow.py +7 -3
- doctr/models/detection/core.py +9 -3
- doctr/models/detection/differentiable_binarization/base.py +37 -25
- doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
- doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +256 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +12 -5
- doctr/models/detection/linknet/pytorch.py +28 -15
- doctr/models/detection/linknet/tensorflow.py +68 -88
- doctr/models/detection/predictor/pytorch.py +16 -6
- doctr/models/detection/predictor/tensorflow.py +13 -5
- doctr/models/detection/zoo.py +19 -16
- doctr/models/factory/hub.py +20 -10
- doctr/models/kie_predictor/base.py +2 -1
- doctr/models/kie_predictor/pytorch.py +28 -36
- doctr/models/kie_predictor/tensorflow.py +27 -27
- doctr/models/modules/__init__.py +1 -0
- doctr/models/modules/layers/__init__.py +6 -0
- doctr/models/modules/layers/pytorch.py +166 -0
- doctr/models/modules/layers/tensorflow.py +175 -0
- doctr/models/modules/transformer/pytorch.py +24 -22
- doctr/models/modules/transformer/tensorflow.py +6 -4
- doctr/models/modules/vision_transformer/pytorch.py +2 -4
- doctr/models/modules/vision_transformer/tensorflow.py +2 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
- doctr/models/predictor/base.py +14 -3
- doctr/models/predictor/pytorch.py +26 -29
- doctr/models/predictor/tensorflow.py +25 -22
- doctr/models/preprocessor/pytorch.py +14 -9
- doctr/models/preprocessor/tensorflow.py +10 -5
- doctr/models/recognition/core.py +4 -1
- doctr/models/recognition/crnn/pytorch.py +23 -16
- doctr/models/recognition/crnn/tensorflow.py +25 -17
- doctr/models/recognition/master/base.py +4 -1
- doctr/models/recognition/master/pytorch.py +20 -9
- doctr/models/recognition/master/tensorflow.py +20 -8
- doctr/models/recognition/parseq/base.py +4 -1
- doctr/models/recognition/parseq/pytorch.py +28 -22
- doctr/models/recognition/parseq/tensorflow.py +22 -11
- doctr/models/recognition/predictor/_utils.py +3 -2
- doctr/models/recognition/predictor/pytorch.py +3 -2
- doctr/models/recognition/predictor/tensorflow.py +2 -1
- doctr/models/recognition/sar/pytorch.py +14 -7
- doctr/models/recognition/sar/tensorflow.py +23 -14
- doctr/models/recognition/utils.py +5 -1
- doctr/models/recognition/vitstr/base.py +4 -1
- doctr/models/recognition/vitstr/pytorch.py +22 -13
- doctr/models/recognition/vitstr/tensorflow.py +21 -10
- doctr/models/recognition/zoo.py +4 -2
- doctr/models/utils/pytorch.py +24 -6
- doctr/models/utils/tensorflow.py +22 -3
- doctr/models/zoo.py +21 -3
- doctr/transforms/functional/base.py +8 -3
- doctr/transforms/functional/pytorch.py +23 -6
- doctr/transforms/functional/tensorflow.py +25 -5
- doctr/transforms/modules/base.py +12 -5
- doctr/transforms/modules/pytorch.py +10 -12
- doctr/transforms/modules/tensorflow.py +17 -9
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +4 -2
- doctr/utils/fonts.py +3 -2
- doctr/utils/geometry.py +95 -26
- doctr/utils/metrics.py +36 -22
- doctr/utils/multithreading.py +5 -3
- doctr/utils/repr.py +3 -1
- doctr/utils/visualization.py +31 -8
- doctr/version.py +1 -1
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
- python_doctr-0.8.1.dist-info/RECORD +173 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
- python_doctr-0.7.0.dist-info/RECORD +0 -161
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/io/image/pytorch.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -20,13 +20,14 @@ def tensor_from_pil(pil_img: Image, dtype: torch.dtype = torch.float32) -> torch
|
|
|
20
20
|
"""Convert a PIL Image to a PyTorch tensor
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
|
+
----
|
|
23
24
|
pil_img: a PIL image
|
|
24
25
|
dtype: the output tensor data type
|
|
25
26
|
|
|
26
27
|
Returns:
|
|
28
|
+
-------
|
|
27
29
|
decoded image as tensor
|
|
28
30
|
"""
|
|
29
|
-
|
|
30
31
|
if dtype == torch.float32:
|
|
31
32
|
img = to_tensor(pil_img)
|
|
32
33
|
else:
|
|
@@ -39,13 +40,14 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float3
|
|
|
39
40
|
"""Read an image file as a PyTorch tensor
|
|
40
41
|
|
|
41
42
|
Args:
|
|
43
|
+
----
|
|
42
44
|
img_path: location of the image file
|
|
43
45
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
44
46
|
|
|
45
47
|
Returns:
|
|
48
|
+
-------
|
|
46
49
|
decoded image as a tensor
|
|
47
50
|
"""
|
|
48
|
-
|
|
49
51
|
if dtype not in (torch.uint8, torch.float16, torch.float32):
|
|
50
52
|
raise ValueError("insupported value for dtype")
|
|
51
53
|
|
|
@@ -58,13 +60,14 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32)
|
|
|
58
60
|
"""Read a byte stream as a PyTorch tensor
|
|
59
61
|
|
|
60
62
|
Args:
|
|
63
|
+
----
|
|
61
64
|
img_content: bytes of a decoded image
|
|
62
65
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
63
66
|
|
|
64
67
|
Returns:
|
|
68
|
+
-------
|
|
65
69
|
decoded image as a tensor
|
|
66
70
|
"""
|
|
67
|
-
|
|
68
71
|
if dtype not in (torch.uint8, torch.float16, torch.float32):
|
|
69
72
|
raise ValueError("insupported value for dtype")
|
|
70
73
|
|
|
@@ -77,13 +80,14 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
|
|
|
77
80
|
"""Read an image file as a PyTorch tensor
|
|
78
81
|
|
|
79
82
|
Args:
|
|
80
|
-
|
|
83
|
+
----
|
|
84
|
+
npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
|
|
81
85
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
82
86
|
|
|
83
87
|
Returns:
|
|
88
|
+
-------
|
|
84
89
|
same image as a tensor of shape (C, H, W)
|
|
85
90
|
"""
|
|
86
|
-
|
|
87
91
|
if dtype not in (torch.uint8, torch.float16, torch.float32):
|
|
88
92
|
raise ValueError("insupported value for dtype")
|
|
89
93
|
|
|
@@ -101,4 +105,5 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
|
|
|
101
105
|
|
|
102
106
|
|
|
103
107
|
def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
|
|
104
|
-
|
|
108
|
+
"""Get the shape of an image"""
|
|
109
|
+
return img.shape[-2:]
|
doctr/io/image/tensorflow.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -19,13 +19,14 @@ def tensor_from_pil(pil_img: Image, dtype: tf.dtypes.DType = tf.float32) -> tf.T
|
|
|
19
19
|
"""Convert a PIL Image to a TensorFlow tensor
|
|
20
20
|
|
|
21
21
|
Args:
|
|
22
|
+
----
|
|
22
23
|
pil_img: a PIL image
|
|
23
24
|
dtype: the output tensor data type
|
|
24
25
|
|
|
25
26
|
Returns:
|
|
27
|
+
-------
|
|
26
28
|
decoded image as tensor
|
|
27
29
|
"""
|
|
28
|
-
|
|
29
30
|
npy_img = img_to_array(pil_img)
|
|
30
31
|
|
|
31
32
|
return tensor_from_numpy(npy_img, dtype)
|
|
@@ -35,13 +36,14 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: tf.dtypes.DType = tf.float
|
|
|
35
36
|
"""Read an image file as a TensorFlow tensor
|
|
36
37
|
|
|
37
38
|
Args:
|
|
39
|
+
----
|
|
38
40
|
img_path: location of the image file
|
|
39
41
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
40
42
|
|
|
41
43
|
Returns:
|
|
44
|
+
-------
|
|
42
45
|
decoded image as a tensor
|
|
43
46
|
"""
|
|
44
|
-
|
|
45
47
|
if dtype not in (tf.uint8, tf.float16, tf.float32):
|
|
46
48
|
raise ValueError("insupported value for dtype")
|
|
47
49
|
|
|
@@ -59,13 +61,14 @@ def decode_img_as_tensor(img_content: bytes, dtype: tf.dtypes.DType = tf.float32
|
|
|
59
61
|
"""Read a byte stream as a TensorFlow tensor
|
|
60
62
|
|
|
61
63
|
Args:
|
|
64
|
+
----
|
|
62
65
|
img_content: bytes of a decoded image
|
|
63
66
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
64
67
|
|
|
65
68
|
Returns:
|
|
69
|
+
-------
|
|
66
70
|
decoded image as a tensor
|
|
67
71
|
"""
|
|
68
|
-
|
|
69
72
|
if dtype not in (tf.uint8, tf.float16, tf.float32):
|
|
70
73
|
raise ValueError("insupported value for dtype")
|
|
71
74
|
|
|
@@ -82,13 +85,14 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
|
|
|
82
85
|
"""Read an image file as a TensorFlow tensor
|
|
83
86
|
|
|
84
87
|
Args:
|
|
85
|
-
|
|
88
|
+
----
|
|
89
|
+
npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
|
|
86
90
|
dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
|
|
87
91
|
|
|
88
92
|
Returns:
|
|
93
|
+
-------
|
|
89
94
|
same image as a tensor of shape (H, W, C)
|
|
90
95
|
"""
|
|
91
|
-
|
|
92
96
|
if dtype not in (tf.uint8, tf.float16, tf.float32):
|
|
93
97
|
raise ValueError("insupported value for dtype")
|
|
94
98
|
|
|
@@ -102,4 +106,5 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
|
|
|
102
106
|
|
|
103
107
|
|
|
104
108
|
def get_img_shape(img: tf.Tensor) -> Tuple[int, int]:
|
|
109
|
+
"""Get the shape of an image"""
|
|
105
110
|
return img.shape[:2]
|
doctr/io/pdf.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -22,20 +22,21 @@ def read_pdf(
|
|
|
22
22
|
) -> List[np.ndarray]:
|
|
23
23
|
"""Read a PDF file and convert it into an image in numpy format
|
|
24
24
|
|
|
25
|
-
>>> from doctr.
|
|
25
|
+
>>> from doctr.io import read_pdf
|
|
26
26
|
>>> doc = read_pdf("path/to/your/doc.pdf")
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
|
+
----
|
|
29
30
|
file: the path to the PDF file
|
|
30
31
|
scale: rendering scale (1 corresponds to 72dpi)
|
|
31
32
|
rgb_mode: if True, the output will be RGB, otherwise BGR
|
|
32
33
|
password: a password to unlock the document, if encrypted
|
|
33
|
-
kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
34
|
+
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
34
35
|
|
|
35
36
|
Returns:
|
|
37
|
+
-------
|
|
36
38
|
the list of pages decoded as numpy ndarray of shape H x W x C
|
|
37
39
|
"""
|
|
38
|
-
|
|
39
40
|
# Rasterise pages to numpy ndarrays with pypdfium2
|
|
40
41
|
pdf = pdfium.PdfDocument(file, password=password, autoclose=True)
|
|
41
42
|
return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf]
|
doctr/io/reader.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -24,29 +24,34 @@ class DocumentFile:
|
|
|
24
24
|
def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
|
|
25
25
|
"""Read a PDF file
|
|
26
26
|
|
|
27
|
-
>>> from doctr.
|
|
27
|
+
>>> from doctr.io import DocumentFile
|
|
28
28
|
>>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
+
----
|
|
31
32
|
file: the path to the PDF file or a binary stream
|
|
33
|
+
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
32
34
|
|
|
33
35
|
Returns:
|
|
36
|
+
-------
|
|
34
37
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
35
38
|
"""
|
|
36
|
-
|
|
37
39
|
return read_pdf(file, **kwargs)
|
|
38
40
|
|
|
39
41
|
@classmethod
|
|
40
42
|
def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
|
|
41
43
|
"""Interpret a web page as a PDF document
|
|
42
44
|
|
|
43
|
-
>>> from doctr.
|
|
45
|
+
>>> from doctr.io import DocumentFile
|
|
44
46
|
>>> doc = DocumentFile.from_url("https://www.yoursite.com")
|
|
45
47
|
|
|
46
48
|
Args:
|
|
49
|
+
----
|
|
47
50
|
url: the URL of the target web page
|
|
51
|
+
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
|
|
48
52
|
|
|
49
53
|
Returns:
|
|
54
|
+
-------
|
|
50
55
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
51
56
|
"""
|
|
52
57
|
pdf_stream = read_html(url)
|
|
@@ -56,13 +61,16 @@ class DocumentFile:
|
|
|
56
61
|
def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]:
|
|
57
62
|
"""Read an image file (or a collection of image files) and convert it into an image in numpy format
|
|
58
63
|
|
|
59
|
-
>>> from doctr.
|
|
64
|
+
>>> from doctr.io import DocumentFile
|
|
60
65
|
>>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
|
|
61
66
|
|
|
62
67
|
Args:
|
|
68
|
+
----
|
|
63
69
|
files: the path to the image file or a binary stream, or a collection of those
|
|
70
|
+
**kwargs: additional parameters to :meth:`doctr.io.image.read_img_as_numpy`
|
|
64
71
|
|
|
65
72
|
Returns:
|
|
73
|
+
-------
|
|
66
74
|
the list of pages decoded as numpy ndarray of shape H x W x 3
|
|
67
75
|
"""
|
|
68
76
|
if isinstance(files, (str, Path, bytes)):
|
doctr/models/_utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -11,43 +11,54 @@ import cv2
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from langdetect import LangDetectException, detect_langs
|
|
13
13
|
|
|
14
|
-
__all__ = ["estimate_orientation", "
|
|
14
|
+
__all__ = ["estimate_orientation", "get_language", "invert_data_structure"]
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def get_max_width_length_ratio(contour: np.ndarray) -> float:
|
|
18
18
|
"""Get the maximum shape ratio of a contour.
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
+
----
|
|
21
22
|
contour: the contour from cv2.findContour
|
|
22
23
|
|
|
23
|
-
Returns:
|
|
24
|
+
Returns:
|
|
25
|
+
-------
|
|
26
|
+
the maximum shape ratio
|
|
24
27
|
"""
|
|
25
28
|
_, (w, h), _ = cv2.minAreaRect(contour)
|
|
26
29
|
return max(w / h, h / w)
|
|
27
30
|
|
|
28
31
|
|
|
29
|
-
def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) ->
|
|
32
|
+
def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> int:
|
|
30
33
|
"""Estimate the angle of the general document orientation based on the
|
|
31
34
|
lines of the document and the assumption that they should be horizontal.
|
|
32
35
|
|
|
33
36
|
Args:
|
|
34
|
-
|
|
37
|
+
----
|
|
38
|
+
img: the img or bitmap to analyze (H, W, C)
|
|
35
39
|
n_ct: the number of contours used for the orientation estimation
|
|
36
40
|
ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
|
|
37
41
|
|
|
38
42
|
Returns:
|
|
43
|
+
-------
|
|
39
44
|
the angle of the general document orientation
|
|
40
45
|
"""
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
|
|
47
|
+
max_value = np.max(img)
|
|
48
|
+
min_value = np.min(img)
|
|
49
|
+
if max_value <= 1 and min_value >= 0 or (max_value <= 255 and min_value >= 0 and img.shape[-1] == 1):
|
|
50
|
+
thresh = img.astype(np.uint8)
|
|
51
|
+
if max_value <= 255 and min_value >= 0 and img.shape[-1] == 3:
|
|
52
|
+
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
53
|
+
gray_img = cv2.medianBlur(gray_img, 5)
|
|
54
|
+
thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] # type: ignore[assignment]
|
|
44
55
|
|
|
45
56
|
# try to merge words in lines
|
|
46
57
|
(h, w) = img.shape[:2]
|
|
47
58
|
k_x = max(1, (floor(w / 100)))
|
|
48
59
|
k_y = max(1, (floor(h / 100)))
|
|
49
60
|
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
|
|
50
|
-
thresh = cv2.dilate(thresh, kernel, iterations=1)
|
|
61
|
+
thresh = cv2.dilate(thresh, kernel, iterations=1) # type: ignore[assignment]
|
|
51
62
|
|
|
52
63
|
# extract contours
|
|
53
64
|
contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
|
@@ -66,45 +77,8 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
|
|
|
66
77
|
if len(angles) == 0:
|
|
67
78
|
return 0 # in case no angles is found
|
|
68
79
|
else:
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.0) -> float:
|
|
73
|
-
"""From a binarized segmentation map, find contours and fit min area rectangles to determine page angle
|
|
74
|
-
|
|
75
|
-
Args:
|
|
76
|
-
bitmap: binarized segmentation map
|
|
77
|
-
n_ct: number of contours to use to fit page angle
|
|
78
|
-
std_max: maximum deviation of the angle distribution to consider the mean angle reliable
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
The angle of the page
|
|
82
|
-
"""
|
|
83
|
-
# Find all contours on binarized seg map
|
|
84
|
-
contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
|
|
85
|
-
# Sort contours
|
|
86
|
-
contours = sorted(contours, key=cv2.contourArea, reverse=True)
|
|
87
|
-
|
|
88
|
-
# Find largest contours and fit angles
|
|
89
|
-
# Track heights and widths to find aspect ratio (determine is rotation is clockwise)
|
|
90
|
-
angles, heights, widths = [], [], []
|
|
91
|
-
for ct in contours[:n_ct]:
|
|
92
|
-
_, (w, h), alpha = cv2.minAreaRect(ct)
|
|
93
|
-
widths.append(w)
|
|
94
|
-
heights.append(h)
|
|
95
|
-
angles.append(alpha)
|
|
96
|
-
|
|
97
|
-
if np.std(angles) > std_max:
|
|
98
|
-
# Edge case with angles of both 0 and 90°, or multi_oriented docs
|
|
99
|
-
angle = 0.0
|
|
100
|
-
else:
|
|
101
|
-
angle = -np.mean(angles)
|
|
102
|
-
# Determine rotation direction (clockwise/counterclockwise)
|
|
103
|
-
# Angle coverage: [-90°, +90°], half of the quadrant
|
|
104
|
-
if np.sum(widths) < np.sum(heights): # CounterClockwise
|
|
105
|
-
angle = 90 + angle
|
|
106
|
-
|
|
107
|
-
return angle
|
|
80
|
+
median = -median_low(angles)
|
|
81
|
+
return round(median) if abs(median) != 0 else 0
|
|
108
82
|
|
|
109
83
|
|
|
110
84
|
def rectify_crops(
|
|
@@ -149,9 +123,13 @@ def rectify_loc_preds(
|
|
|
149
123
|
def get_language(text: str) -> Tuple[str, float]:
|
|
150
124
|
"""Get languages of a text using langdetect model.
|
|
151
125
|
Get the language with the highest probability or no language if only a few words or a low probability
|
|
126
|
+
|
|
152
127
|
Args:
|
|
128
|
+
----
|
|
153
129
|
text (str): text
|
|
130
|
+
|
|
154
131
|
Returns:
|
|
132
|
+
-------
|
|
155
133
|
The detected language in ISO 639 code and confidence score
|
|
156
134
|
"""
|
|
157
135
|
try:
|
|
@@ -164,21 +142,20 @@ def get_language(text: str) -> Tuple[str, float]:
|
|
|
164
142
|
|
|
165
143
|
|
|
166
144
|
def invert_data_structure(
|
|
167
|
-
x: Union[List[Dict[str, Any]], Dict[str, List[Any]]]
|
|
145
|
+
x: Union[List[Dict[str, Any]], Dict[str, List[Any]]],
|
|
168
146
|
) -> Union[List[Dict[str, Any]], Dict[str, List[Any]]]:
|
|
169
147
|
"""Invert a List of Dict of elements to a Dict of list of elements and the other way around
|
|
170
148
|
|
|
171
149
|
Args:
|
|
150
|
+
----
|
|
172
151
|
x: a list of dictionaries with the same keys or a dictionary of lists of the same length
|
|
173
152
|
|
|
174
153
|
Returns:
|
|
154
|
+
-------
|
|
175
155
|
dictionary of list when x is a list of dictionaries or a list of dictionaries when x is dictionary of lists
|
|
176
156
|
"""
|
|
177
|
-
|
|
178
157
|
if isinstance(x, dict):
|
|
179
|
-
assert (
|
|
180
|
-
len(set([len(v) for v in x.values()])) == 1
|
|
181
|
-
), "All the lists in the dictionnary should have the same length."
|
|
158
|
+
assert len({len(v) for v in x.values()}) == 1, "All the lists in the dictionnary should have the same length."
|
|
182
159
|
return [dict(zip(x, t)) for t in zip(*x.values())]
|
|
183
160
|
elif isinstance(x, list):
|
|
184
161
|
return {k: [dic[k] for dic in x] for k in x[0]}
|
doctr/models/artefacts/barcode.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -12,11 +12,11 @@ __all__ = ["BarCodeDetector"]
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class BarCodeDetector:
|
|
15
|
-
|
|
16
15
|
"""Implements a Bar-code detector.
|
|
17
16
|
For now, only horizontal (or with a small angle) bar-codes are supported
|
|
18
17
|
|
|
19
18
|
Args:
|
|
19
|
+
----
|
|
20
20
|
min_size: minimum relative size of a barcode on the page
|
|
21
21
|
canny_minval: lower bound for canny hysteresis
|
|
22
22
|
canny_maxval: upper-bound for canny hysteresis
|
|
@@ -35,7 +35,8 @@ class BarCodeDetector:
|
|
|
35
35
|
Args:
|
|
36
36
|
img: np image
|
|
37
37
|
|
|
38
|
-
Returns
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
39
40
|
A list of tuples: [(xmin, ymin, xmax, ymax), ...] containing barcodes rel. coordinates
|
|
40
41
|
"""
|
|
41
42
|
# get image size and define parameters
|
doctr/models/artefacts/face.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -14,11 +14,11 @@ __all__ = ["FaceDetector"]
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class FaceDetector(NestedObject):
|
|
17
|
-
|
|
18
17
|
"""Implements a face detector to detect profile pictures on resumes, IDS, driving licenses, passports...
|
|
19
18
|
Based on open CV CascadeClassifier (haarcascades)
|
|
20
19
|
|
|
21
20
|
Args:
|
|
21
|
+
----
|
|
22
22
|
n_faces: maximal number of faces to detect on a single image, default = 1
|
|
23
23
|
"""
|
|
24
24
|
|
|
@@ -42,9 +42,11 @@ class FaceDetector(NestedObject):
|
|
|
42
42
|
"""Detect n_faces on the img
|
|
43
43
|
|
|
44
44
|
Args:
|
|
45
|
+
----
|
|
45
46
|
img: image to detect faces on
|
|
46
47
|
|
|
47
48
|
Returns:
|
|
49
|
+
-------
|
|
48
50
|
A list of size n_faces, each face is a tuple of relative xmin, ymin, xmax, ymax
|
|
49
51
|
"""
|
|
50
52
|
height, width = img.shape[:2]
|