python-doctr 0.12.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +0 -1
- doctr/contrib/artefacts.py +1 -1
- doctr/contrib/base.py +1 -1
- doctr/datasets/__init__.py +0 -5
- doctr/datasets/coco_text.py +1 -1
- doctr/datasets/cord.py +1 -1
- doctr/datasets/datasets/__init__.py +1 -6
- doctr/datasets/datasets/base.py +1 -1
- doctr/datasets/datasets/pytorch.py +3 -3
- doctr/datasets/detection.py +1 -1
- doctr/datasets/doc_artefacts.py +1 -1
- doctr/datasets/funsd.py +1 -1
- doctr/datasets/generator/__init__.py +1 -6
- doctr/datasets/generator/base.py +1 -1
- doctr/datasets/generator/pytorch.py +1 -1
- doctr/datasets/ic03.py +1 -1
- doctr/datasets/ic13.py +1 -1
- doctr/datasets/iiit5k.py +1 -1
- doctr/datasets/iiithws.py +1 -1
- doctr/datasets/imgur5k.py +1 -1
- doctr/datasets/mjsynth.py +1 -1
- doctr/datasets/ocr.py +1 -1
- doctr/datasets/orientation.py +1 -1
- doctr/datasets/recognition.py +1 -1
- doctr/datasets/sroie.py +1 -1
- doctr/datasets/svhn.py +1 -1
- doctr/datasets/svt.py +1 -1
- doctr/datasets/synthtext.py +1 -1
- doctr/datasets/utils.py +1 -1
- doctr/datasets/vocabs.py +1 -3
- doctr/datasets/wildreceipt.py +1 -1
- doctr/file_utils.py +3 -102
- doctr/io/elements.py +1 -1
- doctr/io/html.py +1 -1
- doctr/io/image/__init__.py +1 -7
- doctr/io/image/base.py +1 -1
- doctr/io/image/pytorch.py +2 -2
- doctr/io/pdf.py +1 -1
- doctr/io/reader.py +1 -1
- doctr/models/_utils.py +56 -18
- doctr/models/builder.py +1 -1
- doctr/models/classification/magc_resnet/__init__.py +1 -6
- doctr/models/classification/magc_resnet/pytorch.py +3 -3
- doctr/models/classification/mobilenet/__init__.py +1 -6
- doctr/models/classification/mobilenet/pytorch.py +1 -1
- doctr/models/classification/predictor/__init__.py +1 -6
- doctr/models/classification/predictor/pytorch.py +2 -2
- doctr/models/classification/resnet/__init__.py +1 -6
- doctr/models/classification/resnet/pytorch.py +1 -1
- doctr/models/classification/textnet/__init__.py +1 -6
- doctr/models/classification/textnet/pytorch.py +2 -2
- doctr/models/classification/vgg/__init__.py +1 -6
- doctr/models/classification/vgg/pytorch.py +1 -1
- doctr/models/classification/vip/__init__.py +1 -4
- doctr/models/classification/vip/layers/__init__.py +1 -4
- doctr/models/classification/vip/layers/pytorch.py +2 -2
- doctr/models/classification/vip/pytorch.py +1 -1
- doctr/models/classification/vit/__init__.py +1 -6
- doctr/models/classification/vit/pytorch.py +3 -3
- doctr/models/classification/zoo.py +7 -12
- doctr/models/core.py +1 -1
- doctr/models/detection/_utils/__init__.py +1 -6
- doctr/models/detection/_utils/base.py +1 -1
- doctr/models/detection/_utils/pytorch.py +1 -1
- doctr/models/detection/core.py +2 -2
- doctr/models/detection/differentiable_binarization/__init__.py +1 -6
- doctr/models/detection/differentiable_binarization/base.py +5 -13
- doctr/models/detection/differentiable_binarization/pytorch.py +4 -4
- doctr/models/detection/fast/__init__.py +1 -6
- doctr/models/detection/fast/base.py +5 -15
- doctr/models/detection/fast/pytorch.py +5 -5
- doctr/models/detection/linknet/__init__.py +1 -6
- doctr/models/detection/linknet/base.py +4 -13
- doctr/models/detection/linknet/pytorch.py +3 -3
- doctr/models/detection/predictor/__init__.py +1 -6
- doctr/models/detection/predictor/pytorch.py +2 -2
- doctr/models/detection/zoo.py +16 -33
- doctr/models/factory/hub.py +26 -34
- doctr/models/kie_predictor/__init__.py +1 -6
- doctr/models/kie_predictor/base.py +1 -1
- doctr/models/kie_predictor/pytorch.py +3 -7
- doctr/models/modules/layers/__init__.py +1 -6
- doctr/models/modules/layers/pytorch.py +4 -4
- doctr/models/modules/transformer/__init__.py +1 -6
- doctr/models/modules/transformer/pytorch.py +3 -3
- doctr/models/modules/vision_transformer/__init__.py +1 -6
- doctr/models/modules/vision_transformer/pytorch.py +1 -1
- doctr/models/predictor/__init__.py +1 -6
- doctr/models/predictor/base.py +4 -9
- doctr/models/predictor/pytorch.py +3 -6
- doctr/models/preprocessor/__init__.py +1 -6
- doctr/models/preprocessor/pytorch.py +28 -33
- doctr/models/recognition/core.py +1 -1
- doctr/models/recognition/crnn/__init__.py +1 -6
- doctr/models/recognition/crnn/pytorch.py +7 -7
- doctr/models/recognition/master/__init__.py +1 -6
- doctr/models/recognition/master/base.py +1 -1
- doctr/models/recognition/master/pytorch.py +6 -6
- doctr/models/recognition/parseq/__init__.py +1 -6
- doctr/models/recognition/parseq/base.py +1 -1
- doctr/models/recognition/parseq/pytorch.py +6 -6
- doctr/models/recognition/predictor/__init__.py +1 -6
- doctr/models/recognition/predictor/_utils.py +8 -17
- doctr/models/recognition/predictor/pytorch.py +2 -3
- doctr/models/recognition/sar/__init__.py +1 -6
- doctr/models/recognition/sar/pytorch.py +4 -4
- doctr/models/recognition/utils.py +1 -1
- doctr/models/recognition/viptr/__init__.py +1 -4
- doctr/models/recognition/viptr/pytorch.py +4 -4
- doctr/models/recognition/vitstr/__init__.py +1 -6
- doctr/models/recognition/vitstr/base.py +1 -1
- doctr/models/recognition/vitstr/pytorch.py +4 -4
- doctr/models/recognition/zoo.py +14 -14
- doctr/models/utils/__init__.py +1 -6
- doctr/models/utils/pytorch.py +3 -2
- doctr/models/zoo.py +1 -1
- doctr/transforms/functional/__init__.py +1 -6
- doctr/transforms/functional/base.py +3 -2
- doctr/transforms/functional/pytorch.py +5 -5
- doctr/transforms/modules/__init__.py +1 -7
- doctr/transforms/modules/base.py +28 -94
- doctr/transforms/modules/pytorch.py +29 -27
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +1 -2
- doctr/utils/fonts.py +1 -1
- doctr/utils/geometry.py +7 -11
- doctr/utils/metrics.py +1 -1
- doctr/utils/multithreading.py +1 -1
- doctr/utils/reconstitution.py +1 -1
- doctr/utils/repr.py +1 -1
- doctr/utils/visualization.py +2 -2
- doctr/version.py +1 -1
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/METADATA +30 -80
- python_doctr-1.0.1.dist-info/RECORD +149 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/WHEEL +1 -1
- doctr/datasets/datasets/tensorflow.py +0 -59
- doctr/datasets/generator/tensorflow.py +0 -58
- doctr/datasets/loader.py +0 -94
- doctr/io/image/tensorflow.py +0 -101
- doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- doctr/models/classification/mobilenet/tensorflow.py +0 -442
- doctr/models/classification/predictor/tensorflow.py +0 -60
- doctr/models/classification/resnet/tensorflow.py +0 -418
- doctr/models/classification/textnet/tensorflow.py +0 -275
- doctr/models/classification/vgg/tensorflow.py +0 -125
- doctr/models/classification/vit/tensorflow.py +0 -201
- doctr/models/detection/_utils/tensorflow.py +0 -34
- doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
- doctr/models/detection/fast/tensorflow.py +0 -427
- doctr/models/detection/linknet/tensorflow.py +0 -377
- doctr/models/detection/predictor/tensorflow.py +0 -70
- doctr/models/kie_predictor/tensorflow.py +0 -187
- doctr/models/modules/layers/tensorflow.py +0 -171
- doctr/models/modules/transformer/tensorflow.py +0 -235
- doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- doctr/models/predictor/tensorflow.py +0 -155
- doctr/models/preprocessor/tensorflow.py +0 -122
- doctr/models/recognition/crnn/tensorflow.py +0 -317
- doctr/models/recognition/master/tensorflow.py +0 -320
- doctr/models/recognition/parseq/tensorflow.py +0 -516
- doctr/models/recognition/predictor/tensorflow.py +0 -79
- doctr/models/recognition/sar/tensorflow.py +0 -423
- doctr/models/recognition/vitstr/tensorflow.py +0 -285
- doctr/models/utils/tensorflow.py +0 -189
- doctr/transforms/functional/tensorflow.py +0 -254
- doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.12.0.dist-info/RECORD +0 -180
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/zip-safe +0 -0
doctr/__init__.py
CHANGED
doctr/contrib/artefacts.py
CHANGED
doctr/contrib/base.py
CHANGED
doctr/datasets/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from doctr.file_utils import is_tf_available
|
|
2
|
-
|
|
3
1
|
from .generator import *
|
|
4
2
|
from .coco_text import *
|
|
5
3
|
from .cord import *
|
|
@@ -22,6 +20,3 @@ from .synthtext import *
|
|
|
22
20
|
from .utils import *
|
|
23
21
|
from .vocabs import *
|
|
24
22
|
from .wildreceipt import *
|
|
25
|
-
|
|
26
|
-
if is_tf_available():
|
|
27
|
-
from .loader import *
|
doctr/datasets/coco_text.py
CHANGED
doctr/datasets/cord.py
CHANGED
doctr/datasets/datasets/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
|
|
|
50
50
|
@staticmethod
|
|
51
51
|
def collate_fn(samples: list[tuple[torch.Tensor, Any]]) -> tuple[torch.Tensor, list[Any]]:
|
|
52
52
|
images, targets = zip(*samples)
|
|
53
|
-
images = torch.stack(images, dim=0)
|
|
53
|
+
images = torch.stack(images, dim=0) # type: ignore[assignment]
|
|
54
54
|
|
|
55
|
-
return images, list(targets)
|
|
55
|
+
return images, list(targets) # type: ignore[return-value]
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
|
doctr/datasets/detection.py
CHANGED
doctr/datasets/doc_artefacts.py
CHANGED
doctr/datasets/funsd.py
CHANGED
doctr/datasets/generator/base.py
CHANGED
doctr/datasets/ic03.py
CHANGED
doctr/datasets/ic13.py
CHANGED
doctr/datasets/iiit5k.py
CHANGED
doctr/datasets/iiithws.py
CHANGED
doctr/datasets/imgur5k.py
CHANGED
doctr/datasets/mjsynth.py
CHANGED
doctr/datasets/ocr.py
CHANGED
doctr/datasets/orientation.py
CHANGED
doctr/datasets/recognition.py
CHANGED
doctr/datasets/sroie.py
CHANGED
doctr/datasets/svhn.py
CHANGED
doctr/datasets/svt.py
CHANGED
doctr/datasets/synthtext.py
CHANGED
doctr/datasets/utils.py
CHANGED
doctr/datasets/vocabs.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -264,8 +264,6 @@ VOCABS["estonian"] = VOCABS["english"] + "šžõäöüŠŽÕÄÖÜ"
|
|
|
264
264
|
VOCABS["esperanto"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "ĉĝĥĵŝŭĈĜĤĴŜŬ" + "₷"
|
|
265
265
|
|
|
266
266
|
VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
|
|
267
|
-
# NOTE: legacy french is outdated, but kept for compatibility
|
|
268
|
-
VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂÉÈËÎÏÔÙÛÇ" + _BASE_VOCABS["currency"]
|
|
269
267
|
|
|
270
268
|
VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
|
|
271
269
|
|
doctr/datasets/wildreceipt.py
CHANGED
doctr/file_utils.py
CHANGED
|
@@ -1,104 +1,15 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
|
-
# Adapted from https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
|
|
7
|
-
|
|
8
6
|
import importlib.metadata
|
|
9
|
-
import importlib.util
|
|
10
7
|
import logging
|
|
11
|
-
import os
|
|
12
|
-
|
|
13
|
-
CLASS_NAME: str = "words"
|
|
14
|
-
|
|
15
8
|
|
|
16
|
-
__all__ = ["
|
|
9
|
+
__all__ = ["requires_package", "CLASS_NAME"]
|
|
17
10
|
|
|
11
|
+
CLASS_NAME: str = "words"
|
|
18
12
|
ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
|
|
19
|
-
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
|
|
20
|
-
|
|
21
|
-
USE_TF = os.environ.get("USE_TF", "AUTO").upper()
|
|
22
|
-
USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper()
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
|
|
26
|
-
_torch_available = importlib.util.find_spec("torch") is not None
|
|
27
|
-
if _torch_available:
|
|
28
|
-
try:
|
|
29
|
-
_torch_version = importlib.metadata.version("torch")
|
|
30
|
-
logging.info(f"PyTorch version {_torch_version} available.")
|
|
31
|
-
except importlib.metadata.PackageNotFoundError: # pragma: no cover
|
|
32
|
-
_torch_available = False
|
|
33
|
-
else: # pragma: no cover
|
|
34
|
-
logging.info("Disabling PyTorch because USE_TF is set")
|
|
35
|
-
_torch_available = False
|
|
36
|
-
|
|
37
|
-
# Compatibility fix to make sure tensorflow.keras stays at Keras 2
|
|
38
|
-
if "TF_USE_LEGACY_KERAS" not in os.environ:
|
|
39
|
-
os.environ["TF_USE_LEGACY_KERAS"] = "1"
|
|
40
|
-
|
|
41
|
-
elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
|
|
42
|
-
raise ValueError(
|
|
43
|
-
"docTR is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def ensure_keras_v2() -> None: # pragma: no cover
|
|
48
|
-
if not os.environ.get("TF_USE_LEGACY_KERAS") == "1":
|
|
49
|
-
os.environ["TF_USE_LEGACY_KERAS"] = "1"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
|
|
53
|
-
_tf_available = importlib.util.find_spec("tensorflow") is not None
|
|
54
|
-
if _tf_available:
|
|
55
|
-
candidates = (
|
|
56
|
-
"tensorflow",
|
|
57
|
-
"tensorflow-cpu",
|
|
58
|
-
"tensorflow-gpu",
|
|
59
|
-
"tf-nightly",
|
|
60
|
-
"tf-nightly-cpu",
|
|
61
|
-
"tf-nightly-gpu",
|
|
62
|
-
"intel-tensorflow",
|
|
63
|
-
"tensorflow-rocm",
|
|
64
|
-
"tensorflow-macos",
|
|
65
|
-
)
|
|
66
|
-
_tf_version = None
|
|
67
|
-
# For the metadata, we have to look for both tensorflow and tensorflow-cpu
|
|
68
|
-
for pkg in candidates:
|
|
69
|
-
try:
|
|
70
|
-
_tf_version = importlib.metadata.version(pkg)
|
|
71
|
-
break
|
|
72
|
-
except importlib.metadata.PackageNotFoundError:
|
|
73
|
-
pass
|
|
74
|
-
_tf_available = _tf_version is not None
|
|
75
|
-
if _tf_available:
|
|
76
|
-
if int(_tf_version.split(".")[0]) < 2: # type: ignore[union-attr] # pragma: no cover
|
|
77
|
-
logging.info(f"TensorFlow found but with version {_tf_version}. DocTR requires version 2 minimum.")
|
|
78
|
-
_tf_available = False
|
|
79
|
-
else:
|
|
80
|
-
logging.info(f"TensorFlow version {_tf_version} available.")
|
|
81
|
-
ensure_keras_v2()
|
|
82
|
-
|
|
83
|
-
import warnings
|
|
84
|
-
|
|
85
|
-
warnings.simplefilter("always", DeprecationWarning)
|
|
86
|
-
warnings.warn(
|
|
87
|
-
"Support for TensorFlow in DocTR is deprecated and will be removed in the next major release (v1.0.0). "
|
|
88
|
-
"Please switch to the PyTorch backend.",
|
|
89
|
-
DeprecationWarning,
|
|
90
|
-
)
|
|
91
|
-
|
|
92
|
-
else: # pragma: no cover
|
|
93
|
-
logging.info("Disabling Tensorflow because USE_TORCH is set")
|
|
94
|
-
_tf_available = False
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if not _torch_available and not _tf_available: # pragma: no cover
|
|
98
|
-
raise ModuleNotFoundError(
|
|
99
|
-
"DocTR requires either TensorFlow or PyTorch to be installed. Please ensure one of them"
|
|
100
|
-
" is installed and that either USE_TF or USE_TORCH is enabled."
|
|
101
|
-
)
|
|
102
13
|
|
|
103
14
|
|
|
104
15
|
def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover
|
|
@@ -117,13 +28,3 @@ def requires_package(name: str, extra_message: str | None = None) -> None: # pr
|
|
|
117
28
|
f"\n\n{extra_message if extra_message is not None else ''} "
|
|
118
29
|
f"\nPlease install it with the following command: pip install {name}\n"
|
|
119
30
|
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def is_torch_available():
|
|
123
|
-
"""Whether PyTorch is installed."""
|
|
124
|
-
return _torch_available
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def is_tf_available():
|
|
128
|
-
"""Whether TensorFlow is installed."""
|
|
129
|
-
return _tf_available
|
doctr/io/elements.py
CHANGED
doctr/io/html.py
CHANGED
doctr/io/image/__init__.py
CHANGED
doctr/io/image/base.py
CHANGED
doctr/io/image/pytorch.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -95,4 +95,4 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
|
|
|
95
95
|
|
|
96
96
|
def get_img_shape(img: torch.Tensor) -> tuple[int, int]:
|
|
97
97
|
"""Get the shape of an image"""
|
|
98
|
-
return img.shape[-2:]
|
|
98
|
+
return img.shape[-2:] # type: ignore[return-value]
|
doctr/io/pdf.py
CHANGED
doctr/io/reader.py
CHANGED
doctr/models/_utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -26,6 +26,8 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
|
|
|
26
26
|
the maximum shape ratio
|
|
27
27
|
"""
|
|
28
28
|
_, (w, h), _ = cv2.minAreaRect(contour)
|
|
29
|
+
if w == 0 or h == 0:
|
|
30
|
+
return 0.0
|
|
29
31
|
return max(w / h, h / w)
|
|
30
32
|
|
|
31
33
|
|
|
@@ -53,7 +55,7 @@ def estimate_orientation(
|
|
|
53
55
|
the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
|
|
54
56
|
"""
|
|
55
57
|
assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
|
|
56
|
-
|
|
58
|
+
|
|
57
59
|
# Convert image to grayscale if necessary
|
|
58
60
|
if img.shape[-1] == 3:
|
|
59
61
|
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
@@ -62,11 +64,14 @@ def estimate_orientation(
|
|
|
62
64
|
else:
|
|
63
65
|
thresh = img.astype(np.uint8)
|
|
64
66
|
|
|
65
|
-
page_orientation, orientation_confidence = general_page_orientation or (
|
|
66
|
-
|
|
67
|
+
page_orientation, orientation_confidence = general_page_orientation or (0, 0.0)
|
|
68
|
+
is_confident = page_orientation is not None and orientation_confidence >= min_confidence
|
|
69
|
+
base_angle = page_orientation if is_confident else 0
|
|
70
|
+
|
|
71
|
+
if is_confident:
|
|
67
72
|
# We rotate the image to the general orientation which improves the detection
|
|
68
73
|
# No expand needed bitmap is already padded
|
|
69
|
-
thresh = rotate_image(thresh, -
|
|
74
|
+
thresh = rotate_image(thresh, -base_angle)
|
|
70
75
|
else: # That's only required if we do not work on the detection models bin map
|
|
71
76
|
# try to merge words in lines
|
|
72
77
|
(h, w) = img.shape[:2]
|
|
@@ -88,30 +93,63 @@ def estimate_orientation(
|
|
|
88
93
|
angles = []
|
|
89
94
|
for contour in contours[:n_ct]:
|
|
90
95
|
_, (w, h), angle = cv2.minAreaRect(contour)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
96
|
+
|
|
97
|
+
# OpenCV version-proof normalization: force 'w' to be the long side
|
|
98
|
+
# so the angle is consistently relative to the major axis.
|
|
99
|
+
# https://github.com/opencv/opencv/pull/28051/changes
|
|
100
|
+
if w < h:
|
|
101
|
+
w, h = h, w
|
|
102
|
+
angle -= 90
|
|
103
|
+
|
|
104
|
+
# Normalize angle to be within (-90, 90]
|
|
105
|
+
while angle <= -90:
|
|
106
|
+
angle += 180
|
|
107
|
+
while angle > 90:
|
|
108
|
+
angle -= 180
|
|
109
|
+
|
|
110
|
+
if h > 0:
|
|
111
|
+
if w / h > ratio_threshold_for_lines: # select only contours with ratio like lines
|
|
112
|
+
angles.append(angle)
|
|
113
|
+
elif w / h < 1 / ratio_threshold_for_lines: # if lines are vertical, subtract 90 degrees
|
|
114
|
+
angles.append(angle - 90)
|
|
95
115
|
|
|
96
116
|
if len(angles) == 0:
|
|
97
|
-
|
|
117
|
+
skew_angle = 0 # in case no angle is found
|
|
98
118
|
else:
|
|
119
|
+
# median_low picks a value from the data to avoid outliers
|
|
99
120
|
median = -median_low(angles)
|
|
100
|
-
|
|
121
|
+
skew_angle = -round(median) if abs(median) != 0 else 0
|
|
122
|
+
|
|
123
|
+
# Resolve the 90-degree flip ambiguity.
|
|
124
|
+
# If the estimation is exactly 90/-90, it's usually a vertical detection of horizontal lines.
|
|
125
|
+
if abs(skew_angle) == 90:
|
|
126
|
+
skew_angle = 0
|
|
101
127
|
|
|
102
128
|
# combine with the general orientation and the estimated angle
|
|
103
|
-
|
|
129
|
+
# Apply the detected skew to our base orientation
|
|
130
|
+
final_angle = base_angle + skew_angle
|
|
131
|
+
|
|
132
|
+
# Standardize result to [-179, 180] range to handle wrap-around cases (e.g., 180 + -31)
|
|
133
|
+
while final_angle > 180:
|
|
134
|
+
final_angle -= 360
|
|
135
|
+
while final_angle <= -180:
|
|
136
|
+
final_angle += 360
|
|
137
|
+
|
|
138
|
+
if is_confident:
|
|
139
|
+
# If the estimated angle is perpendicular, treat it as 0 to avoid wrong flips
|
|
140
|
+
if abs(skew_angle) % 90 == 0:
|
|
141
|
+
return page_orientation
|
|
142
|
+
|
|
104
143
|
# special case where the estimated angle is mostly wrong:
|
|
105
144
|
# case 1: - and + swapped
|
|
106
145
|
# case 2: estimated angle is completely wrong
|
|
107
146
|
# so in this case we prefer the general page orientation
|
|
108
|
-
if abs(
|
|
147
|
+
if abs(skew_angle) == abs(page_orientation) and page_orientation != 0:
|
|
109
148
|
return page_orientation
|
|
110
|
-
estimated_angle = estimated_angle if page_orientation == 0 else page_orientation + estimated_angle
|
|
111
|
-
if estimated_angle > 180:
|
|
112
|
-
estimated_angle -= 360
|
|
113
149
|
|
|
114
|
-
return
|
|
150
|
+
return int(
|
|
151
|
+
final_angle
|
|
152
|
+
) # return the clockwise angle (negative - left side rotation, positive - right side rotation)
|
|
115
153
|
|
|
116
154
|
|
|
117
155
|
def rectify_crops(
|
|
@@ -184,7 +222,7 @@ def invert_data_structure(
|
|
|
184
222
|
dictionary of list when x is a list of dictionaries or a list of dictionaries when x is dictionary of lists
|
|
185
223
|
"""
|
|
186
224
|
if isinstance(x, dict):
|
|
187
|
-
assert len({len(v) for v in x.values()}) == 1, "All the lists in the
|
|
225
|
+
assert len({len(v) for v in x.values()}) == 1, "All the lists in the dictionary should have the same length."
|
|
188
226
|
return [dict(zip(x, t)) for t in zip(*x.values())]
|
|
189
227
|
elif isinstance(x, list):
|
|
190
228
|
return {k: [dic[k] for dic in x] for k in x[0]}
|
doctr/models/builder.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -14,7 +14,7 @@ from torch import nn
|
|
|
14
14
|
|
|
15
15
|
from doctr.datasets import VOCABS
|
|
16
16
|
|
|
17
|
-
from ..resnet
|
|
17
|
+
from ..resnet import ResNet
|
|
18
18
|
|
|
19
19
|
__all__ = ["magc_resnet31"]
|
|
20
20
|
|
|
@@ -72,7 +72,7 @@ class MAGC(nn.Module):
|
|
|
72
72
|
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
|
|
73
73
|
batch, _, height, width = inputs.size()
|
|
74
74
|
# (N * headers, C / headers, H , W)
|
|
75
|
-
x = inputs.view(batch * self.headers, self.single_header_inplanes, height, width)
|
|
75
|
+
x = inputs.contiguous().view(batch * self.headers, self.single_header_inplanes, height, width)
|
|
76
76
|
shortcut = x
|
|
77
77
|
# (N * headers, C / headers, H * W)
|
|
78
78
|
shortcut = shortcut.view(batch * self.headers, self.single_header_inplanes, height * width)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2026, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -35,7 +35,7 @@ class OrientationPredictor(nn.Module):
|
|
|
35
35
|
@torch.inference_mode()
|
|
36
36
|
def forward(
|
|
37
37
|
self,
|
|
38
|
-
inputs: list[np.ndarray
|
|
38
|
+
inputs: list[np.ndarray],
|
|
39
39
|
) -> list[list[int] | list[float]]:
|
|
40
40
|
# Dimension check
|
|
41
41
|
if any(input.ndim != 3 for input in inputs):
|