python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/__init__.py +1 -0
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +10 -8
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +9 -8
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +5 -6
- doctr/datasets/ic13.py +6 -6
- doctr/datasets/iiit5k.py +10 -6
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -7
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +4 -5
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +7 -6
- doctr/datasets/svt.py +6 -7
- doctr/datasets/synthtext.py +19 -7
- doctr/datasets/utils.py +41 -35
- doctr/datasets/vocabs.py +1107 -49
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +11 -7
- doctr/io/elements.py +96 -82
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +15 -23
- doctr/models/builder.py +30 -48
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +11 -15
- doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +20 -18
- doctr/models/classification/mobilenet/tensorflow.py +19 -23
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +7 -9
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +47 -34
- doctr/models/classification/resnet/tensorflow.py +45 -35
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +20 -18
- doctr/models/classification/textnet/tensorflow.py +19 -17
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +21 -8
- doctr/models/classification/vgg/tensorflow.py +20 -14
- doctr/models/classification/vip/__init__.py +4 -0
- doctr/models/classification/vip/layers/__init__.py +4 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +18 -15
- doctr/models/classification/vit/tensorflow.py +15 -12
- doctr/models/classification/zoo.py +23 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +10 -21
- doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +8 -17
- doctr/models/detection/fast/pytorch.py +37 -35
- doctr/models/detection/fast/tensorflow.py +24 -28
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +8 -18
- doctr/models/detection/linknet/pytorch.py +34 -28
- doctr/models/detection/linknet/tensorflow.py +24 -25
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +6 -10
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +19 -20
- doctr/models/kie_predictor/tensorflow.py +14 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +55 -10
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +13 -14
- doctr/models/predictor/tensorflow.py +9 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +30 -29
- doctr/models/recognition/crnn/tensorflow.py +21 -24
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +32 -25
- doctr/models/recognition/master/tensorflow.py +22 -25
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +47 -29
- doctr/models/recognition/parseq/tensorflow.py +29 -27
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +111 -52
- doctr/models/recognition/predictor/pytorch.py +9 -9
- doctr/models/recognition/predictor/tensorflow.py +8 -9
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +30 -22
- doctr/models/recognition/sar/tensorflow.py +22 -24
- doctr/models/recognition/utils.py +57 -53
- doctr/models/recognition/viptr/__init__.py +4 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +28 -21
- doctr/models/recognition/vitstr/tensorflow.py +22 -23
- doctr/models/recognition/zoo.py +27 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +41 -34
- doctr/models/utils/tensorflow.py +31 -23
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +9 -13
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +17 -48
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
- python_doctr-0.12.0.dist-info/RECORD +180 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
doctr/models/classification/vgg/pytorch.py +21 -8

@@ -1,10 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
+import types
 from copy import deepcopy
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 from torch import nn
 from torchvision.models import vgg as tv_vgg
@@ -16,7 +17,7 @@ from ...utils import load_pretrained_params
 __all__ = ["vgg16_bn_r"]
 
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "vgg16_bn_r": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -32,7 +33,7 @@ def _vgg(
     pretrained: bool,
     tv_arch: str,
     num_rect_pools: int = 3,
-    ignore_keys: Optional[List[str]] = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> tv_vgg.VGG:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -45,7 +46,7 @@ def _vgg(
 
     # Build the model
     model = tv_vgg.__dict__[tv_arch](**kwargs, weights=None)
-    # List the MaxPool2d
+    # list the MaxPool2d
     pool_idcs = [idx for idx, m in enumerate(model.features) if isinstance(m, nn.MaxPool2d)]
     # Replace their kernel with rectangular ones
     for idx in pool_idcs[-num_rect_pools:]:
@@ -53,12 +54,26 @@ def _vgg(
     # Patch average pool & classification head
     model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
     model.classifier = nn.Linear(512, kwargs["num_classes"])
+
+    # monkeypatch the model to allow for loading pretrained parameters
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:  # noqa: D417
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    # Bind method to the instance
+    model.from_pretrained = types.MethodType(from_pretrained, model)
+
     # Load pretrained parameters
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # remove the last layer weights
         _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
-        load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     model.cfg = _cfg
 
@@ -77,12 +92,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         **kwargs: keyword arguments of the VGG architecture
 
     Returns:
-    -------
         VGG feature extractor
     """
     return _vgg(
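The hunks above hang a `from_pretrained` method on a model that was not built from a doctr class: `_vgg` returns a stock torchvision `VGG`, so the function is bound to the freshly built instance with `types.MethodType` instead of being defined on a subclass. A minimal, self-contained sketch of that binding pattern, with illustrative names (`Plain`, `hello`) that are not from doctr:

import types


class Plain:
    """Stand-in for a third-party class (here: torchvision's VGG) we don't control."""


def hello(self, name: str) -> str:
    # Once bound, `self` is the instance the function was attached to
    return f"{type(self).__name__} greets {name}"


obj = Plain()
# Bind `hello` to this single instance; other Plain instances are untouched
obj.hello = types.MethodType(hello, obj)
print(obj.hello("doctr"))  # -> "Plain greets doctr"

Because the function is wrapped in `types.MethodType`, Python supplies the instance as `self` automatically, which is what lets the hunk above call `model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)` without passing the model in explicitly.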
doctr/models/classification/vgg/tensorflow.py +20 -14

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from copy import deepcopy
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 from tensorflow.keras import layers
 from tensorflow.keras.models import Sequential
@@ -16,7 +16,7 @@ from ...utils import _build_model, conv_sequence, load_pretrained_params
 __all__ = ["VGG", "vgg16_bn_r"]
 
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "vgg16_bn_r": {
         "mean": (0.5, 0.5, 0.5),
         "std": (1.0, 1.0, 1.0),
@@ -32,7 +32,6 @@ class VGG(Sequential):
     <https://arxiv.org/pdf/1409.1556.pdf>`_.
 
     Args:
-    ----
         num_blocks: number of convolutional block in each stage
         planes: number of output channels in each stage
         rect_pools: whether pooling square kernels should be replace with rectangular ones
@@ -43,13 +42,13 @@ class VGG(Sequential):
 
     def __init__(
         self,
-        num_blocks: List[int],
-        planes: List[int],
-        rect_pools: List[bool],
+        num_blocks: list[int],
+        planes: list[int],
+        rect_pools: list[bool],
         include_top: bool = False,
         num_classes: int = 1000,
-        input_shape: Optional[Tuple[int, int, int]] = None,
-        cfg: Optional[Dict[str, Any]] = None,
+        input_shape: tuple[int, int, int] | None = None,
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         _layers = []
         # Specify input_shape only for the first layer
@@ -65,9 +64,18 @@ class VGG(Sequential):
         super().__init__(_layers)
         self.cfg = cfg
 
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
 
 def _vgg(
-    arch: str, pretrained: bool, num_blocks: List[int], planes: List[int], rect_pools: List[bool], **kwargs: Any
+    arch: str, pretrained: bool, num_blocks: list[int], planes: list[int], rect_pools: list[bool], **kwargs: Any
 ) -> VGG:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
     kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
@@ -87,8 +95,8 @@ def _vgg(
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # skip the mismatching layers for fine tuning
-        load_pretrained_params(
-            model, default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
+        model.from_pretrained(
+            default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
         )
 
     return model
@@ -106,12 +114,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> VGG:
    >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         **kwargs: keyword arguments of the VGG architecture
 
     Returns:
-    -------
         VGG feature extractor
     """
     return _vgg(