python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/__init__.py +1 -0
  5. doctr/datasets/coco_text.py +139 -0
  6. doctr/datasets/cord.py +10 -8
  7. doctr/datasets/datasets/__init__.py +4 -4
  8. doctr/datasets/datasets/base.py +16 -16
  9. doctr/datasets/datasets/pytorch.py +12 -12
  10. doctr/datasets/datasets/tensorflow.py +10 -10
  11. doctr/datasets/detection.py +6 -9
  12. doctr/datasets/doc_artefacts.py +3 -4
  13. doctr/datasets/funsd.py +9 -8
  14. doctr/datasets/generator/__init__.py +4 -4
  15. doctr/datasets/generator/base.py +16 -17
  16. doctr/datasets/generator/pytorch.py +1 -3
  17. doctr/datasets/generator/tensorflow.py +1 -3
  18. doctr/datasets/ic03.py +5 -6
  19. doctr/datasets/ic13.py +6 -6
  20. doctr/datasets/iiit5k.py +10 -6
  21. doctr/datasets/iiithws.py +4 -5
  22. doctr/datasets/imgur5k.py +15 -7
  23. doctr/datasets/loader.py +4 -7
  24. doctr/datasets/mjsynth.py +6 -5
  25. doctr/datasets/ocr.py +3 -4
  26. doctr/datasets/orientation.py +3 -4
  27. doctr/datasets/recognition.py +4 -5
  28. doctr/datasets/sroie.py +6 -5
  29. doctr/datasets/svhn.py +7 -6
  30. doctr/datasets/svt.py +6 -7
  31. doctr/datasets/synthtext.py +19 -7
  32. doctr/datasets/utils.py +41 -35
  33. doctr/datasets/vocabs.py +1107 -49
  34. doctr/datasets/wildreceipt.py +14 -10
  35. doctr/file_utils.py +11 -7
  36. doctr/io/elements.py +96 -82
  37. doctr/io/html.py +1 -3
  38. doctr/io/image/__init__.py +3 -3
  39. doctr/io/image/base.py +2 -5
  40. doctr/io/image/pytorch.py +3 -12
  41. doctr/io/image/tensorflow.py +2 -11
  42. doctr/io/pdf.py +5 -7
  43. doctr/io/reader.py +5 -11
  44. doctr/models/_utils.py +15 -23
  45. doctr/models/builder.py +30 -48
  46. doctr/models/classification/__init__.py +1 -0
  47. doctr/models/classification/magc_resnet/__init__.py +3 -3
  48. doctr/models/classification/magc_resnet/pytorch.py +11 -15
  49. doctr/models/classification/magc_resnet/tensorflow.py +11 -14
  50. doctr/models/classification/mobilenet/__init__.py +3 -3
  51. doctr/models/classification/mobilenet/pytorch.py +20 -18
  52. doctr/models/classification/mobilenet/tensorflow.py +19 -23
  53. doctr/models/classification/predictor/__init__.py +4 -4
  54. doctr/models/classification/predictor/pytorch.py +7 -9
  55. doctr/models/classification/predictor/tensorflow.py +6 -8
  56. doctr/models/classification/resnet/__init__.py +4 -4
  57. doctr/models/classification/resnet/pytorch.py +47 -34
  58. doctr/models/classification/resnet/tensorflow.py +45 -35
  59. doctr/models/classification/textnet/__init__.py +3 -3
  60. doctr/models/classification/textnet/pytorch.py +20 -18
  61. doctr/models/classification/textnet/tensorflow.py +19 -17
  62. doctr/models/classification/vgg/__init__.py +3 -3
  63. doctr/models/classification/vgg/pytorch.py +21 -8
  64. doctr/models/classification/vgg/tensorflow.py +20 -14
  65. doctr/models/classification/vip/__init__.py +4 -0
  66. doctr/models/classification/vip/layers/__init__.py +4 -0
  67. doctr/models/classification/vip/layers/pytorch.py +615 -0
  68. doctr/models/classification/vip/pytorch.py +505 -0
  69. doctr/models/classification/vit/__init__.py +3 -3
  70. doctr/models/classification/vit/pytorch.py +18 -15
  71. doctr/models/classification/vit/tensorflow.py +15 -12
  72. doctr/models/classification/zoo.py +23 -14
  73. doctr/models/core.py +3 -3
  74. doctr/models/detection/_utils/__init__.py +4 -4
  75. doctr/models/detection/_utils/base.py +4 -7
  76. doctr/models/detection/_utils/pytorch.py +1 -5
  77. doctr/models/detection/_utils/tensorflow.py +1 -5
  78. doctr/models/detection/core.py +2 -8
  79. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  80. doctr/models/detection/differentiable_binarization/base.py +10 -21
  81. doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
  82. doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
  83. doctr/models/detection/fast/__init__.py +4 -4
  84. doctr/models/detection/fast/base.py +8 -17
  85. doctr/models/detection/fast/pytorch.py +37 -35
  86. doctr/models/detection/fast/tensorflow.py +24 -28
  87. doctr/models/detection/linknet/__init__.py +4 -4
  88. doctr/models/detection/linknet/base.py +8 -18
  89. doctr/models/detection/linknet/pytorch.py +34 -28
  90. doctr/models/detection/linknet/tensorflow.py +24 -25
  91. doctr/models/detection/predictor/__init__.py +5 -5
  92. doctr/models/detection/predictor/pytorch.py +6 -7
  93. doctr/models/detection/predictor/tensorflow.py +5 -6
  94. doctr/models/detection/zoo.py +27 -7
  95. doctr/models/factory/hub.py +6 -10
  96. doctr/models/kie_predictor/__init__.py +5 -5
  97. doctr/models/kie_predictor/base.py +4 -5
  98. doctr/models/kie_predictor/pytorch.py +19 -20
  99. doctr/models/kie_predictor/tensorflow.py +14 -15
  100. doctr/models/modules/layers/__init__.py +3 -3
  101. doctr/models/modules/layers/pytorch.py +55 -10
  102. doctr/models/modules/layers/tensorflow.py +5 -7
  103. doctr/models/modules/transformer/__init__.py +3 -3
  104. doctr/models/modules/transformer/pytorch.py +12 -13
  105. doctr/models/modules/transformer/tensorflow.py +9 -10
  106. doctr/models/modules/vision_transformer/__init__.py +3 -3
  107. doctr/models/modules/vision_transformer/pytorch.py +2 -3
  108. doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  109. doctr/models/predictor/__init__.py +5 -5
  110. doctr/models/predictor/base.py +28 -29
  111. doctr/models/predictor/pytorch.py +13 -14
  112. doctr/models/predictor/tensorflow.py +9 -10
  113. doctr/models/preprocessor/__init__.py +4 -4
  114. doctr/models/preprocessor/pytorch.py +13 -17
  115. doctr/models/preprocessor/tensorflow.py +10 -14
  116. doctr/models/recognition/__init__.py +1 -0
  117. doctr/models/recognition/core.py +3 -7
  118. doctr/models/recognition/crnn/__init__.py +4 -4
  119. doctr/models/recognition/crnn/pytorch.py +30 -29
  120. doctr/models/recognition/crnn/tensorflow.py +21 -24
  121. doctr/models/recognition/master/__init__.py +3 -3
  122. doctr/models/recognition/master/base.py +3 -7
  123. doctr/models/recognition/master/pytorch.py +32 -25
  124. doctr/models/recognition/master/tensorflow.py +22 -25
  125. doctr/models/recognition/parseq/__init__.py +3 -3
  126. doctr/models/recognition/parseq/base.py +3 -7
  127. doctr/models/recognition/parseq/pytorch.py +47 -29
  128. doctr/models/recognition/parseq/tensorflow.py +29 -27
  129. doctr/models/recognition/predictor/__init__.py +5 -5
  130. doctr/models/recognition/predictor/_utils.py +111 -52
  131. doctr/models/recognition/predictor/pytorch.py +9 -9
  132. doctr/models/recognition/predictor/tensorflow.py +8 -9
  133. doctr/models/recognition/sar/__init__.py +4 -4
  134. doctr/models/recognition/sar/pytorch.py +30 -22
  135. doctr/models/recognition/sar/tensorflow.py +22 -24
  136. doctr/models/recognition/utils.py +57 -53
  137. doctr/models/recognition/viptr/__init__.py +4 -0
  138. doctr/models/recognition/viptr/pytorch.py +277 -0
  139. doctr/models/recognition/vitstr/__init__.py +4 -4
  140. doctr/models/recognition/vitstr/base.py +3 -7
  141. doctr/models/recognition/vitstr/pytorch.py +28 -21
  142. doctr/models/recognition/vitstr/tensorflow.py +22 -23
  143. doctr/models/recognition/zoo.py +27 -11
  144. doctr/models/utils/__init__.py +4 -4
  145. doctr/models/utils/pytorch.py +41 -34
  146. doctr/models/utils/tensorflow.py +31 -23
  147. doctr/models/zoo.py +1 -5
  148. doctr/transforms/functional/__init__.py +3 -3
  149. doctr/transforms/functional/base.py +4 -11
  150. doctr/transforms/functional/pytorch.py +20 -28
  151. doctr/transforms/functional/tensorflow.py +10 -22
  152. doctr/transforms/modules/__init__.py +4 -4
  153. doctr/transforms/modules/base.py +48 -55
  154. doctr/transforms/modules/pytorch.py +58 -22
  155. doctr/transforms/modules/tensorflow.py +18 -32
  156. doctr/utils/common_types.py +8 -9
  157. doctr/utils/data.py +9 -13
  158. doctr/utils/fonts.py +2 -7
  159. doctr/utils/geometry.py +17 -48
  160. doctr/utils/metrics.py +17 -37
  161. doctr/utils/multithreading.py +4 -6
  162. doctr/utils/reconstitution.py +9 -13
  163. doctr/utils/repr.py +2 -3
  164. doctr/utils/visualization.py +16 -29
  165. doctr/version.py +1 -1
  166. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
  167. python_doctr-0.12.0.dist-info/RECORD +180 -0
  168. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
  169. python_doctr-0.10.0.dist-info/RECORD +0 -173
  170. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
  171. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
  172. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
@@ -1,10 +1,11 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
+ import types
6
7
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional
8
+ from typing import Any
8
9
 
9
10
  from torch import nn
10
11
  from torchvision.models import vgg as tv_vgg
@@ -16,7 +17,7 @@ from ...utils import load_pretrained_params
16
17
  __all__ = ["vgg16_bn_r"]
17
18
 
18
19
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
20
+ default_cfgs: dict[str, dict[str, Any]] = {
20
21
  "vgg16_bn_r": {
21
22
  "mean": (0.694, 0.695, 0.693),
22
23
  "std": (0.299, 0.296, 0.301),
@@ -32,7 +33,7 @@ def _vgg(
32
33
  pretrained: bool,
33
34
  tv_arch: str,
34
35
  num_rect_pools: int = 3,
35
- ignore_keys: Optional[List[str]] = None,
36
+ ignore_keys: list[str] | None = None,
36
37
  **kwargs: Any,
37
38
  ) -> tv_vgg.VGG:
38
39
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -45,7 +46,7 @@ def _vgg(
45
46
 
46
47
  # Build the model
47
48
  model = tv_vgg.__dict__[tv_arch](**kwargs, weights=None)
48
- # List the MaxPool2d
49
+ # list the MaxPool2d
49
50
  pool_idcs = [idx for idx, m in enumerate(model.features) if isinstance(m, nn.MaxPool2d)]
50
51
  # Replace their kernel with rectangular ones
51
52
  for idx in pool_idcs[-num_rect_pools:]:
@@ -53,12 +54,26 @@ def _vgg(
53
54
  # Patch average pool & classification head
54
55
  model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
55
56
  model.classifier = nn.Linear(512, kwargs["num_classes"])
57
+
58
+ # monkeypatch the model to allow for loading pretrained parameters
59
+ def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None: # noqa: D417
60
+ """Load pretrained parameters onto the model
61
+
62
+ Args:
63
+ path_or_url: the path or URL to the model parameters (checkpoint)
64
+ **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
65
+ """
66
+ load_pretrained_params(self, path_or_url, **kwargs)
67
+
68
+ # Bind method to the instance
69
+ model.from_pretrained = types.MethodType(from_pretrained, model)
70
+
56
71
  # Load pretrained parameters
57
72
  if pretrained:
58
73
  # The number of classes is not the same as the number of classes in the pretrained model =>
59
74
  # remove the last layer weights
60
75
  _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
61
- load_pretrained_params(model, default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
76
+ model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
62
77
 
63
78
  model.cfg = _cfg
64
79
 
@@ -77,12 +92,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG:
77
92
  >>> out = model(input_tensor)
78
93
 
79
94
  Args:
80
- ----
81
95
  pretrained (bool): If True, returns a model pre-trained on ImageNet
82
96
  **kwargs: keyword arguments of the VGG architecture
83
97
 
84
98
  Returns:
85
- -------
86
99
  VGG feature extractor
87
100
  """
88
101
  return _vgg(
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any
8
8
 
9
9
  from tensorflow.keras import layers
10
10
  from tensorflow.keras.models import Sequential
@@ -16,7 +16,7 @@ from ...utils import _build_model, conv_sequence, load_pretrained_params
16
16
  __all__ = ["VGG", "vgg16_bn_r"]
17
17
 
18
18
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
19
+ default_cfgs: dict[str, dict[str, Any]] = {
20
20
  "vgg16_bn_r": {
21
21
  "mean": (0.5, 0.5, 0.5),
22
22
  "std": (1.0, 1.0, 1.0),
@@ -32,7 +32,6 @@ class VGG(Sequential):
32
32
  <https://arxiv.org/pdf/1409.1556.pdf>`_.
33
33
 
34
34
  Args:
35
- ----
36
35
  num_blocks: number of convolutional block in each stage
37
36
  planes: number of output channels in each stage
38
37
  rect_pools: whether pooling square kernels should be replace with rectangular ones
@@ -43,13 +42,13 @@ class VGG(Sequential):
43
42
 
44
43
  def __init__(
45
44
  self,
46
- num_blocks: List[int],
47
- planes: List[int],
48
- rect_pools: List[bool],
45
+ num_blocks: list[int],
46
+ planes: list[int],
47
+ rect_pools: list[bool],
49
48
  include_top: bool = False,
50
49
  num_classes: int = 1000,
51
- input_shape: Optional[Tuple[int, int, int]] = None,
52
- cfg: Optional[Dict[str, Any]] = None,
50
+ input_shape: tuple[int, int, int] | None = None,
51
+ cfg: dict[str, Any] | None = None,
53
52
  ) -> None:
54
53
  _layers = []
55
54
  # Specify input_shape only for the first layer
@@ -65,9 +64,18 @@ class VGG(Sequential):
65
64
  super().__init__(_layers)
66
65
  self.cfg = cfg
67
66
 
67
+ def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
68
+ """Load pretrained parameters onto the model
69
+
70
+ Args:
71
+ path_or_url: the path or URL to the model parameters (checkpoint)
72
+ **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
73
+ """
74
+ load_pretrained_params(self, path_or_url, **kwargs)
75
+
68
76
 
69
77
  def _vgg(
70
- arch: str, pretrained: bool, num_blocks: List[int], planes: List[int], rect_pools: List[bool], **kwargs: Any
78
+ arch: str, pretrained: bool, num_blocks: list[int], planes: list[int], rect_pools: list[bool], **kwargs: Any
71
79
  ) -> VGG:
72
80
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
73
81
  kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
@@ -87,8 +95,8 @@ def _vgg(
87
95
  if pretrained:
88
96
  # The number of classes is not the same as the number of classes in the pretrained model =>
89
97
  # skip the mismatching layers for fine tuning
90
- load_pretrained_params(
91
- model, default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
98
+ model.from_pretrained(
99
+ default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
92
100
  )
93
101
 
94
102
  return model
@@ -106,12 +114,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> VGG:
106
114
  >>> out = model(input_tensor)
107
115
 
108
116
  Args:
109
- ----
110
117
  pretrained (bool): If True, returns a model pre-trained on ImageNet
111
118
  **kwargs: keyword arguments of the VGG architecture
112
119
 
113
120
  Returns:
114
- -------
115
121
  VGG feature extractor
116
122
  """
117
123
  return _vgg(
@@ -0,0 +1,4 @@
1
+ from doctr.file_utils import is_torch_available
2
+
3
+ if is_torch_available():
4
+ from .pytorch import *
@@ -0,0 +1,4 @@
1
+ from doctr.file_utils import is_torch_available
2
+
3
+ if is_torch_available():
4
+ from .pytorch import *