python-doctr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/cord.py +8 -7
  5. doctr/datasets/datasets/__init__.py +4 -4
  6. doctr/datasets/datasets/base.py +16 -16
  7. doctr/datasets/datasets/pytorch.py +12 -12
  8. doctr/datasets/datasets/tensorflow.py +10 -10
  9. doctr/datasets/detection.py +6 -9
  10. doctr/datasets/doc_artefacts.py +3 -4
  11. doctr/datasets/funsd.py +7 -6
  12. doctr/datasets/generator/__init__.py +4 -4
  13. doctr/datasets/generator/base.py +16 -17
  14. doctr/datasets/generator/pytorch.py +1 -3
  15. doctr/datasets/generator/tensorflow.py +1 -3
  16. doctr/datasets/ic03.py +4 -5
  17. doctr/datasets/ic13.py +4 -5
  18. doctr/datasets/iiit5k.py +6 -5
  19. doctr/datasets/iiithws.py +4 -5
  20. doctr/datasets/imgur5k.py +6 -5
  21. doctr/datasets/loader.py +4 -7
  22. doctr/datasets/mjsynth.py +6 -5
  23. doctr/datasets/ocr.py +3 -4
  24. doctr/datasets/orientation.py +3 -4
  25. doctr/datasets/recognition.py +3 -4
  26. doctr/datasets/sroie.py +6 -5
  27. doctr/datasets/svhn.py +6 -5
  28. doctr/datasets/svt.py +4 -5
  29. doctr/datasets/synthtext.py +4 -5
  30. doctr/datasets/utils.py +34 -29
  31. doctr/datasets/vocabs.py +17 -7
  32. doctr/datasets/wildreceipt.py +14 -10
  33. doctr/file_utils.py +2 -7
  34. doctr/io/elements.py +59 -79
  35. doctr/io/html.py +1 -3
  36. doctr/io/image/__init__.py +3 -3
  37. doctr/io/image/base.py +2 -5
  38. doctr/io/image/pytorch.py +3 -12
  39. doctr/io/image/tensorflow.py +2 -11
  40. doctr/io/pdf.py +5 -7
  41. doctr/io/reader.py +5 -11
  42. doctr/models/_utils.py +14 -22
  43. doctr/models/builder.py +30 -48
  44. doctr/models/classification/magc_resnet/__init__.py +3 -3
  45. doctr/models/classification/magc_resnet/pytorch.py +10 -13
  46. doctr/models/classification/magc_resnet/tensorflow.py +8 -11
  47. doctr/models/classification/mobilenet/__init__.py +3 -3
  48. doctr/models/classification/mobilenet/pytorch.py +5 -17
  49. doctr/models/classification/mobilenet/tensorflow.py +8 -21
  50. doctr/models/classification/predictor/__init__.py +4 -4
  51. doctr/models/classification/predictor/pytorch.py +6 -8
  52. doctr/models/classification/predictor/tensorflow.py +6 -8
  53. doctr/models/classification/resnet/__init__.py +4 -4
  54. doctr/models/classification/resnet/pytorch.py +21 -31
  55. doctr/models/classification/resnet/tensorflow.py +20 -31
  56. doctr/models/classification/textnet/__init__.py +3 -3
  57. doctr/models/classification/textnet/pytorch.py +10 -17
  58. doctr/models/classification/textnet/tensorflow.py +8 -15
  59. doctr/models/classification/vgg/__init__.py +3 -3
  60. doctr/models/classification/vgg/pytorch.py +5 -7
  61. doctr/models/classification/vgg/tensorflow.py +9 -12
  62. doctr/models/classification/vit/__init__.py +3 -3
  63. doctr/models/classification/vit/pytorch.py +8 -14
  64. doctr/models/classification/vit/tensorflow.py +6 -12
  65. doctr/models/classification/zoo.py +19 -14
  66. doctr/models/core.py +3 -3
  67. doctr/models/detection/_utils/__init__.py +4 -4
  68. doctr/models/detection/_utils/base.py +4 -7
  69. doctr/models/detection/_utils/pytorch.py +1 -5
  70. doctr/models/detection/_utils/tensorflow.py +1 -5
  71. doctr/models/detection/core.py +2 -8
  72. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  73. doctr/models/detection/differentiable_binarization/base.py +7 -17
  74. doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
  75. doctr/models/detection/differentiable_binarization/tensorflow.py +15 -25
  76. doctr/models/detection/fast/__init__.py +4 -4
  77. doctr/models/detection/fast/base.py +6 -14
  78. doctr/models/detection/fast/pytorch.py +24 -31
  79. doctr/models/detection/fast/tensorflow.py +14 -26
  80. doctr/models/detection/linknet/__init__.py +4 -4
  81. doctr/models/detection/linknet/base.py +6 -15
  82. doctr/models/detection/linknet/pytorch.py +24 -27
  83. doctr/models/detection/linknet/tensorflow.py +14 -23
  84. doctr/models/detection/predictor/__init__.py +5 -5
  85. doctr/models/detection/predictor/pytorch.py +6 -7
  86. doctr/models/detection/predictor/tensorflow.py +5 -6
  87. doctr/models/detection/zoo.py +27 -7
  88. doctr/models/factory/hub.py +3 -7
  89. doctr/models/kie_predictor/__init__.py +5 -5
  90. doctr/models/kie_predictor/base.py +4 -5
  91. doctr/models/kie_predictor/pytorch.py +18 -19
  92. doctr/models/kie_predictor/tensorflow.py +13 -14
  93. doctr/models/modules/layers/__init__.py +3 -3
  94. doctr/models/modules/layers/pytorch.py +6 -9
  95. doctr/models/modules/layers/tensorflow.py +5 -7
  96. doctr/models/modules/transformer/__init__.py +3 -3
  97. doctr/models/modules/transformer/pytorch.py +12 -13
  98. doctr/models/modules/transformer/tensorflow.py +9 -10
  99. doctr/models/modules/vision_transformer/__init__.py +3 -3
  100. doctr/models/modules/vision_transformer/pytorch.py +2 -3
  101. doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  102. doctr/models/predictor/__init__.py +5 -5
  103. doctr/models/predictor/base.py +28 -29
  104. doctr/models/predictor/pytorch.py +12 -13
  105. doctr/models/predictor/tensorflow.py +8 -9
  106. doctr/models/preprocessor/__init__.py +4 -4
  107. doctr/models/preprocessor/pytorch.py +13 -17
  108. doctr/models/preprocessor/tensorflow.py +10 -14
  109. doctr/models/recognition/core.py +3 -7
  110. doctr/models/recognition/crnn/__init__.py +4 -4
  111. doctr/models/recognition/crnn/pytorch.py +20 -28
  112. doctr/models/recognition/crnn/tensorflow.py +11 -23
  113. doctr/models/recognition/master/__init__.py +3 -3
  114. doctr/models/recognition/master/base.py +3 -7
  115. doctr/models/recognition/master/pytorch.py +22 -24
  116. doctr/models/recognition/master/tensorflow.py +12 -22
  117. doctr/models/recognition/parseq/__init__.py +3 -3
  118. doctr/models/recognition/parseq/base.py +3 -7
  119. doctr/models/recognition/parseq/pytorch.py +26 -26
  120. doctr/models/recognition/parseq/tensorflow.py +16 -22
  121. doctr/models/recognition/predictor/__init__.py +5 -5
  122. doctr/models/recognition/predictor/_utils.py +7 -10
  123. doctr/models/recognition/predictor/pytorch.py +6 -6
  124. doctr/models/recognition/predictor/tensorflow.py +5 -6
  125. doctr/models/recognition/sar/__init__.py +4 -4
  126. doctr/models/recognition/sar/pytorch.py +20 -21
  127. doctr/models/recognition/sar/tensorflow.py +12 -21
  128. doctr/models/recognition/utils.py +5 -10
  129. doctr/models/recognition/vitstr/__init__.py +4 -4
  130. doctr/models/recognition/vitstr/base.py +3 -7
  131. doctr/models/recognition/vitstr/pytorch.py +18 -20
  132. doctr/models/recognition/vitstr/tensorflow.py +12 -20
  133. doctr/models/recognition/zoo.py +22 -11
  134. doctr/models/utils/__init__.py +4 -4
  135. doctr/models/utils/pytorch.py +14 -17
  136. doctr/models/utils/tensorflow.py +17 -16
  137. doctr/models/zoo.py +1 -5
  138. doctr/transforms/functional/__init__.py +3 -3
  139. doctr/transforms/functional/base.py +4 -11
  140. doctr/transforms/functional/pytorch.py +20 -28
  141. doctr/transforms/functional/tensorflow.py +10 -22
  142. doctr/transforms/modules/__init__.py +4 -4
  143. doctr/transforms/modules/base.py +48 -55
  144. doctr/transforms/modules/pytorch.py +58 -22
  145. doctr/transforms/modules/tensorflow.py +18 -32
  146. doctr/utils/common_types.py +8 -9
  147. doctr/utils/data.py +8 -12
  148. doctr/utils/fonts.py +2 -7
  149. doctr/utils/geometry.py +16 -47
  150. doctr/utils/metrics.py +17 -37
  151. doctr/utils/multithreading.py +4 -6
  152. doctr/utils/reconstitution.py +9 -13
  153. doctr/utils/repr.py +2 -3
  154. doctr/utils/visualization.py +16 -29
  155. doctr/version.py +1 -1
  156. {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +54 -52
  157. python_doctr-0.11.0.dist-info/RECORD +173 -0
  158. {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
  159. python_doctr-0.10.0.dist-info/RECORD +0 -173
  160. {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
  161. {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
  162. {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
@@ -1,10 +1,11 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
+ from collections.abc import Callable
6
7
  from copy import deepcopy
7
- from typing import Any, Callable, Dict, List, Optional, Tuple
8
+ from typing import Any
8
9
 
9
10
  import tensorflow as tf
10
11
  from tensorflow.keras import layers
@@ -18,7 +19,7 @@ from ...utils import _build_model, conv_sequence, load_pretrained_params
18
19
  __all__ = ["ResNet", "resnet18", "resnet31", "resnet34", "resnet50", "resnet34_wide"]
19
20
 
20
21
 
21
- default_cfgs: Dict[str, Dict[str, Any]] = {
22
+ default_cfgs: dict[str, dict[str, Any]] = {
22
23
  "resnet18": {
23
24
  "mean": (0.694, 0.695, 0.693),
24
25
  "std": (0.299, 0.296, 0.301),
@@ -61,7 +62,6 @@ class ResnetBlock(layers.Layer):
61
62
  """Implements a resnet31 block with shortcut
62
63
 
63
64
  Args:
64
- ----
65
65
  conv_shortcut: Use of shortcut
66
66
  output_channels: number of channels to use in Conv2D
67
67
  kernel_size: size of square kernels
@@ -92,7 +92,7 @@ class ResnetBlock(layers.Layer):
92
92
  output_channels: int,
93
93
  kernel_size: int,
94
94
  strides: int = 1,
95
- ) -> List[layers.Layer]:
95
+ ) -> list[layers.Layer]:
96
96
  return [
97
97
  *conv_sequence(output_channels, "relu", bn=True, strides=strides, kernel_size=kernel_size),
98
98
  *conv_sequence(output_channels, None, bn=True, kernel_size=kernel_size),
@@ -108,8 +108,8 @@ class ResnetBlock(layers.Layer):
108
108
 
109
109
  def resnet_stage(
110
110
  num_blocks: int, out_channels: int, shortcut: bool = False, downsample: bool = False
111
- ) -> List[layers.Layer]:
112
- _layers: List[layers.Layer] = [ResnetBlock(out_channels, conv_shortcut=shortcut, strides=2 if downsample else 1)]
111
+ ) -> list[layers.Layer]:
112
+ _layers: list[layers.Layer] = [ResnetBlock(out_channels, conv_shortcut=shortcut, strides=2 if downsample else 1)]
113
113
 
114
114
  for _ in range(1, num_blocks):
115
115
  _layers.append(ResnetBlock(out_channels, conv_shortcut=False))
@@ -121,7 +121,6 @@ class ResNet(Sequential):
121
121
  """Implements a ResNet architecture
122
122
 
123
123
  Args:
124
- ----
125
124
  num_blocks: number of resnet block in each stage
126
125
  output_channels: number of channels in each stage
127
126
  stage_downsample: whether the first residual block of a stage should downsample
@@ -137,18 +136,18 @@ class ResNet(Sequential):
137
136
 
138
137
  def __init__(
139
138
  self,
140
- num_blocks: List[int],
141
- output_channels: List[int],
142
- stage_downsample: List[bool],
143
- stage_conv: List[bool],
144
- stage_pooling: List[Optional[Tuple[int, int]]],
139
+ num_blocks: list[int],
140
+ output_channels: list[int],
141
+ stage_downsample: list[bool],
142
+ stage_conv: list[bool],
143
+ stage_pooling: list[tuple[int, int] | None],
145
144
  origin_stem: bool = True,
146
145
  stem_channels: int = 64,
147
- attn_module: Optional[Callable[[int], layers.Layer]] = None,
146
+ attn_module: Callable[[int], layers.Layer] | None = None,
148
147
  include_top: bool = True,
149
148
  num_classes: int = 1000,
150
- cfg: Optional[Dict[str, Any]] = None,
151
- input_shape: Optional[Tuple[int, int, int]] = None,
149
+ cfg: dict[str, Any] | None = None,
150
+ input_shape: tuple[int, int, int] | None = None,
152
151
  ) -> None:
153
152
  inplanes = stem_channels
154
153
  if origin_stem:
@@ -188,11 +187,11 @@ class ResNet(Sequential):
188
187
  def _resnet(
189
188
  arch: str,
190
189
  pretrained: bool,
191
- num_blocks: List[int],
192
- output_channels: List[int],
193
- stage_downsample: List[bool],
194
- stage_conv: List[bool],
195
- stage_pooling: List[Optional[Tuple[int, int]]],
190
+ num_blocks: list[int],
191
+ output_channels: list[int],
192
+ stage_downsample: list[bool],
193
+ stage_conv: list[bool],
194
+ stage_pooling: list[tuple[int, int] | None],
196
195
  origin_stem: bool = True,
197
196
  **kwargs: Any,
198
197
  ) -> ResNet:
@@ -234,12 +233,10 @@ def resnet18(pretrained: bool = False, **kwargs: Any) -> ResNet:
234
233
  >>> out = model(input_tensor)
235
234
 
236
235
  Args:
237
- ----
238
236
  pretrained: boolean, True if model is pretrained
239
237
  **kwargs: keyword arguments of the ResNet architecture
240
238
 
241
239
  Returns:
242
- -------
243
240
  A classification model
244
241
  """
245
242
  return _resnet(
@@ -267,12 +264,10 @@ def resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
267
264
  >>> out = model(input_tensor)
268
265
 
269
266
  Args:
270
- ----
271
267
  pretrained: boolean, True if model is pretrained
272
268
  **kwargs: keyword arguments of the ResNet architecture
273
269
 
274
270
  Returns:
275
- -------
276
271
  A classification model
277
272
  """
278
273
  return _resnet(
@@ -300,12 +295,10 @@ def resnet34(pretrained: bool = False, **kwargs: Any) -> ResNet:
300
295
  >>> out = model(input_tensor)
301
296
 
302
297
  Args:
303
- ----
304
298
  pretrained: boolean, True if model is pretrained
305
299
  **kwargs: keyword arguments of the ResNet architecture
306
300
 
307
301
  Returns:
308
- -------
309
302
  A classification model
310
303
  """
311
304
  return _resnet(
@@ -332,12 +325,10 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> ResNet:
332
325
  >>> out = model(input_tensor)
333
326
 
334
327
  Args:
335
- ----
336
328
  pretrained: boolean, True if model is pretrained
337
329
  **kwargs: keyword arguments of the ResNet architecture
338
330
 
339
331
  Returns:
340
- -------
341
332
  A classification model
342
333
  """
343
334
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs["resnet50"]["classes"]))
@@ -386,12 +377,10 @@ def resnet34_wide(pretrained: bool = False, **kwargs: Any) -> ResNet:
386
377
  >>> out = model(input_tensor)
387
378
 
388
379
  Args:
389
- ----
390
380
  pretrained: boolean, True if model is pretrained
391
381
  **kwargs: keyword arguments of the ResNet architecture
392
382
 
393
383
  Returns:
394
- -------
395
384
  A classification model
396
385
  """
397
386
  return _resnet(
@@ -1,6 +1,6 @@
1
1
  from doctr.file_utils import is_tf_available, is_torch_available
2
2
 
3
- if is_tf_available():
3
+ if is_torch_available():
4
+ from .pytorch import *
5
+ elif is_tf_available():
4
6
  from .tensorflow import *
5
- elif is_torch_available():
6
- from .pytorch import * # type: ignore[assignment]
@@ -1,11 +1,11 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
 
7
7
  from copy import deepcopy
8
- from typing import Any, Dict, List, Optional, Tuple
8
+ from typing import Any
9
9
 
10
10
  from torch import nn
11
11
 
@@ -16,7 +16,7 @@ from ...utils import conv_sequence_pt, load_pretrained_params
16
16
 
17
17
  __all__ = ["textnet_tiny", "textnet_small", "textnet_base"]
18
18
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
19
+ default_cfgs: dict[str, dict[str, Any]] = {
20
20
  "textnet_tiny": {
21
21
  "mean": (0.694, 0.695, 0.693),
22
22
  "std": (0.299, 0.296, 0.301),
@@ -47,22 +47,21 @@ class TextNet(nn.Sequential):
47
47
  Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
48
48
 
49
49
  Args:
50
- ----
51
- stages (List[Dict[str, List[int]]]): List of dictionaries containing the parameters of each stage.
50
+ stages (list[dict[str, list[int]]]): list of dictionaries containing the parameters of each stage.
52
51
  include_top (bool, optional): Whether to include the classifier head. Defaults to True.
53
52
  num_classes (int, optional): Number of output classes. Defaults to 1000.
54
- cfg (Optional[Dict[str, Any]], optional): Additional configuration. Defaults to None.
53
+ cfg (dict[str, Any], optional): Additional configuration. Defaults to None.
55
54
  """
56
55
 
57
56
  def __init__(
58
57
  self,
59
- stages: List[Dict[str, List[int]]],
60
- input_shape: Tuple[int, int, int] = (3, 32, 32),
58
+ stages: list[dict[str, list[int]]],
59
+ input_shape: tuple[int, int, int] = (3, 32, 32),
61
60
  num_classes: int = 1000,
62
61
  include_top: bool = True,
63
- cfg: Optional[Dict[str, Any]] = None,
62
+ cfg: dict[str, Any] | None = None,
64
63
  ) -> None:
65
- _layers: List[nn.Module] = [
64
+ _layers: list[nn.Module] = [
66
65
  *conv_sequence_pt(
67
66
  in_channels=3, out_channels=64, relu=True, bn=True, kernel_size=3, stride=2, padding=(1, 1)
68
67
  ),
@@ -98,7 +97,7 @@ class TextNet(nn.Sequential):
98
97
  def _textnet(
99
98
  arch: str,
100
99
  pretrained: bool,
101
- ignore_keys: Optional[List[str]] = None,
100
+ ignore_keys: list[str] | None = None,
102
101
  **kwargs: Any,
103
102
  ) -> TextNet:
104
103
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -135,12 +134,10 @@ def textnet_tiny(pretrained: bool = False, **kwargs: Any) -> TextNet:
135
134
  >>> out = model(input_tensor)
136
135
 
137
136
  Args:
138
- ----
139
137
  pretrained: boolean, True if model is pretrained
140
138
  **kwargs: keyword arguments of the TextNet architecture
141
139
 
142
140
  Returns:
143
- -------
144
141
  A textnet tiny model
145
142
  """
146
143
  return _textnet(
@@ -184,12 +181,10 @@ def textnet_small(pretrained: bool = False, **kwargs: Any) -> TextNet:
184
181
  >>> out = model(input_tensor)
185
182
 
186
183
  Args:
187
- ----
188
184
  pretrained: boolean, True if model is pretrained
189
185
  **kwargs: keyword arguments of the TextNet architecture
190
186
 
191
187
  Returns:
192
- -------
193
188
  A TextNet small model
194
189
  """
195
190
  return _textnet(
@@ -233,12 +228,10 @@ def textnet_base(pretrained: bool = False, **kwargs: Any) -> TextNet:
233
228
  >>> out = model(input_tensor)
234
229
 
235
230
  Args:
236
- ----
237
231
  pretrained: boolean, True if model is pretrained
238
232
  **kwargs: keyword arguments of the TextNet architecture
239
233
 
240
234
  Returns:
241
- -------
242
235
  A TextNet base model
243
236
  """
244
237
  return _textnet(
@@ -1,11 +1,11 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
 
7
7
  from copy import deepcopy
8
- from typing import Any, Dict, List, Optional, Tuple
8
+ from typing import Any
9
9
 
10
10
  from tensorflow.keras import Sequential, layers
11
11
 
@@ -16,7 +16,7 @@ from ...utils import _build_model, conv_sequence, load_pretrained_params
16
16
 
17
17
  __all__ = ["textnet_tiny", "textnet_small", "textnet_base"]
18
18
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
19
+ default_cfgs: dict[str, dict[str, Any]] = {
20
20
  "textnet_tiny": {
21
21
  "mean": (0.694, 0.695, 0.693),
22
22
  "std": (0.299, 0.296, 0.301),
@@ -47,20 +47,19 @@ class TextNet(Sequential):
47
47
  Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_.
48
48
 
49
49
  Args:
50
- ----
51
- stages (List[Dict[str, List[int]]]): List of dictionaries containing the parameters of each stage.
50
+ stages (list[dict[str, list[int]]]): list of dictionaries containing the parameters of each stage.
52
51
  include_top (bool, optional): Whether to include the classifier head. Defaults to True.
53
52
  num_classes (int, optional): Number of output classes. Defaults to 1000.
54
- cfg (Optional[Dict[str, Any]], optional): Additional configuration. Defaults to None.
53
+ cfg (dict[str, Any], optional): Additional configuration. Defaults to None.
55
54
  """
56
55
 
57
56
  def __init__(
58
57
  self,
59
- stages: List[Dict[str, List[int]]],
60
- input_shape: Tuple[int, int, int] = (32, 32, 3),
58
+ stages: list[dict[str, list[int]]],
59
+ input_shape: tuple[int, int, int] = (32, 32, 3),
61
60
  num_classes: int = 1000,
62
61
  include_top: bool = True,
63
- cfg: Optional[Dict[str, Any]] = None,
62
+ cfg: dict[str, Any] | None = None,
64
63
  ) -> None:
65
64
  _layers = [
66
65
  *conv_sequence(
@@ -136,12 +135,10 @@ def textnet_tiny(pretrained: bool = False, **kwargs: Any) -> TextNet:
136
135
  >>> out = model(input_tensor)
137
136
 
138
137
  Args:
139
- ----
140
138
  pretrained: boolean, True if model is pretrained
141
139
  **kwargs: keyword arguments of the TextNet architecture
142
140
 
143
141
  Returns:
144
- -------
145
142
  A textnet tiny model
146
143
  """
147
144
  return _textnet(
@@ -184,12 +181,10 @@ def textnet_small(pretrained: bool = False, **kwargs: Any) -> TextNet:
184
181
  >>> out = model(input_tensor)
185
182
 
186
183
  Args:
187
- ----
188
184
  pretrained: boolean, True if model is pretrained
189
185
  **kwargs: keyword arguments of the TextNet architecture
190
186
 
191
187
  Returns:
192
- -------
193
188
  A TextNet small model
194
189
  """
195
190
  return _textnet(
@@ -232,12 +227,10 @@ def textnet_base(pretrained: bool = False, **kwargs: Any) -> TextNet:
232
227
  >>> out = model(input_tensor)
233
228
 
234
229
  Args:
235
- ----
236
230
  pretrained: boolean, True if model is pretrained
237
231
  **kwargs: keyword arguments of the TextNet architecture
238
232
 
239
233
  Returns:
240
- -------
241
234
  A TextNet base model
242
235
  """
243
236
  return _textnet(
@@ -1,6 +1,6 @@
1
1
  from doctr.file_utils import is_tf_available, is_torch_available
2
2
 
3
- if is_tf_available():
4
- from .tensorflow import *
5
- elif is_torch_available():
3
+ if is_torch_available():
6
4
  from .pytorch import *
5
+ elif is_tf_available():
6
+ from .tensorflow import *
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional
7
+ from typing import Any
8
8
 
9
9
  from torch import nn
10
10
  from torchvision.models import vgg as tv_vgg
@@ -16,7 +16,7 @@ from ...utils import load_pretrained_params
16
16
  __all__ = ["vgg16_bn_r"]
17
17
 
18
18
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
19
+ default_cfgs: dict[str, dict[str, Any]] = {
20
20
  "vgg16_bn_r": {
21
21
  "mean": (0.694, 0.695, 0.693),
22
22
  "std": (0.299, 0.296, 0.301),
@@ -32,7 +32,7 @@ def _vgg(
32
32
  pretrained: bool,
33
33
  tv_arch: str,
34
34
  num_rect_pools: int = 3,
35
- ignore_keys: Optional[List[str]] = None,
35
+ ignore_keys: list[str] | None = None,
36
36
  **kwargs: Any,
37
37
  ) -> tv_vgg.VGG:
38
38
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -45,7 +45,7 @@ def _vgg(
45
45
 
46
46
  # Build the model
47
47
  model = tv_vgg.__dict__[tv_arch](**kwargs, weights=None)
48
- # List the MaxPool2d
48
+ # list the MaxPool2d
49
49
  pool_idcs = [idx for idx, m in enumerate(model.features) if isinstance(m, nn.MaxPool2d)]
50
50
  # Replace their kernel with rectangular ones
51
51
  for idx in pool_idcs[-num_rect_pools:]:
@@ -77,12 +77,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG:
77
77
  >>> out = model(input_tensor)
78
78
 
79
79
  Args:
80
- ----
81
80
  pretrained (bool): If True, returns a model pre-trained on ImageNet
82
81
  **kwargs: keyword arguments of the VGG architecture
83
82
 
84
83
  Returns:
85
- -------
86
84
  VGG feature extractor
87
85
  """
88
86
  return _vgg(
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any
8
8
 
9
9
  from tensorflow.keras import layers
10
10
  from tensorflow.keras.models import Sequential
@@ -16,7 +16,7 @@ from ...utils import _build_model, conv_sequence, load_pretrained_params
16
16
  __all__ = ["VGG", "vgg16_bn_r"]
17
17
 
18
18
 
19
- default_cfgs: Dict[str, Dict[str, Any]] = {
19
+ default_cfgs: dict[str, dict[str, Any]] = {
20
20
  "vgg16_bn_r": {
21
21
  "mean": (0.5, 0.5, 0.5),
22
22
  "std": (1.0, 1.0, 1.0),
@@ -32,7 +32,6 @@ class VGG(Sequential):
32
32
  <https://arxiv.org/pdf/1409.1556.pdf>`_.
33
33
 
34
34
  Args:
35
- ----
36
35
  num_blocks: number of convolutional block in each stage
37
36
  planes: number of output channels in each stage
38
37
  rect_pools: whether pooling square kernels should be replace with rectangular ones
@@ -43,13 +42,13 @@ class VGG(Sequential):
43
42
 
44
43
  def __init__(
45
44
  self,
46
- num_blocks: List[int],
47
- planes: List[int],
48
- rect_pools: List[bool],
45
+ num_blocks: list[int],
46
+ planes: list[int],
47
+ rect_pools: list[bool],
49
48
  include_top: bool = False,
50
49
  num_classes: int = 1000,
51
- input_shape: Optional[Tuple[int, int, int]] = None,
52
- cfg: Optional[Dict[str, Any]] = None,
50
+ input_shape: tuple[int, int, int] | None = None,
51
+ cfg: dict[str, Any] | None = None,
53
52
  ) -> None:
54
53
  _layers = []
55
54
  # Specify input_shape only for the first layer
@@ -67,7 +66,7 @@ class VGG(Sequential):
67
66
 
68
67
 
69
68
  def _vgg(
70
- arch: str, pretrained: bool, num_blocks: List[int], planes: List[int], rect_pools: List[bool], **kwargs: Any
69
+ arch: str, pretrained: bool, num_blocks: list[int], planes: list[int], rect_pools: list[bool], **kwargs: Any
71
70
  ) -> VGG:
72
71
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
73
72
  kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
@@ -106,12 +105,10 @@ def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> VGG:
106
105
  >>> out = model(input_tensor)
107
106
 
108
107
  Args:
109
- ----
110
108
  pretrained (bool): If True, returns a model pre-trained on ImageNet
111
109
  **kwargs: keyword arguments of the VGG architecture
112
110
 
113
111
  Returns:
114
- -------
115
112
  VGG feature extractor
116
113
  """
117
114
  return _vgg(
@@ -1,6 +1,6 @@
1
1
  from doctr.file_utils import is_tf_available, is_torch_available
2
2
 
3
- if is_tf_available():
3
+ if is_torch_available():
4
+ from .pytorch import *
5
+ elif is_tf_available():
4
6
  from .tensorflow import *
5
- elif is_torch_available():
6
- from .pytorch import * # type: ignore[assignment]
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any
8
8
 
9
9
  import torch
10
10
  from torch import nn
@@ -18,7 +18,7 @@ from ...utils.pytorch import load_pretrained_params
18
18
  __all__ = ["vit_s", "vit_b"]
19
19
 
20
20
 
21
- default_cfgs: Dict[str, Dict[str, Any]] = {
21
+ default_cfgs: dict[str, dict[str, Any]] = {
22
22
  "vit_s": {
23
23
  "mean": (0.694, 0.695, 0.693),
24
24
  "std": (0.299, 0.296, 0.301),
@@ -40,7 +40,6 @@ class ClassifierHead(nn.Module):
40
40
  """Classifier head for Vision Transformer
41
41
 
42
42
  Args:
43
- ----
44
43
  in_channels: number of input channels
45
44
  num_classes: number of output classes
46
45
  """
@@ -65,7 +64,6 @@ class VisionTransformer(nn.Sequential):
65
64
  <https://arxiv.org/pdf/2010.11929.pdf>`_.
66
65
 
67
66
  Args:
68
- ----
69
67
  d_model: dimension of the transformer layers
70
68
  num_layers: number of transformer layers
71
69
  num_heads: number of attention heads
@@ -83,14 +81,14 @@ class VisionTransformer(nn.Sequential):
83
81
  num_layers: int,
84
82
  num_heads: int,
85
83
  ffd_ratio: int,
86
- patch_size: Tuple[int, int] = (4, 4),
87
- input_shape: Tuple[int, int, int] = (3, 32, 32),
84
+ patch_size: tuple[int, int] = (4, 4),
85
+ input_shape: tuple[int, int, int] = (3, 32, 32),
88
86
  dropout: float = 0.0,
89
87
  num_classes: int = 1000,
90
88
  include_top: bool = True,
91
- cfg: Optional[Dict[str, Any]] = None,
89
+ cfg: dict[str, Any] | None = None,
92
90
  ) -> None:
93
- _layers: List[nn.Module] = [
91
+ _layers: list[nn.Module] = [
94
92
  PatchEmbedding(input_shape, d_model, patch_size),
95
93
  EncoderBlock(num_layers, num_heads, d_model, d_model * ffd_ratio, dropout, nn.GELU()),
96
94
  ]
@@ -104,7 +102,7 @@ class VisionTransformer(nn.Sequential):
104
102
  def _vit(
105
103
  arch: str,
106
104
  pretrained: bool,
107
- ignore_keys: Optional[List[str]] = None,
105
+ ignore_keys: list[str] | None = None,
108
106
  **kwargs: Any,
109
107
  ) -> VisionTransformer:
110
108
  kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -143,12 +141,10 @@ def vit_s(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
143
141
  >>> out = model(input_tensor)
144
142
 
145
143
  Args:
146
- ----
147
144
  pretrained: boolean, True if model is pretrained
148
145
  **kwargs: keyword arguments of the VisionTransformer architecture
149
146
 
150
147
  Returns:
151
- -------
152
148
  A feature extractor model
153
149
  """
154
150
  return _vit(
@@ -175,12 +171,10 @@ def vit_b(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
175
171
  >>> out = model(input_tensor)
176
172
 
177
173
  Args:
178
- ----
179
174
  pretrained: boolean, True if model is pretrained
180
175
  **kwargs: keyword arguments of the VisionTransformer architecture
181
176
 
182
177
  Returns:
183
- -------
184
178
  A feature extractor model
185
179
  """
186
180
  return _vit(
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, Optional, Tuple
7
+ from typing import Any
8
8
 
9
9
  import tensorflow as tf
10
10
  from tensorflow.keras import Sequential, layers
@@ -19,7 +19,7 @@ from ...utils import _build_model, load_pretrained_params
19
19
  __all__ = ["vit_s", "vit_b"]
20
20
 
21
21
 
22
- default_cfgs: Dict[str, Dict[str, Any]] = {
22
+ default_cfgs: dict[str, dict[str, Any]] = {
23
23
  "vit_s": {
24
24
  "mean": (0.694, 0.695, 0.693),
25
25
  "std": (0.299, 0.296, 0.301),
@@ -41,7 +41,6 @@ class ClassifierHead(layers.Layer, NestedObject):
41
41
  """Classifier head for Vision Transformer
42
42
 
43
43
  Args:
44
- ----
45
44
  num_classes: number of output classes
46
45
  """
47
46
 
@@ -61,7 +60,6 @@ class VisionTransformer(Sequential):
61
60
  <https://arxiv.org/pdf/2010.11929.pdf>`_.
62
61
 
63
62
  Args:
64
- ----
65
63
  d_model: dimension of the transformer layers
66
64
  num_layers: number of transformer layers
67
65
  num_heads: number of attention heads
@@ -79,12 +77,12 @@ class VisionTransformer(Sequential):
79
77
  num_layers: int,
80
78
  num_heads: int,
81
79
  ffd_ratio: int,
82
- patch_size: Tuple[int, int] = (4, 4),
83
- input_shape: Tuple[int, int, int] = (32, 32, 3),
80
+ patch_size: tuple[int, int] = (4, 4),
81
+ input_shape: tuple[int, int, int] = (32, 32, 3),
84
82
  dropout: float = 0.0,
85
83
  num_classes: int = 1000,
86
84
  include_top: bool = True,
87
- cfg: Optional[Dict[str, Any]] = None,
85
+ cfg: dict[str, Any] | None = None,
88
86
  ) -> None:
89
87
  _layers = [
90
88
  PatchEmbedding(input_shape, d_model, patch_size),
@@ -148,12 +146,10 @@ def vit_s(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
148
146
  >>> out = model(input_tensor)
149
147
 
150
148
  Args:
151
- ----
152
149
  pretrained: boolean, True if model is pretrained
153
150
  **kwargs: keyword arguments of the VisionTransformer architecture
154
151
 
155
152
  Returns:
156
- -------
157
153
  A feature extractor model
158
154
  """
159
155
  return _vit(
@@ -179,12 +175,10 @@ def vit_b(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
179
175
  >>> out = model(input_tensor)
180
176
 
181
177
  Args:
182
- ----
183
178
  pretrained: boolean, True if model is pretrained
184
179
  **kwargs: keyword arguments of the VisionTransformer architecture
185
180
 
186
181
  Returns:
187
- -------
188
182
  A feature extractor model
189
183
  """
190
184
  return _vit(