python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/__init__.py +1 -0
  5. doctr/datasets/coco_text.py +139 -0
  6. doctr/datasets/cord.py +10 -8
  7. doctr/datasets/datasets/__init__.py +4 -4
  8. doctr/datasets/datasets/base.py +16 -16
  9. doctr/datasets/datasets/pytorch.py +12 -12
  10. doctr/datasets/datasets/tensorflow.py +10 -10
  11. doctr/datasets/detection.py +6 -9
  12. doctr/datasets/doc_artefacts.py +3 -4
  13. doctr/datasets/funsd.py +9 -8
  14. doctr/datasets/generator/__init__.py +4 -4
  15. doctr/datasets/generator/base.py +16 -17
  16. doctr/datasets/generator/pytorch.py +1 -3
  17. doctr/datasets/generator/tensorflow.py +1 -3
  18. doctr/datasets/ic03.py +5 -6
  19. doctr/datasets/ic13.py +6 -6
  20. doctr/datasets/iiit5k.py +10 -6
  21. doctr/datasets/iiithws.py +4 -5
  22. doctr/datasets/imgur5k.py +15 -7
  23. doctr/datasets/loader.py +4 -7
  24. doctr/datasets/mjsynth.py +6 -5
  25. doctr/datasets/ocr.py +3 -4
  26. doctr/datasets/orientation.py +3 -4
  27. doctr/datasets/recognition.py +4 -5
  28. doctr/datasets/sroie.py +6 -5
  29. doctr/datasets/svhn.py +7 -6
  30. doctr/datasets/svt.py +6 -7
  31. doctr/datasets/synthtext.py +19 -7
  32. doctr/datasets/utils.py +41 -35
  33. doctr/datasets/vocabs.py +1107 -49
  34. doctr/datasets/wildreceipt.py +14 -10
  35. doctr/file_utils.py +11 -7
  36. doctr/io/elements.py +96 -82
  37. doctr/io/html.py +1 -3
  38. doctr/io/image/__init__.py +3 -3
  39. doctr/io/image/base.py +2 -5
  40. doctr/io/image/pytorch.py +3 -12
  41. doctr/io/image/tensorflow.py +2 -11
  42. doctr/io/pdf.py +5 -7
  43. doctr/io/reader.py +5 -11
  44. doctr/models/_utils.py +15 -23
  45. doctr/models/builder.py +30 -48
  46. doctr/models/classification/__init__.py +1 -0
  47. doctr/models/classification/magc_resnet/__init__.py +3 -3
  48. doctr/models/classification/magc_resnet/pytorch.py +11 -15
  49. doctr/models/classification/magc_resnet/tensorflow.py +11 -14
  50. doctr/models/classification/mobilenet/__init__.py +3 -3
  51. doctr/models/classification/mobilenet/pytorch.py +20 -18
  52. doctr/models/classification/mobilenet/tensorflow.py +19 -23
  53. doctr/models/classification/predictor/__init__.py +4 -4
  54. doctr/models/classification/predictor/pytorch.py +7 -9
  55. doctr/models/classification/predictor/tensorflow.py +6 -8
  56. doctr/models/classification/resnet/__init__.py +4 -4
  57. doctr/models/classification/resnet/pytorch.py +47 -34
  58. doctr/models/classification/resnet/tensorflow.py +45 -35
  59. doctr/models/classification/textnet/__init__.py +3 -3
  60. doctr/models/classification/textnet/pytorch.py +20 -18
  61. doctr/models/classification/textnet/tensorflow.py +19 -17
  62. doctr/models/classification/vgg/__init__.py +3 -3
  63. doctr/models/classification/vgg/pytorch.py +21 -8
  64. doctr/models/classification/vgg/tensorflow.py +20 -14
  65. doctr/models/classification/vip/__init__.py +4 -0
  66. doctr/models/classification/vip/layers/__init__.py +4 -0
  67. doctr/models/classification/vip/layers/pytorch.py +615 -0
  68. doctr/models/classification/vip/pytorch.py +505 -0
  69. doctr/models/classification/vit/__init__.py +3 -3
  70. doctr/models/classification/vit/pytorch.py +18 -15
  71. doctr/models/classification/vit/tensorflow.py +15 -12
  72. doctr/models/classification/zoo.py +23 -14
  73. doctr/models/core.py +3 -3
  74. doctr/models/detection/_utils/__init__.py +4 -4
  75. doctr/models/detection/_utils/base.py +4 -7
  76. doctr/models/detection/_utils/pytorch.py +1 -5
  77. doctr/models/detection/_utils/tensorflow.py +1 -5
  78. doctr/models/detection/core.py +2 -8
  79. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  80. doctr/models/detection/differentiable_binarization/base.py +10 -21
  81. doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
  82. doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
  83. doctr/models/detection/fast/__init__.py +4 -4
  84. doctr/models/detection/fast/base.py +8 -17
  85. doctr/models/detection/fast/pytorch.py +37 -35
  86. doctr/models/detection/fast/tensorflow.py +24 -28
  87. doctr/models/detection/linknet/__init__.py +4 -4
  88. doctr/models/detection/linknet/base.py +8 -18
  89. doctr/models/detection/linknet/pytorch.py +34 -28
  90. doctr/models/detection/linknet/tensorflow.py +24 -25
  91. doctr/models/detection/predictor/__init__.py +5 -5
  92. doctr/models/detection/predictor/pytorch.py +6 -7
  93. doctr/models/detection/predictor/tensorflow.py +5 -6
  94. doctr/models/detection/zoo.py +27 -7
  95. doctr/models/factory/hub.py +6 -10
  96. doctr/models/kie_predictor/__init__.py +5 -5
  97. doctr/models/kie_predictor/base.py +4 -5
  98. doctr/models/kie_predictor/pytorch.py +19 -20
  99. doctr/models/kie_predictor/tensorflow.py +14 -15
  100. doctr/models/modules/layers/__init__.py +3 -3
  101. doctr/models/modules/layers/pytorch.py +55 -10
  102. doctr/models/modules/layers/tensorflow.py +5 -7
  103. doctr/models/modules/transformer/__init__.py +3 -3
  104. doctr/models/modules/transformer/pytorch.py +12 -13
  105. doctr/models/modules/transformer/tensorflow.py +9 -10
  106. doctr/models/modules/vision_transformer/__init__.py +3 -3
  107. doctr/models/modules/vision_transformer/pytorch.py +2 -3
  108. doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  109. doctr/models/predictor/__init__.py +5 -5
  110. doctr/models/predictor/base.py +28 -29
  111. doctr/models/predictor/pytorch.py +13 -14
  112. doctr/models/predictor/tensorflow.py +9 -10
  113. doctr/models/preprocessor/__init__.py +4 -4
  114. doctr/models/preprocessor/pytorch.py +13 -17
  115. doctr/models/preprocessor/tensorflow.py +10 -14
  116. doctr/models/recognition/__init__.py +1 -0
  117. doctr/models/recognition/core.py +3 -7
  118. doctr/models/recognition/crnn/__init__.py +4 -4
  119. doctr/models/recognition/crnn/pytorch.py +30 -29
  120. doctr/models/recognition/crnn/tensorflow.py +21 -24
  121. doctr/models/recognition/master/__init__.py +3 -3
  122. doctr/models/recognition/master/base.py +3 -7
  123. doctr/models/recognition/master/pytorch.py +32 -25
  124. doctr/models/recognition/master/tensorflow.py +22 -25
  125. doctr/models/recognition/parseq/__init__.py +3 -3
  126. doctr/models/recognition/parseq/base.py +3 -7
  127. doctr/models/recognition/parseq/pytorch.py +47 -29
  128. doctr/models/recognition/parseq/tensorflow.py +29 -27
  129. doctr/models/recognition/predictor/__init__.py +5 -5
  130. doctr/models/recognition/predictor/_utils.py +111 -52
  131. doctr/models/recognition/predictor/pytorch.py +9 -9
  132. doctr/models/recognition/predictor/tensorflow.py +8 -9
  133. doctr/models/recognition/sar/__init__.py +4 -4
  134. doctr/models/recognition/sar/pytorch.py +30 -22
  135. doctr/models/recognition/sar/tensorflow.py +22 -24
  136. doctr/models/recognition/utils.py +57 -53
  137. doctr/models/recognition/viptr/__init__.py +4 -0
  138. doctr/models/recognition/viptr/pytorch.py +277 -0
  139. doctr/models/recognition/vitstr/__init__.py +4 -4
  140. doctr/models/recognition/vitstr/base.py +3 -7
  141. doctr/models/recognition/vitstr/pytorch.py +28 -21
  142. doctr/models/recognition/vitstr/tensorflow.py +22 -23
  143. doctr/models/recognition/zoo.py +27 -11
  144. doctr/models/utils/__init__.py +4 -4
  145. doctr/models/utils/pytorch.py +41 -34
  146. doctr/models/utils/tensorflow.py +31 -23
  147. doctr/models/zoo.py +1 -5
  148. doctr/transforms/functional/__init__.py +3 -3
  149. doctr/transforms/functional/base.py +4 -11
  150. doctr/transforms/functional/pytorch.py +20 -28
  151. doctr/transforms/functional/tensorflow.py +10 -22
  152. doctr/transforms/modules/__init__.py +4 -4
  153. doctr/transforms/modules/base.py +48 -55
  154. doctr/transforms/modules/pytorch.py +58 -22
  155. doctr/transforms/modules/tensorflow.py +18 -32
  156. doctr/utils/common_types.py +8 -9
  157. doctr/utils/data.py +9 -13
  158. doctr/utils/fonts.py +2 -7
  159. doctr/utils/geometry.py +17 -48
  160. doctr/utils/metrics.py +17 -37
  161. doctr/utils/multithreading.py +4 -6
  162. doctr/utils/reconstitution.py +9 -13
  163. doctr/utils/repr.py +2 -3
  164. doctr/utils/visualization.py +16 -29
  165. doctr/version.py +1 -1
  166. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
  167. python_doctr-0.12.0.dist-info/RECORD +180 -0
  168. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
  169. python_doctr-0.10.0.dist-info/RECORD +0 -173
  170. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
  171. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
  172. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,9 +6,10 @@
6
6
  import json
7
7
  import os
8
8
  from pathlib import Path
9
- from typing import Any, Dict, List, Tuple, Union
9
+ from typing import Any
10
10
 
11
11
  import numpy as np
12
+ from tqdm import tqdm
12
13
 
13
14
  from .datasets import AbstractDataset
14
15
  from .utils import convert_target_to_relative, crop_bboxes_from_image
@@ -17,9 +18,10 @@ __all__ = ["WILDRECEIPT"]
17
18
 
18
19
 
19
20
  class WILDRECEIPT(AbstractDataset):
20
- """WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
21
- <https://arxiv.org/abs/2103.14470v1>`_ |
22
- `repository <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
21
+ """
22
+ WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
23
+ <https://arxiv.org/abs/2103.14470v1>`_ |
24
+ `"repository" <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
23
25
 
24
26
  .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0
25
27
  :align: center
@@ -34,7 +36,6 @@ class WILDRECEIPT(AbstractDataset):
34
36
  >>> img, target = test_set[0]
35
37
 
36
38
  Args:
37
- ----
38
39
  img_folder: folder with all the images of the dataset
39
40
  label_path: path to the annotations file of the dataset
40
41
  train: whether the subset should be the training one
@@ -71,15 +72,18 @@ class WILDRECEIPT(AbstractDataset):
71
72
  tmp_root = img_folder
72
73
  self.train = train
73
74
  np_dtype = np.float32
74
- self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
75
+ self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
75
76
 
76
77
  with open(label_path, "r") as file:
77
78
  data = file.read()
78
79
  # Split the text file into separate JSON strings
79
80
  json_strings = data.strip().split("\n")
80
- box: Union[List[float], np.ndarray]
81
- _targets = []
82
- for json_string in json_strings:
81
+ box: list[float] | np.ndarray
82
+
83
+ for json_string in tqdm(
84
+ iterable=json_strings, desc="Preparing and Loading WILDRECEIPT", total=len(json_strings)
85
+ ):
86
+ _targets = []
83
87
  json_data = json.loads(json_string)
84
88
  img_path = json_data["file_name"]
85
89
  annotations = json_data["annotations"]
doctr/file_utils.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -9,7 +9,6 @@ import importlib.metadata
9
9
  import importlib.util
10
10
  import logging
11
11
  import os
12
- from typing import Optional
13
12
 
14
13
  CLASS_NAME: str = "words"
15
14
 
@@ -80,10 +79,16 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
80
79
  else:
81
80
  logging.info(f"TensorFlow version {_tf_version} available.")
82
81
  ensure_keras_v2()
83
- import tensorflow as tf
84
82
 
85
- # Enable eager execution - this is required for some models to work properly
86
- tf.config.run_functions_eagerly(True)
83
+ import warnings
84
+
85
+ warnings.simplefilter("always", DeprecationWarning)
86
+ warnings.warn(
87
+ "Support for TensorFlow in DocTR is deprecated and will be removed in the next major release (v1.0.0). "
88
+ "Please switch to the PyTorch backend.",
89
+ DeprecationWarning,
90
+ )
91
+
87
92
  else: # pragma: no cover
88
93
  logging.info("Disabling Tensorflow because USE_TORCH is set")
89
94
  _tf_available = False
@@ -96,12 +101,11 @@ if not _torch_available and not _tf_available: # pragma: no cover
96
101
  )
97
102
 
98
103
 
99
- def requires_package(name: str, extra_message: Optional[str] = None) -> None: # pragma: no cover
104
+ def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover
100
105
  """
101
106
  package requirement helper
102
107
 
103
108
  Args:
104
- ----
105
109
  name: name of the package
106
110
  extra_message: additional message to display if the package is not found
107
111
  """
doctr/io/elements.py CHANGED
@@ -1,9 +1,9 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
- from typing import Any, Dict, List, Optional, Tuple, Union
6
+ from typing import Any
7
7
 
8
8
  from defusedxml import defuse_stdlib
9
9
 
@@ -32,8 +32,8 @@ __all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page",
32
32
  class Element(NestedObject):
33
33
  """Implements an abstract document element with exporting and text rendering capabilities"""
34
34
 
35
- _children_names: List[str] = []
36
- _exported_keys: List[str] = []
35
+ _children_names: list[str] = []
36
+ _exported_keys: list[str] = []
37
37
 
38
38
  def __init__(self, **kwargs: Any) -> None:
39
39
  for k, v in kwargs.items():
@@ -42,7 +42,7 @@ class Element(NestedObject):
42
42
  else:
43
43
  raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'")
44
44
 
45
- def export(self) -> Dict[str, Any]:
45
+ def export(self) -> dict[str, Any]:
46
46
  """Exports the object into a nested dict format"""
47
47
  export_dict = {k: getattr(self, k) for k in self._exported_keys}
48
48
  for children_name in self._children_names:
@@ -56,7 +56,7 @@ class Element(NestedObject):
56
56
  return export_dict
57
57
 
58
58
  @classmethod
59
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
59
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
60
60
  raise NotImplementedError
61
61
 
62
62
  def render(self) -> str:
@@ -67,7 +67,6 @@ class Word(Element):
67
67
  """Implements a word element
68
68
 
69
69
  Args:
70
- ----
71
70
  value: the text string of the word
72
71
  confidence: the confidence associated with the text prediction
73
72
  geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -76,16 +75,16 @@ class Word(Element):
76
75
  crop_orientation: the general orientation of the crop in degrees and its confidence
77
76
  """
78
77
 
79
- _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
80
- _children_names: List[str] = []
78
+ _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
79
+ _children_names: list[str] = []
81
80
 
82
81
  def __init__(
83
82
  self,
84
83
  value: str,
85
84
  confidence: float,
86
- geometry: Union[BoundingBox, np.ndarray],
85
+ geometry: BoundingBox | np.ndarray,
87
86
  objectness_score: float,
88
- crop_orientation: Dict[str, Any],
87
+ crop_orientation: dict[str, Any],
89
88
  ) -> None:
90
89
  super().__init__()
91
90
  self.value = value
@@ -102,7 +101,7 @@ class Word(Element):
102
101
  return f"value='{self.value}', confidence={self.confidence:.2}"
103
102
 
104
103
  @classmethod
105
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
104
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
106
105
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
107
106
  return cls(**kwargs)
108
107
 
@@ -111,15 +110,14 @@ class Artefact(Element):
111
110
  """Implements a non-textual element
112
111
 
113
112
  Args:
114
- ----
115
113
  artefact_type: the type of artefact
116
114
  confidence: the confidence of the type prediction
117
115
  geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
118
116
  the page's size.
119
117
  """
120
118
 
121
- _exported_keys: List[str] = ["geometry", "type", "confidence"]
122
- _children_names: List[str] = []
119
+ _exported_keys: list[str] = ["geometry", "type", "confidence"]
120
+ _children_names: list[str] = []
123
121
 
124
122
  def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
125
123
  super().__init__()
@@ -135,7 +133,7 @@ class Artefact(Element):
135
133
  return f"type='{self.type}', confidence={self.confidence:.2}"
136
134
 
137
135
  @classmethod
138
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
136
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
139
137
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
140
138
  return cls(**kwargs)
141
139
 
@@ -144,22 +142,21 @@ class Line(Element):
144
142
  """Implements a line element as a collection of words
145
143
 
146
144
  Args:
147
- ----
148
145
  words: list of word elements
149
146
  geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
150
147
  the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
151
148
  all words in it.
152
149
  """
153
150
 
154
- _exported_keys: List[str] = ["geometry", "objectness_score"]
155
- _children_names: List[str] = ["words"]
156
- words: List[Word] = []
151
+ _exported_keys: list[str] = ["geometry", "objectness_score"]
152
+ _children_names: list[str] = ["words"]
153
+ words: list[Word] = []
157
154
 
158
155
  def __init__(
159
156
  self,
160
- words: List[Word],
161
- geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
162
- objectness_score: Optional[float] = None,
157
+ words: list[Word],
158
+ geometry: BoundingBox | np.ndarray | None = None,
159
+ objectness_score: float | None = None,
163
160
  ) -> None:
164
161
  # Compute the objectness score of the line
165
162
  if objectness_score is None:
@@ -179,7 +176,7 @@ class Line(Element):
179
176
  return " ".join(w.render() for w in self.words)
180
177
 
181
178
  @classmethod
182
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
179
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
183
180
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
184
181
  kwargs.update({
185
182
  "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
@@ -202,7 +199,6 @@ class Block(Element):
202
199
  """Implements a block element as a collection of lines and artefacts
203
200
 
204
201
  Args:
205
- ----
206
202
  lines: list of line elements
207
203
  artefacts: list of artefacts
208
204
  geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -210,17 +206,17 @@ class Block(Element):
210
206
  all lines and artefacts in it.
211
207
  """
212
208
 
213
- _exported_keys: List[str] = ["geometry", "objectness_score"]
214
- _children_names: List[str] = ["lines", "artefacts"]
215
- lines: List[Line] = []
216
- artefacts: List[Artefact] = []
209
+ _exported_keys: list[str] = ["geometry", "objectness_score"]
210
+ _children_names: list[str] = ["lines", "artefacts"]
211
+ lines: list[Line] = []
212
+ artefacts: list[Artefact] = []
217
213
 
218
214
  def __init__(
219
215
  self,
220
- lines: List[Line] = [],
221
- artefacts: List[Artefact] = [],
222
- geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
223
- objectness_score: Optional[float] = None,
216
+ lines: list[Line] = [],
217
+ artefacts: list[Artefact] = [],
218
+ geometry: BoundingBox | np.ndarray | None = None,
219
+ objectness_score: float | None = None,
224
220
  ) -> None:
225
221
  # Compute the objectness score of the line
226
222
  if objectness_score is None:
@@ -243,7 +239,7 @@ class Block(Element):
243
239
  return line_break.join(line.render() for line in self.lines)
244
240
 
245
241
  @classmethod
246
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
242
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
247
243
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
248
244
  kwargs.update({
249
245
  "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
@@ -256,7 +252,6 @@ class Page(Element):
256
252
  """Implements a page element as a collection of blocks
257
253
 
258
254
  Args:
259
- ----
260
255
  page: image encoded as a numpy array in uint8
261
256
  blocks: list of block elements
262
257
  page_idx: the index of the page in the input raw document
@@ -265,18 +260,18 @@ class Page(Element):
265
260
  language: a dictionary with the language value and confidence of the prediction
266
261
  """
267
262
 
268
- _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
269
- _children_names: List[str] = ["blocks"]
270
- blocks: List[Block] = []
263
+ _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
264
+ _children_names: list[str] = ["blocks"]
265
+ blocks: list[Block] = []
271
266
 
272
267
  def __init__(
273
268
  self,
274
269
  page: np.ndarray,
275
- blocks: List[Block],
270
+ blocks: list[Block],
276
271
  page_idx: int,
277
- dimensions: Tuple[int, int],
278
- orientation: Optional[Dict[str, Any]] = None,
279
- language: Optional[Dict[str, Any]] = None,
272
+ dimensions: tuple[int, int],
273
+ orientation: dict[str, Any] | None = None,
274
+ language: dict[str, Any] | None = None,
280
275
  ) -> None:
281
276
  super().__init__(blocks=blocks)
282
277
  self.page = page
@@ -311,25 +306,21 @@ class Page(Element):
311
306
  """Synthesize the page from the predictions
312
307
 
313
308
  Args:
314
- ----
315
309
  **kwargs: keyword arguments passed to the `synthesize_page` method
316
310
 
317
- Returns
318
- -------
311
+ Returns:
319
312
  synthesized page
320
313
  """
321
314
  return synthesize_page(self.export(), **kwargs)
322
315
 
323
- def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
316
+ def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
324
317
  """Export the page as XML (hOCR-format)
325
318
  convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
326
319
 
327
320
  Args:
328
- ----
329
321
  file_title: the title of the XML file
330
322
 
331
323
  Returns:
332
- -------
333
324
  a tuple of the XML byte string, and its ElementTree
334
325
  """
335
326
  p_idx = self.page_idx
@@ -356,7 +347,7 @@ class Page(Element):
356
347
  )
357
348
  # Create the body
358
349
  body = SubElement(page_hocr, "body")
359
- SubElement(
350
+ page_div = SubElement(
360
351
  body,
361
352
  "div",
362
353
  attrib={
@@ -371,7 +362,7 @@ class Page(Element):
371
362
  raise TypeError("XML export is only available for straight bounding boxes for now.")
372
363
  (xmin, ymin), (xmax, ymax) = block.geometry
373
364
  block_div = SubElement(
374
- body,
365
+ page_div,
375
366
  "div",
376
367
  attrib={
377
368
  "class": "ocr_carea",
@@ -427,7 +418,7 @@ class Page(Element):
427
418
  return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))
428
419
 
429
420
  @classmethod
430
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
421
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
431
422
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
432
423
  kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
433
424
  return cls(**kwargs)
@@ -437,7 +428,6 @@ class KIEPage(Element):
437
428
  """Implements a KIE page element as a collection of predictions
438
429
 
439
430
  Args:
440
- ----
441
431
  predictions: Dictionary with list of block elements for each detection class
442
432
  page: image encoded as a numpy array in uint8
443
433
  page_idx: the index of the page in the input raw document
@@ -446,18 +436,18 @@ class KIEPage(Element):
446
436
  language: a dictionary with the language value and confidence of the prediction
447
437
  """
448
438
 
449
- _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
450
- _children_names: List[str] = ["predictions"]
451
- predictions: Dict[str, List[Prediction]] = {}
439
+ _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
440
+ _children_names: list[str] = ["predictions"]
441
+ predictions: dict[str, list[Prediction]] = {}
452
442
 
453
443
  def __init__(
454
444
  self,
455
445
  page: np.ndarray,
456
- predictions: Dict[str, List[Prediction]],
446
+ predictions: dict[str, list[Prediction]],
457
447
  page_idx: int,
458
- dimensions: Tuple[int, int],
459
- orientation: Optional[Dict[str, Any]] = None,
460
- language: Optional[Dict[str, Any]] = None,
448
+ dimensions: tuple[int, int],
449
+ orientation: dict[str, Any] | None = None,
450
+ language: dict[str, Any] | None = None,
461
451
  ) -> None:
462
452
  super().__init__(predictions=predictions)
463
453
  self.page = page
@@ -496,25 +486,21 @@ class KIEPage(Element):
496
486
  """Synthesize the page from the predictions
497
487
 
498
488
  Args:
499
- ----
500
489
  **kwargs: keyword arguments passed to the `synthesize_kie_page` method
501
490
 
502
491
  Returns:
503
- -------
504
492
  synthesized page
505
493
  """
506
494
  return synthesize_kie_page(self.export(), **kwargs)
507
495
 
508
- def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
496
+ def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
509
497
  """Export the page as XML (hOCR-format)
510
498
  convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
511
499
 
512
500
  Args:
513
- ----
514
501
  file_title: the title of the XML file
515
502
 
516
503
  Returns:
517
- -------
518
504
  a tuple of the XML byte string, and its ElementTree
519
505
  """
520
506
  p_idx = self.page_idx
@@ -564,13 +550,47 @@ class KIEPage(Element):
564
550
  {int(round(xmax * width))} {int(round(ymax * height))}",
565
551
  },
566
552
  )
567
- prediction_div.text = prediction.value
553
+ # NOTE: ocr_par, ocr_line and ocrx_word are the same because the KIE predictions contain only words
554
+ # This is a workaround to make it PDF/A compatible
555
+ par_div = SubElement(
556
+ prediction_div,
557
+ "p",
558
+ attrib={
559
+ "class": "ocr_par",
560
+ "id": f"{class_name}_par_{prediction_count}",
561
+ "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
562
+ {int(round(xmax * width))} {int(round(ymax * height))}",
563
+ },
564
+ )
565
+ line_span = SubElement(
566
+ par_div,
567
+ "span",
568
+ attrib={
569
+ "class": "ocr_line",
570
+ "id": f"{class_name}_line_{prediction_count}",
571
+ "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
572
+ {int(round(xmax * width))} {int(round(ymax * height))}; \
573
+ baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0",
574
+ },
575
+ )
576
+ word_div = SubElement(
577
+ line_span,
578
+ "span",
579
+ attrib={
580
+ "class": "ocrx_word",
581
+ "id": f"{class_name}_word_{prediction_count}",
582
+ "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
583
+ {int(round(xmax * width))} {int(round(ymax * height))}; \
584
+ x_wconf {int(round(prediction.confidence * 100))}",
585
+ },
586
+ )
587
+ word_div.text = prediction.value
568
588
  prediction_count += 1
569
589
 
570
590
  return ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr)
571
591
 
572
592
  @classmethod
573
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
593
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
574
594
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
575
595
  kwargs.update({
576
596
  "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]
@@ -582,16 +602,15 @@ class Document(Element):
582
602
  """Implements a document element as a collection of pages
583
603
 
584
604
  Args:
585
- ----
586
605
  pages: list of page elements
587
606
  """
588
607
 
589
- _children_names: List[str] = ["pages"]
590
- pages: List[Page] = []
608
+ _children_names: list[str] = ["pages"]
609
+ pages: list[Page] = []
591
610
 
592
611
  def __init__(
593
612
  self,
594
- pages: List[Page],
613
+ pages: list[Page],
595
614
  ) -> None:
596
615
  super().__init__(pages=pages)
597
616
 
@@ -604,34 +623,30 @@ class Document(Element):
604
623
  for result in self.pages:
605
624
  result.show(**kwargs)
606
625
 
607
- def synthesize(self, **kwargs) -> List[np.ndarray]:
626
+ def synthesize(self, **kwargs) -> list[np.ndarray]:
608
627
  """Synthesize all pages from their predictions
609
628
 
610
629
  Args:
611
- ----
612
630
  **kwargs: keyword arguments passed to the `Page.synthesize` method
613
631
 
614
- Returns
615
- -------
632
+ Returns:
616
633
  list of synthesized pages
617
634
  """
618
635
  return [page.synthesize(**kwargs) for page in self.pages]
619
636
 
620
- def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
637
+ def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
621
638
  """Export the document as XML (hOCR-format)
622
639
 
623
640
  Args:
624
- ----
625
641
  **kwargs: additional keyword arguments passed to the Page.export_as_xml method
626
642
 
627
643
  Returns:
628
- -------
629
644
  list of tuple of (bytes, ElementTree)
630
645
  """
631
646
  return [page.export_as_xml(**kwargs) for page in self.pages]
632
647
 
633
648
  @classmethod
634
- def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
649
+ def from_dict(cls, save_dict: dict[str, Any], **kwargs):
635
650
  kwargs = {k: save_dict[k] for k in cls._exported_keys}
636
651
  kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]})
637
652
  return cls(**kwargs)
@@ -641,15 +656,14 @@ class KIEDocument(Document):
641
656
  """Implements a document element as a collection of pages
642
657
 
643
658
  Args:
644
- ----
645
659
  pages: list of page elements
646
660
  """
647
661
 
648
- _children_names: List[str] = ["pages"]
649
- pages: List[KIEPage] = [] # type: ignore[assignment]
662
+ _children_names: list[str] = ["pages"]
663
+ pages: list[KIEPage] = [] # type: ignore[assignment]
650
664
 
651
665
  def __init__(
652
666
  self,
653
- pages: List[KIEPage],
667
+ pages: list[KIEPage],
654
668
  ) -> None:
655
669
  super().__init__(pages=pages) # type: ignore[arg-type]
doctr/io/html.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
15
15
  >>> doc = read_html("https://www.yoursite.com")
16
16
 
17
17
  Args:
18
- ----
19
18
  url: URL of the target web page
20
19
  **kwargs: keyword arguments from `weasyprint.HTML`
21
20
 
22
21
  Returns:
23
- -------
24
22
  decoded PDF file as a bytes stream
25
23
  """
26
24
  from weasyprint import HTML
@@ -2,7 +2,7 @@ from doctr.file_utils import is_tf_available, is_torch_available
2
2
 
3
3
  from .base import *
4
4
 
5
- if is_tf_available():
6
- from .tensorflow import *
7
- elif is_torch_available():
5
+ if is_torch_available():
8
6
  from .pytorch import *
7
+ elif is_tf_available():
8
+ from .tensorflow import *
doctr/io/image/base.py CHANGED
@@ -1,10 +1,9 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  from pathlib import Path
7
- from typing import Optional, Tuple
8
7
 
9
8
  import cv2
10
9
  import numpy as np
@@ -16,7 +15,7 @@ __all__ = ["read_img_as_numpy"]
16
15
 
17
16
  def read_img_as_numpy(
18
17
  file: AbstractFile,
19
- output_size: Optional[Tuple[int, int]] = None,
18
+ output_size: tuple[int, int] | None = None,
20
19
  rgb_output: bool = True,
21
20
  ) -> np.ndarray:
22
21
  """Read an image file into numpy format
@@ -25,13 +24,11 @@ def read_img_as_numpy(
25
24
  >>> page = read_img_as_numpy("path/to/your/doc.jpg")
26
25
 
27
26
  Args:
28
- ----
29
27
  file: the path to the image file
30
28
  output_size: the expected output size of each page in format H x W
31
29
  rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
32
30
 
33
31
  Returns:
34
- -------
35
32
  the page decoded as numpy ndarray of shape H x W x 3
36
33
  """
37
34
  if isinstance(file, (str, Path)):