python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. doctr/datasets/__init__.py +2 -0
  2. doctr/datasets/cord.py +6 -4
  3. doctr/datasets/datasets/base.py +3 -2
  4. doctr/datasets/datasets/pytorch.py +4 -2
  5. doctr/datasets/datasets/tensorflow.py +4 -2
  6. doctr/datasets/detection.py +6 -3
  7. doctr/datasets/doc_artefacts.py +2 -1
  8. doctr/datasets/funsd.py +7 -8
  9. doctr/datasets/generator/base.py +3 -2
  10. doctr/datasets/generator/pytorch.py +3 -1
  11. doctr/datasets/generator/tensorflow.py +3 -1
  12. doctr/datasets/ic03.py +3 -2
  13. doctr/datasets/ic13.py +2 -1
  14. doctr/datasets/iiit5k.py +6 -4
  15. doctr/datasets/iiithws.py +2 -1
  16. doctr/datasets/imgur5k.py +3 -2
  17. doctr/datasets/loader.py +4 -2
  18. doctr/datasets/mjsynth.py +2 -1
  19. doctr/datasets/ocr.py +2 -1
  20. doctr/datasets/orientation.py +40 -0
  21. doctr/datasets/recognition.py +3 -2
  22. doctr/datasets/sroie.py +2 -1
  23. doctr/datasets/svhn.py +2 -1
  24. doctr/datasets/svt.py +3 -2
  25. doctr/datasets/synthtext.py +2 -1
  26. doctr/datasets/utils.py +27 -11
  27. doctr/datasets/vocabs.py +26 -1
  28. doctr/datasets/wildreceipt.py +111 -0
  29. doctr/file_utils.py +3 -1
  30. doctr/io/elements.py +52 -35
  31. doctr/io/html.py +5 -3
  32. doctr/io/image/base.py +5 -4
  33. doctr/io/image/pytorch.py +12 -7
  34. doctr/io/image/tensorflow.py +11 -6
  35. doctr/io/pdf.py +5 -4
  36. doctr/io/reader.py +13 -5
  37. doctr/models/_utils.py +30 -53
  38. doctr/models/artefacts/barcode.py +4 -3
  39. doctr/models/artefacts/face.py +4 -2
  40. doctr/models/builder.py +58 -43
  41. doctr/models/classification/__init__.py +1 -0
  42. doctr/models/classification/magc_resnet/pytorch.py +5 -2
  43. doctr/models/classification/magc_resnet/tensorflow.py +5 -2
  44. doctr/models/classification/mobilenet/pytorch.py +16 -4
  45. doctr/models/classification/mobilenet/tensorflow.py +29 -20
  46. doctr/models/classification/predictor/pytorch.py +3 -2
  47. doctr/models/classification/predictor/tensorflow.py +2 -1
  48. doctr/models/classification/resnet/pytorch.py +23 -13
  49. doctr/models/classification/resnet/tensorflow.py +33 -26
  50. doctr/models/classification/textnet/__init__.py +6 -0
  51. doctr/models/classification/textnet/pytorch.py +275 -0
  52. doctr/models/classification/textnet/tensorflow.py +267 -0
  53. doctr/models/classification/vgg/pytorch.py +4 -2
  54. doctr/models/classification/vgg/tensorflow.py +5 -2
  55. doctr/models/classification/vit/pytorch.py +9 -3
  56. doctr/models/classification/vit/tensorflow.py +9 -3
  57. doctr/models/classification/zoo.py +7 -2
  58. doctr/models/core.py +1 -1
  59. doctr/models/detection/__init__.py +1 -0
  60. doctr/models/detection/_utils/pytorch.py +7 -1
  61. doctr/models/detection/_utils/tensorflow.py +7 -3
  62. doctr/models/detection/core.py +9 -3
  63. doctr/models/detection/differentiable_binarization/base.py +37 -25
  64. doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
  65. doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
  66. doctr/models/detection/fast/__init__.py +6 -0
  67. doctr/models/detection/fast/base.py +256 -0
  68. doctr/models/detection/fast/pytorch.py +442 -0
  69. doctr/models/detection/fast/tensorflow.py +428 -0
  70. doctr/models/detection/linknet/base.py +12 -5
  71. doctr/models/detection/linknet/pytorch.py +28 -15
  72. doctr/models/detection/linknet/tensorflow.py +68 -88
  73. doctr/models/detection/predictor/pytorch.py +16 -6
  74. doctr/models/detection/predictor/tensorflow.py +13 -5
  75. doctr/models/detection/zoo.py +19 -16
  76. doctr/models/factory/hub.py +20 -10
  77. doctr/models/kie_predictor/base.py +2 -1
  78. doctr/models/kie_predictor/pytorch.py +28 -36
  79. doctr/models/kie_predictor/tensorflow.py +27 -27
  80. doctr/models/modules/__init__.py +1 -0
  81. doctr/models/modules/layers/__init__.py +6 -0
  82. doctr/models/modules/layers/pytorch.py +166 -0
  83. doctr/models/modules/layers/tensorflow.py +175 -0
  84. doctr/models/modules/transformer/pytorch.py +24 -22
  85. doctr/models/modules/transformer/tensorflow.py +6 -4
  86. doctr/models/modules/vision_transformer/pytorch.py +2 -4
  87. doctr/models/modules/vision_transformer/tensorflow.py +2 -4
  88. doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
  89. doctr/models/predictor/base.py +14 -3
  90. doctr/models/predictor/pytorch.py +26 -29
  91. doctr/models/predictor/tensorflow.py +25 -22
  92. doctr/models/preprocessor/pytorch.py +14 -9
  93. doctr/models/preprocessor/tensorflow.py +10 -5
  94. doctr/models/recognition/core.py +4 -1
  95. doctr/models/recognition/crnn/pytorch.py +23 -16
  96. doctr/models/recognition/crnn/tensorflow.py +25 -17
  97. doctr/models/recognition/master/base.py +4 -1
  98. doctr/models/recognition/master/pytorch.py +20 -9
  99. doctr/models/recognition/master/tensorflow.py +20 -8
  100. doctr/models/recognition/parseq/base.py +4 -1
  101. doctr/models/recognition/parseq/pytorch.py +28 -22
  102. doctr/models/recognition/parseq/tensorflow.py +22 -11
  103. doctr/models/recognition/predictor/_utils.py +3 -2
  104. doctr/models/recognition/predictor/pytorch.py +3 -2
  105. doctr/models/recognition/predictor/tensorflow.py +2 -1
  106. doctr/models/recognition/sar/pytorch.py +14 -7
  107. doctr/models/recognition/sar/tensorflow.py +23 -14
  108. doctr/models/recognition/utils.py +5 -1
  109. doctr/models/recognition/vitstr/base.py +4 -1
  110. doctr/models/recognition/vitstr/pytorch.py +22 -13
  111. doctr/models/recognition/vitstr/tensorflow.py +21 -10
  112. doctr/models/recognition/zoo.py +4 -2
  113. doctr/models/utils/pytorch.py +24 -6
  114. doctr/models/utils/tensorflow.py +22 -3
  115. doctr/models/zoo.py +21 -3
  116. doctr/transforms/functional/base.py +8 -3
  117. doctr/transforms/functional/pytorch.py +23 -6
  118. doctr/transforms/functional/tensorflow.py +25 -5
  119. doctr/transforms/modules/base.py +12 -5
  120. doctr/transforms/modules/pytorch.py +10 -12
  121. doctr/transforms/modules/tensorflow.py +17 -9
  122. doctr/utils/common_types.py +1 -1
  123. doctr/utils/data.py +4 -2
  124. doctr/utils/fonts.py +3 -2
  125. doctr/utils/geometry.py +95 -26
  126. doctr/utils/metrics.py +36 -22
  127. doctr/utils/multithreading.py +5 -3
  128. doctr/utils/repr.py +3 -1
  129. doctr/utils/visualization.py +31 -8
  130. doctr/version.py +1 -1
  131. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
  132. python_doctr-0.8.1.dist-info/RECORD +173 -0
  133. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
  134. python_doctr-0.7.0.dist-info/RECORD +0 -161
  135. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
  136. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
  137. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/datasets/utils.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -8,9 +8,8 @@ import unicodedata
  from collections.abc import Sequence
  from functools import partial
  from pathlib import Path
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union
  from typing import Sequence as SequenceType
- from typing import Tuple, TypeVar, Union
  
  import numpy as np
  from PIL import Image
@@ -33,13 +32,15 @@ def translate(
      """Translate a string input in a given vocabulary
  
      Args:
+     ----
          input_string: input string to translate
          vocab_name: vocabulary to use (french, latin, ...)
          unknown_char: unknown character for non-translatable characters
  
      Returns:
-         A string translated in a given vocab"""
- 
+     -------
+         A string translated in a given vocab
+     """
      if VOCABS.get(vocab_name) is None:
          raise KeyError("output vocabulary must be in vocabs dictionnary")
  
@@ -66,16 +67,21 @@ def encode_string(
      """Given a predefined mapping, encode the string to a sequence of numbers
  
      Args:
+     ----
          input_string: string to encode
          vocab: vocabulary (string), the encoding is given by the indexing of the character sequence
  
      Returns:
-         A list encoding the input_string"""
- 
+     -------
+         A list encoding the input_string
+     """
      try:
          return list(map(vocab.index, input_string))
      except ValueError:
-         raise ValueError("some characters cannot be found in 'vocab'")
+         raise ValueError(
+             f"some characters cannot be found in 'vocab'. \
+                 Please check the input string {input_string} and the vocabulary {vocab}"
+         )
  
  
  def decode_sequence(
@@ -85,13 +91,14 @@ def decode_sequence(
      """Given a predefined mapping, decode the sequence of numbers to a string
  
      Args:
+     ----
          input_seq: array to decode
          mapping: vocabulary (string), the encoding is given by the indexing of the character sequence
  
      Returns:
+     -------
          A string, decoded from input_seq
      """
- 
      if not isinstance(input_seq, (Sequence, np.ndarray)):
          raise TypeError("Invalid sequence type")
      if isinstance(input_seq, np.ndarray) and (input_seq.dtype != np.int_ or input_seq.max() >= len(mapping)):
@@ -108,11 +115,11 @@ def encode_sequences(
      sos: Optional[int] = None,
      pad: Optional[int] = None,
      dynamic_seq_length: bool = False,
-     **kwargs: Any,
  ) -> np.ndarray:
      """Encode character sequences using a given vocab as mapping
  
      Args:
+     ----
          sequences: the list of character sequences of size N
          vocab: the ordered vocab to use for encoding
          target_size: maximum length of the encoded data
@@ -122,9 +129,9 @@ def encode_sequences(
          dynamic_seq_length: if `target_size` is specified, uses it as upper bound and enables dynamic sequence size
  
      Returns:
+     -------
          the padded encoded data as a tensor
      """
- 
      if 0 <= eos < len(vocab):
          raise ValueError("argument 'eos' needs to be outside of vocab possible indices")
  
@@ -169,10 +176,14 @@ def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tupl
  
  def crop_bboxes_from_image(img_path: Union[str, Path], geoms: np.ndarray) -> List[np.ndarray]:
      """Crop a set of bounding boxes from an image
+ 
      Args:
+     ----
          img_path: path to the image
          geoms: a array of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
+ 
      Returns:
+     -------
          a list of cropped images
      """
      img: np.ndarray = np.array(Image.open(img_path).convert("RGB"))
@@ -188,8 +199,13 @@ def pre_transform_multiclass(img, target: Tuple[np.ndarray, List]) -> Tuple[np.n
      """Converts multiclass target to relative coordinates.
  
      Args:
+     ----
          img: Image
          target: tuple of target polygons and their classes names
+ 
+     Returns:
+     -------
+         Image and dictionary of boxes, with class names as keys
      """
      boxes = convert_to_relative_coords(target[0], get_img_shape(img))
      boxes_classes = target[1]
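To make the docstring changes above concrete, here is a minimal sketch of how these vocabulary helpers fit together; the import paths and the sample word are illustrative, not taken from the diff:

    # Round-trip a word through the helpers touched above.
    from doctr.datasets.utils import decode_sequence, encode_string
    from doctr.datasets.vocabs import VOCABS

    vocab = VOCABS["french"]
    encoded = encode_string("Mindee", vocab)   # list of character indices into `vocab`
    decoded = decode_sequence(encoded, vocab)  # back to the original string
    assert decoded == "Mindee"

    # Characters absent from the vocab raise a ValueError; in 0.8.x the message
    # now echoes both the offending input string and the vocabulary.
    try:
        encode_string("日本語", vocab)
    except ValueError as err:
        print(err)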
doctr/datasets/vocabs.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -28,6 +28,7 @@ VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂ
  VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
  VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
  VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
+ VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
  VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
  VOCABS["arabic"] = (
      VOCABS["digits"]
@@ -39,8 +40,32 @@ VOCABS["arabic"] = (
      + VOCABS["punctuation"]
  )
  VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
+ VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
+ VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
+ VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
+ VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
+ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
+ VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
  VOCABS["vietnamese"] = (
      VOCABS["english"]
      + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
      + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
  )
+ VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
+ VOCABS["multilingual"] = "".join(
+     dict.fromkeys(
+         VOCABS["french"]
+         + VOCABS["portuguese"]
+         + VOCABS["spanish"]
+         + VOCABS["german"]
+         + VOCABS["czech"]
+         + VOCABS["polish"]
+         + VOCABS["dutch"]
+         + VOCABS["italian"]
+         + VOCABS["norwegian"]
+         + VOCABS["danish"]
+         + VOCABS["finnish"]
+         + VOCABS["swedish"]
+         + "§"
+     )
+ )
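The new "multilingual" entry merges the European vocabs and drops duplicate characters while keeping first-seen order, relying on the ordering guarantee of dict.fromkeys; a small illustrative sketch of that idiom (the strings below are stand-ins, not the real vocabs):

    # dict.fromkeys keeps insertion order and removes duplicates, which is how
    # VOCABS["multilingual"] above is assembled from the per-language vocabs.
    merged = "abcàé" + "abcäö"  # stand-in for VOCABS["french"] + VOCABS["german"] + ...
    deduplicated = "".join(dict.fromkeys(merged))
    print(deduplicated)  # "abcàéäö": each character appears once, in first-seen order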
doctr/datasets/wildreceipt.py ADDED
@@ -0,0 +1,111 @@
+ # Copyright (C) 2021-2024, Mindee.
+ 
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+ 
+ import json
+ import os
+ from pathlib import Path
+ from typing import Any, Dict, List, Tuple, Union
+ 
+ import numpy as np
+ 
+ from .datasets import AbstractDataset
+ from .utils import convert_target_to_relative, crop_bboxes_from_image
+ 
+ __all__ = ["WILDRECEIPT"]
+ 
+ 
+ class WILDRECEIPT(AbstractDataset):
+     """WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
+     <https://arxiv.org/abs/2103.14470v1>`_ |
+     `repository <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
+ 
+     .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0
+         :align: center
+ 
+     >>> # NOTE: You need to download the dataset first.
+     >>> from doctr.datasets import WILDRECEIPT
+     >>> train_set = WILDRECEIPT(train=True, img_folder="/path/to/wildreceipt/",
+     >>>                         label_path="/path/to/wildreceipt/train.txt")
+     >>> img, target = train_set[0]
+     >>> test_set = WILDRECEIPT(train=False, img_folder="/path/to/wildreceipt/",
+     >>>                        label_path="/path/to/wildreceipt/test.txt")
+     >>> img, target = test_set[0]
+ 
+     Args:
+     ----
+         img_folder: folder with all the images of the dataset
+         label_path: path to the annotations file of the dataset
+         train: whether the subset should be the training one
+         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+         recognition_task: whether the dataset should be used for recognition task
+         **kwargs: keyword arguments from `AbstractDataset`.
+     """
+ 
+     def __init__(
+         self,
+         img_folder: str,
+         label_path: str,
+         train: bool = True,
+         use_polygons: bool = False,
+         recognition_task: bool = False,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+         )
+         # File existence check
+         if not os.path.exists(label_path) or not os.path.exists(img_folder):
+             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
+ 
+         tmp_root = img_folder
+         self.train = train
+         np_dtype = np.float32
+         self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+ 
+         with open(label_path, "r") as file:
+             data = file.read()
+         # Split the text file into separate JSON strings
+         json_strings = data.strip().split("\n")
+         box: Union[List[float], np.ndarray]
+         _targets = []
+         for json_string in json_strings:
+             json_data = json.loads(json_string)
+             img_path = json_data["file_name"]
+             annotations = json_data["annotations"]
+             for annotation in annotations:
+                 coordinates = annotation["box"]
+                 if use_polygons:
+                     # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                     box = np.array(
+                         [
+                             [coordinates[0], coordinates[1]],
+                             [coordinates[2], coordinates[3]],
+                             [coordinates[4], coordinates[5]],
+                             [coordinates[6], coordinates[7]],
+                         ],
+                         dtype=np_dtype,
+                     )
+                 else:
+                     x, y = coordinates[::2], coordinates[1::2]
+                     box = [min(x), min(y), max(x), max(y)]
+                 _targets.append((annotation["text"], box))
+             text_targets, box_targets = zip(*_targets)
+ 
+             if recognition_task:
+                 crops = crop_bboxes_from_image(
+                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                 )
+                 for crop, label in zip(crops, list(text_targets)):
+                     if label and " " not in label:
+                         self.data.append((crop, label))
+             else:
+                 self.data.append((
+                     img_path,
+                     dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                 ))
+         self.root = tmp_root
+ 
+     def extra_repr(self) -> str:
+         return f"train={self.train}"
doctr/file_utils.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -83,8 +83,10 @@ if not _torch_available and not _tf_available:  # pragma: no cover
  
  
  def is_torch_available():
+     """Whether PyTorch is installed."""
      return _torch_available
  
  
  def is_tf_available():
+     """Whether TensorFlow is installed."""
      return _tf_available
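These two helpers are the usual way to branch on the installed backend; a minimal sketch (the error message is illustrative):

    from doctr.file_utils import is_tf_available, is_torch_available

    # Pick the backend-specific code path at import time.
    if is_torch_available():
        backend = "pytorch"
    elif is_tf_available():
        backend = "tensorflow"
    else:
        raise ImportError("docTR needs either PyTorch or TensorFlow installed")
    print(f"Running docTR with the {backend} backend")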
doctr/io/elements.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -39,7 +39,6 @@ class Element(NestedObject):
  
      def export(self) -> Dict[str, Any]:
          """Exports the object into a nested dict format"""
- 
          export_dict = {k: getattr(self, k) for k in self._exported_keys}
          for children_name in self._children_names:
              if children_name in ["predictions"]:
@@ -63,6 +62,7 @@ class Word(Element):
      """Implements a word element
  
      Args:
+     ----
          value: the text string of the word
          confidence: the confidence associated with the text prediction
          geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -95,6 +95,7 @@ class Artefact(Element):
      """Implements a non-textual element
  
      Args:
+     ----
          artefact_type: the type of artefact
          confidence: the confidence of the type prediction
          geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -127,6 +128,7 @@ class Line(Element):
      """Implements a line element as a collection of words
  
      Args:
+     ----
          words: list of word elements
          geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
              the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
@@ -158,11 +160,9 @@ class Line(Element):
      @classmethod
      def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
          kwargs = {k: save_dict[k] for k in cls._exported_keys}
-         kwargs.update(
-             {
-                 "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
-             }
-         )
+         kwargs.update({
+             "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
+         })
          return cls(**kwargs)
  
  
@@ -181,6 +181,7 @@ class Block(Element):
      """Implements a block element as a collection of lines and artefacts
  
      Args:
+     ----
          lines: list of line elements
          artefacts: list of artefacts
          geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -218,12 +219,10 @@ class Block(Element):
      @classmethod
      def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
          kwargs = {k: save_dict[k] for k in cls._exported_keys}
-         kwargs.update(
-             {
-                 "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
-                 "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
-             }
-         )
+         kwargs.update({
+             "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
+             "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
+         })
          return cls(**kwargs)
  
  
@@ -231,6 +230,8 @@ class Page(Element):
      """Implements a page element as a collection of blocks
  
      Args:
+     ----
+         page: image encoded as a numpy array in uint8
          blocks: list of block elements
          page_idx: the index of the page in the input raw document
          dimensions: the page size in pixels in format (height, width)
@@ -244,6 +245,7 @@ class Page(Element):
  
      def __init__(
          self,
+         page: np.ndarray,
          blocks: List[Block],
          page_idx: int,
          dimensions: Tuple[int, int],
@@ -251,6 +253,7 @@ class Page(Element):
          language: Optional[Dict[str, Any]] = None,
      ) -> None:
          super().__init__(blocks=blocks)
+         self.page = page
          self.page_idx = page_idx
          self.dimensions = dimensions
          self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -263,24 +266,24 @@ class Page(Element):
      def extra_repr(self) -> str:
          return f"dimensions={self.dimensions}"
  
-     def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+     def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
          """Overlay the result on a given image
  
          Args:
-             page: image encoded as a numpy array in uint8
              interactive: whether the display should be interactive
              preserve_aspect_ratio: pass True if you passed True to the predictor
+             **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
          """
-         visualize_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+         visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
          plt.show(**kwargs)
  
      def synthesize(self, **kwargs) -> np.ndarray:
          """Synthesize the page from the predictions
  
-         Returns:
+         Returns
+         -------
              synthesized page
          """
- 
          return synthesize_page(self.export(), **kwargs)
  
      def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
@@ -288,9 +291,11 @@ class Page(Element):
          convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
  
          Args:
+         ----
              file_title: the title of the XML file
  
          Returns:
+         -------
              a tuple of the XML byte string, and its ElementTree
          """
          p_idx = self.page_idx
@@ -398,7 +403,9 @@ class KIEPage(Element):
      """Implements a KIE page element as a collection of predictions
  
      Args:
+     ----
          predictions: Dictionary with list of block elements for each detection class
+         page: image encoded as a numpy array in uint8
          page_idx: the index of the page in the input raw document
          dimensions: the page size in pixels in format (height, width)
          orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction
@@ -411,6 +418,7 @@ class KIEPage(Element):
  
      def __init__(
          self,
+         page: np.ndarray,
          predictions: Dict[str, List[Prediction]],
          page_idx: int,
          dimensions: Tuple[int, int],
@@ -418,6 +426,7 @@ class KIEPage(Element):
          language: Optional[Dict[str, Any]] = None,
      ) -> None:
          super().__init__(predictions=predictions)
+         self.page = page
          self.page_idx = page_idx
          self.dimensions = dimensions
          self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -432,24 +441,30 @@ class KIEPage(Element):
      def extra_repr(self) -> str:
          return f"dimensions={self.dimensions}"
  
-     def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+     def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
          """Overlay the result on a given image
  
          Args:
-             page: image encoded as a numpy array in uint8
              interactive: whether the display should be interactive
              preserve_aspect_ratio: pass True if you passed True to the predictor
+             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
          """
-         visualize_kie_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+         visualize_kie_page(
+             self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
+         )
          plt.show(**kwargs)
  
      def synthesize(self, **kwargs) -> np.ndarray:
          """Synthesize the page from the predictions
  
+         Args:
+         ----
+             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
+ 
          Returns:
+         -------
              synthesized page
          """
- 
          return synthesize_kie_page(self.export(), **kwargs)
  
      def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
@@ -457,9 +472,11 @@ class KIEPage(Element):
          convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
  
          Args:
+         ----
              file_title: the title of the XML file
  
          Returns:
+         -------
              a tuple of the XML byte string, and its ElementTree
          """
          p_idx = self.page_idx
@@ -517,9 +534,9 @@ class KIEPage(Element):
      @classmethod
      def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
          kwargs = {k: save_dict[k] for k in cls._exported_keys}
-         kwargs.update(
-             {"predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]}
-         )
+         kwargs.update({
+             "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]
+         })
          return cls(**kwargs)
  
  
@@ -527,6 +544,7 @@ class Document(Element):
      """Implements a document element as a collection of pages
  
      Args:
+     ----
          pages: list of page elements
      """
  
@@ -543,31 +561,29 @@ class Document(Element):
          """Renders the full text of the element"""
          return page_break.join(p.render() for p in self.pages)
  
-     def show(self, pages: List[np.ndarray], **kwargs) -> None:
-         """Overlay the result on a given image
- 
-         Args:
-             pages: list of images encoded as numpy arrays in uint8
-         """
-         for img, result in zip(pages, self.pages):
-             result.show(img, **kwargs)
+     def show(self, **kwargs) -> None:
+         """Overlay the result on a given image"""
+         for result in self.pages:
+             result.show(**kwargs)
  
      def synthesize(self, **kwargs) -> List[np.ndarray]:
          """Synthesize all pages from their predictions
  
-         Returns:
+         Returns
+         -------
              list of synthesized pages
          """
- 
          return [page.synthesize() for page in self.pages]
  
      def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
          """Export the document as XML (hOCR-format)
  
          Args:
+         ----
              **kwargs: additional keyword arguments passed to the Page.export_as_xml method
  
          Returns:
+         -------
              list of tuple of (bytes, ElementTree)
          """
          return [page.export_as_xml(**kwargs) for page in self.pages]
@@ -583,6 +599,7 @@ class KIEDocument(Document):
      """Implements a document element as a collection of pages
  
      Args:
+     ----
          pages: list of page elements
      """
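The change above stores the input image on Page/KIEPage (the new page attribute), so show() no longer takes the image as an argument. A hedged sketch of the resulting call pattern, assuming a standard predictor pipeline (the image path is a placeholder):

    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor

    predictor = ocr_predictor(pretrained=True)
    doc = DocumentFile.from_images(["path/to/receipt.jpg"])
    result = predictor(doc)

    # 0.7.x required the raw page images: result.show(doc)
    # 0.8.x keeps each page image on the element itself:
    result.show()            # whole document
    result.pages[0].show()   # a single page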
doctr/io/html.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -13,14 +13,16 @@ __all__ = ["read_html"]
  def read_html(url: str, **kwargs: Any) -> bytes:
      """Read a PDF file and convert it into an image in numpy format
  
-     >>> from doctr.documents import read_html
+     >>> from doctr.io import read_html
      >>> doc = read_html("https://www.yoursite.com")
  
      Args:
+     ----
          url: URL of the target web page
+         **kwargs: keyword arguments from `weasyprint.HTML`
  
      Returns:
+     -------
          decoded PDF file as a bytes stream
      """
- 
      return HTML(url, **kwargs).write_pdf()
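Since read_html returns the rendered page as PDF bytes, it is typically chained with DocumentFile.from_pdf; a short sketch (the URL is a placeholder):

    from doctr.io import DocumentFile, read_html

    # read_html renders the web page with weasyprint and returns PDF bytes,
    # which DocumentFile.from_pdf can then decode into page images.
    pdf_bytes = read_html("https://www.yoursite.com")
    pages = DocumentFile.from_pdf(pdf_bytes)
    print(len(pages), pages[0].shape)  # page count and the (H, W, 3) shape of the first page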
doctr/io/image/base.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
  
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -21,18 +21,19 @@ def read_img_as_numpy(
  ) -> np.ndarray:
      """Read an image file into numpy format
  
-     >>> from doctr.documents import read_img
-     >>> page = read_img("path/to/your/doc.jpg")
+     >>> from doctr.io import read_img_as_numpy
+     >>> page = read_img_as_numpy("path/to/your/doc.jpg")
  
      Args:
+     ----
          file: the path to the image file
          output_size: the expected output size of each page in format H x W
          rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
  
      Returns:
+     -------
          the page decoded as numpy ndarray of shape H x W x 3
      """
- 
      if isinstance(file, (str, Path)):
          if not Path(file).is_file():
              raise FileNotFoundError(f"unable to access {file}")
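A minimal sketch of the renamed helper from the updated docstring; the file path and target size are illustrative:

    from doctr.io import read_img_as_numpy

    # Decode an image into a numpy array; output_size is (H, W) and rgb_output
    # controls the channel order of the returned H x W x 3 array.
    page = read_img_as_numpy("path/to/your/doc.jpg", output_size=(1024, 1024), rgb_output=True)
    print(page.shape, page.dtype)  # expected: (1024, 1024, 3) uint8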