deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -16,13 +16,14 @@
16
16
  # limitations under the License.
17
17
 
18
18
  """
19
- Subclasses for ImageAnnotation and Image objects with various properties. These classes
20
- simplify consumption
19
+ Subclasses for `ImageAnnotation` and `Image` objects for consumption
21
20
  """
21
+
22
22
  from __future__ import annotations
23
23
 
24
24
  from copy import copy
25
- from typing import Any, Mapping, Optional, Sequence, Type, TypedDict, Union, no_type_check
25
+ from dataclasses import dataclass, field
26
+ from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, Union, no_type_check
26
27
 
27
28
  import numpy as np
28
29
 
@@ -50,21 +51,23 @@ from .image import Image
50
51
 
51
52
  class ImageAnnotationBaseView(ImageAnnotation):
52
53
  """
53
- Consumption class for having easier access to categories added to an ImageAnnotation.
54
+ Consumption class for having easier access to categories added to an `ImageAnnotation`.
54
55
 
55
- ImageAnnotation is a generic class in the sense that different categories might have different
56
- sub categories collected while running through a pipeline. In order to get properties for a specific
57
- category one has to understand the internal data structure.
56
+ Note:
57
+ `ImageAnnotation` is a generic class in the sense that different categories might have different
58
+ sub categories collected while running through a pipeline. In order to get properties for a specific
59
+ category one has to understand the internal data structure.
58
60
 
59
- To circumvent this obstacle `ImageAnnotationBaseView` provides the `__getattr__` so that
60
- to gather values defined by `ObjectTypes`. To be more precise: A sub class will have attributes either
61
- defined explicitly by a `@property` or by the set of `get_attribute_names()` . Do not define any attribute
62
- setter method and regard this class as a view to the super class.
61
+ To circumvent this obstacle `ImageAnnotationBaseView` provides the `__getattr__` method
62
+ to gather values defined by `ObjectTypes`. To be more precise: A sub class will have attributes either
63
+ defined explicitly by a `@property` or by the set of `get_attribute_names()` . Do not define any attribute
64
+ setter method and regard this class as a view to the super class.
63
65
 
64
66
  The class contains its base page, which means that it is possible to retrieve all annotations that have a
65
67
  relation.
66
68
 
67
- base_page: `Page` class instantiated by the lowest hierarchy `Image`
69
+ Attributes:
70
+ base_page: `Page` class instantiated by the lowest hierarchy `Image`
68
71
  """
69
72
 
70
73
  base_page: Page
@@ -72,7 +75,10 @@ class ImageAnnotationBaseView(ImageAnnotation):
72
75
  @property
73
76
  def bbox(self) -> list[float]:
74
77
  """
75
- Get the bounding box as list and in absolute coordinates of the base page.
78
+ Get the bounding box as list and in absolute `xyxy`-coordinates of the base page.
79
+
80
+ Returns:
81
+ [ulx, uly, lrx, lry] as list of floats in absolute coordinates.
76
82
  """
77
83
 
78
84
  bounding_box = self.get_bounding_box(self.base_page.image_id)
@@ -85,9 +91,9 @@ class ImageAnnotationBaseView(ImageAnnotation):
85
91
  """
86
92
  Display the annotation (without any sub-layout elements).
87
93
 
88
- :param interactive: If set to True will open an interactive image, otherwise it will return a numpy array that
89
- can be displayed with e.g. matplotlib
90
- :return:
94
+ Returns:
95
+ If `interactive=True` will open an interactive image, otherwise it will return a `np.array` that
96
+ can be displayed with e.g. `matplotlib`
91
97
  """
92
98
 
93
99
  bounding_box = self.get_bounding_box(self.base_page.image_id)
@@ -104,7 +110,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
104
110
 
105
111
  def __getattr__(self, item: str) -> Optional[Union[str, int, list[str], list[ImageAnnotationBaseView]]]:
106
112
  """
107
- Get attributes defined by registered `self.get_attribute_names()` in a multi step process:
113
+ Get attributes defined by registered `self.get_attribute_names()` in a multi-step process:
108
114
 
109
115
  - Unregistered attributes will raise an `AttributeError`.
110
116
  - Registered attribute will look for a corresponding sub category. If the sub category does not exist `Null`
@@ -115,8 +121,12 @@ class ImageAnnotationBaseView(ImageAnnotation):
115
121
  `category_id` will be returned.
116
122
  - If nothing works, look at `self.image.summary` if the item exist. Follow the same logic as for ordinary sub
117
123
  categories.
118
- :param item: attribute name
119
- :return: value according to the logic described above
124
+
125
+ Args:
126
+ item: attribute name
127
+
128
+ Returns:
129
+ Value according to the logic described above
120
130
  """
121
131
  if item not in self.get_attribute_names():
122
132
  raise AnnotationError(f"Attribute {item} is not supported for {type(self)}")
@@ -142,14 +152,15 @@ class ImageAnnotationBaseView(ImageAnnotation):
142
152
 
143
153
  def get_attribute_names(self) -> set[str]:
144
154
  """
145
- :return: A set of registered attributes. When sub classing modify this method accordingly.
155
+ Returns:
156
+ A set of registered attributes. When sub classing modify this method accordingly.
146
157
  """
147
158
 
148
159
  # sub categories and summary sub categories are valid attribute names
149
- attribute_names = {"bbox", "np_image"}.union({cat.value for cat in self.sub_categories})
160
+ attr_names = {"bbox", "np_image"}.union({cat.value for cat in self.sub_categories})
150
161
  if self.image:
151
- attribute_names = attribute_names.union({cat.value for cat in self.image.summary.sub_categories.keys()})
152
- return attribute_names
162
+ attr_names = attr_names.union({cat.value for cat in self.image.summary.sub_categories.keys()})
163
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
153
164
 
154
165
  @classmethod
155
166
  def from_dict(cls, **kwargs: AnnotationDict) -> ImageAnnotationBaseView:
@@ -169,11 +180,12 @@ class Word(ImageAnnotationBaseView):
169
180
  """
170
181
 
171
182
  def get_attribute_names(self) -> set[str]:
172
- return (
183
+ attr_names = (
173
184
  set(WordType)
174
185
  .union(super().get_attribute_names())
175
186
  .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
176
187
  )
188
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
177
189
 
178
190
 
179
191
  class Layout(ImageAnnotationBaseView):
@@ -181,8 +193,9 @@ class Layout(ImageAnnotationBaseView):
181
193
  Layout specific subclass of `ImageAnnotationBaseView`. In order to check which `ImageAnnotation` will be wrapped
182
194
  into `Layout`, please consult `IMAGE_ANNOTATION_TO_LAYOUTS`.
183
195
 
184
- text_container: Pass the `LayoutObject` that is supposed to be used for `words`. It is possible that the
185
- text_container is equal to `self.category_name`, in which case `words` returns `self`.
196
+ Attributes:
197
+ text_container: Pass the `LayoutObject` that is supposed to be used for `words`. It is possible that the
198
+ text_container is equal to `self.category_name`, in which case `words` returns `self`.
186
199
  """
187
200
 
188
201
  text_container: Optional[ObjectTypes] = None
@@ -190,8 +203,9 @@ class Layout(ImageAnnotationBaseView):
190
203
  @property
191
204
  def words(self) -> list[ImageAnnotationBaseView]:
192
205
  """
193
- Get a list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
194
- It will only select those among all annotations that have an entry in `Relationships.child` .
206
+ Returns:
207
+ A list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
208
+ It will only select those among all annotations that have an entry in `Relationships.child` .
195
209
  """
196
210
  if self.category_name != self.text_container:
197
211
  text_ids = self.get_relationship(Relationships.CHILD)
@@ -201,28 +215,36 @@ class Layout(ImageAnnotationBaseView):
201
215
  @property
202
216
  def text(self) -> str:
203
217
  """
204
- Text captured within the instance respecting the reading order of each word.
218
+ Returns:
219
+ Text captured within the instance respecting the reading order of each word.
205
220
  """
206
221
  words = self.get_ordered_words()
207
222
  return " ".join([word.characters for word in words]) # type: ignore
208
223
 
209
224
  def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
210
- """Returns a list of words order by reading order. Words with no reading order will not be returned"""
225
+ """
226
+ Returns:
227
+ A list of `word`s ordered by `reading_order`. Words with no `reading_order` will not be returned
228
+ """
211
229
  words_with_reading_order = [word for word in self.words if word.reading_order is not None]
212
230
  words_with_reading_order.sort(key=lambda x: x.reading_order) # type: ignore
213
231
  return words_with_reading_order
214
232
 
215
233
  @property
216
234
  def text_(self) -> Text_:
217
- """Returns a dict
235
+ """
236
+ Returns:
237
+ A dict
218
238
 
219
- `{"text": text string,
220
- "text_list": list of single words,
221
- "ann_ids": word annotation ids`,
222
- "token_classes": token classes,
223
- "token_tags": token tags,
224
- "token_class_ids": token class ids,
225
- "token_tag_ids": token tag ids}`
239
+ ```python
240
+ {"text": text string,
241
+ "text_list": list of single words,
242
+ "ann_ids": word annotation ids,
243
+ "token_classes": token classes,
244
+ "token_tags": token tags,
245
+ "token_class_ids": token class ids,
246
+ "token_tag_ids": token tag ids}
247
+ ```
226
248
 
227
249
  """
228
250
  words = self.get_ordered_words()
@@ -264,14 +286,18 @@ class Layout(ImageAnnotationBaseView):
264
286
  }
265
287
 
266
288
  def get_attribute_names(self) -> set[str]:
267
- return (
289
+ attr_names = (
268
290
  {"words", "text"}
269
291
  .union(super().get_attribute_names())
270
292
  .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
271
293
  )
294
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
272
295
 
273
296
  def __len__(self) -> int:
274
- """len of text counted by number of characters"""
297
+ """
298
+ Returns:
299
+ len of text counted by number of characters
300
+ """
275
301
  return len(self.text)
276
302
 
277
303
 
@@ -281,7 +307,8 @@ class Cell(Layout):
281
307
  """
282
308
 
283
309
  def get_attribute_names(self) -> set[str]:
284
- return set(CellType).union(super().get_attribute_names())
310
+ attr_names = set(CellType).union(super().get_attribute_names())
311
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
285
312
 
286
313
 
287
314
  class List(Layout):
@@ -292,8 +319,9 @@ class List(Layout):
292
319
  @property
293
320
  def words(self) -> list[ImageAnnotationBaseView]:
294
321
  """
295
- Get a list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
296
- It will only select those among all annotations that have an entry in `Relationships.child` .
322
+ Returns:
323
+ A list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
324
+ It will only select those among all annotations that have an entry in `Relationships.child` .
297
325
  """
298
326
  all_words: list[ImageAnnotationBaseView] = []
299
327
 
@@ -302,13 +330,15 @@ class List(Layout):
302
330
  return all_words
303
331
 
304
332
  def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
305
- """Returns a list of words order by reading order. Words with no reading order will not be returned"""
333
+ """
334
+ Returns:
335
+ A list of words ordered by reading order. Words with no `reading_order` will not be returned"""
306
336
  try:
307
337
  list_items = self.list_items
308
338
  all_words = []
309
339
  list_items.sort(key=lambda x: x.bbox[1])
310
340
  for list_item in list_items:
311
- all_words.extend(list_item.get_ordered_words()) # type: ignore
341
+ all_words.extend(list_item.get_ordered_words()) # type: ignore
312
342
  return all_words
313
343
  except (TypeError, AnnotationError):
314
344
  return super().get_ordered_words()
@@ -316,7 +346,8 @@ class List(Layout):
316
346
  @property
317
347
  def list_items(self) -> list[ImageAnnotationBaseView]:
318
348
  """
319
- A list of a list items.
349
+ Returns:
350
+ A list of `list_item`s.
320
351
  """
321
352
  all_relation_ids = self.get_relationship(Relationships.CHILD)
322
353
  list_items = self.base_page.get_annotation(
@@ -332,13 +363,14 @@ class List(Layout):
332
363
 
333
364
  class Table(Layout):
334
365
  """
335
- Table specific sub class of `ImageAnnotationBaseView` modelled by `TableType`.
366
+ Table specific subclass of `ImageAnnotationBaseView` modelled by `TableType`.
336
367
  """
337
368
 
338
369
  @property
339
370
  def cells(self) -> list[Cell]:
340
371
  """
341
- A list of a table cells.
372
+ Returns:
373
+ A list of table cells.
342
374
  """
343
375
  all_relation_ids = self.get_relationship(Relationships.CHILD)
344
376
  cell_anns: list[Cell] = self.base_page.get_annotation( # type: ignore
@@ -355,12 +387,11 @@ class Table(Layout):
355
387
  @property
356
388
  def column_header_cells(self) -> list[Cell]:
357
389
  """
358
- Retrieve a list of cells that are column headers in the table.
359
-
360
390
  This property filters and sorts the cells in the table to return only those that are column headers.
361
391
  The cells are sorted by their column number.
362
392
 
363
- :return: A list of `Cell` objects that are column headers.
393
+ Returns:
394
+ A list of cells that are column headers in the table.
364
395
  """
365
396
  all_relation_ids = self.get_relationship(Relationships.CHILD)
366
397
  all_cells: list[Cell] = self.base_page.get_annotation( # type: ignore
@@ -373,12 +404,11 @@ class Table(Layout):
373
404
  @property
374
405
  def row_header_cells(self) -> list[Cell]:
375
406
  """
376
- Retrieve a list of cells that are row headers in the table.
377
-
378
407
  This property filters and sorts the cells in the table to return only those that are row headers.
379
408
  The cells are sorted by their column number.
380
409
 
381
- :return: A list of `Cell` objects that are row headers.
410
+ Returns:
411
+ A list of `Cell` objects that are row headers.
382
412
  """
383
413
  all_relation_ids = self.get_relationship(Relationships.CHILD)
384
414
  all_cells: list[Cell] = self.base_page.get_annotation( # type: ignore
@@ -396,21 +426,28 @@ class Table(Layout):
396
426
  It then creates a key-value pair where the key is a tuple containing the column number and header text,
397
427
  and the value is the cell text.
398
428
 
399
- :param row_number: The row number for which to retrieve the key-value pairs.
400
- :return: A dictionary where keys are tuples of (column number, header text) and values are cell texts.
429
+ Args:
430
+ row_number: The row number for which to retrieve the key-value pairs.
431
+
432
+ Returns:
433
+ A dictionary where keys are tuples of (column number, header text) and values are cell texts.
401
434
 
402
435
  Example:
403
- If the table has the following structure:
404
- | Header1 | Header2 |
405
- |---------|---------|
406
- | Value1 | Value2 |
407
- | Value3 | Value4 |
408
-
409
- Calling kv_header_rows(1) would return:
410
- {
411
- (1, 'Header1'): 'Value1',
412
- (2, 'Header2'): 'Value2'
413
- }
436
+ If the table has the structure:
437
+
438
+ | Header1 | Header2 |
439
+ |---------|---------|
440
+ | Value1 | Value2 |
441
+ | Value3 | Value4 |
442
+
443
+ Calling kv_header_rows(1) would return:
444
+
445
+ ```python
446
+ {
447
+ (1, 'Header1'): 'Value1',
448
+ (2, 'Header2'): 'Value2'
449
+ }
450
+ ```
414
451
  """
415
452
  all_relation_ids = self.get_relationship(Relationships.CHILD)
416
453
  all_cells = self.base_page.get_annotation(
@@ -436,7 +473,8 @@ class Table(Layout):
436
473
  @property
437
474
  def rows(self) -> list[ImageAnnotationBaseView]:
438
475
  """
439
- A list of a table rows.
476
+ Returns:
477
+ A list of table rows.
440
478
  """
441
479
  all_relation_ids = self.get_relationship(Relationships.CHILD)
442
480
  row_anns = self.base_page.get_annotation(annotation_ids=all_relation_ids, category_names=[LayoutType.ROW])
@@ -445,7 +483,8 @@ class Table(Layout):
445
483
  @property
446
484
  def columns(self) -> list[ImageAnnotationBaseView]:
447
485
  """
448
- A list of a table columns.
486
+ Returns:
487
+ A list of table columns.
449
488
  """
450
489
  all_relation_ids = self.get_relationship(Relationships.CHILD)
451
490
  col_anns = self.base_page.get_annotation(annotation_ids=all_relation_ids, category_names=[LayoutType.COLUMN])
@@ -453,7 +492,10 @@ class Table(Layout):
453
492
 
454
493
  def row(self, row_number: int) -> list[ImageAnnotationBaseView]:
455
494
  """
456
- Get a list of cells in a row.
495
+ Args:
496
+ row_number: The row number for which to retrieve the cells.
497
+ Returns:
498
+ A list of cells in a row.
457
499
  """
458
500
  all_relation_ids = self.get_relationship(Relationships.CHILD)
459
501
  all_cells = self.base_page.get_annotation(
@@ -467,7 +509,10 @@ class Table(Layout):
467
509
 
468
510
  def column(self, column_number: int) -> list[ImageAnnotationBaseView]:
469
511
  """
470
- Get a list of cells in a column.
512
+ Args:
513
+ column_number: The column number for which to retrieve the cells.
514
+ Returns:
515
+ A list of cells in a column.
471
516
  """
472
517
  all_relation_ids = self.get_relationship(Relationships.CHILD)
473
518
  all_cells = self.base_page.get_annotation(
@@ -485,7 +530,8 @@ class Table(Layout):
485
530
  @property
486
531
  def html(self) -> HTML:
487
532
  """
488
- The html representation of the table
533
+ Returns:
534
+ The `html` representation of the table
489
535
  """
490
536
 
491
537
  html_list = []
@@ -505,17 +551,20 @@ class Table(Layout):
505
551
  return "".join(html_list)
506
552
 
507
553
  def get_attribute_names(self) -> set[str]:
508
- return (
554
+ attr_names = (
509
555
  set(TableType)
510
556
  .union(super().get_attribute_names())
511
557
  .union({"cells", "rows", "columns", "html", "csv", "text"})
512
558
  )
559
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
513
560
 
514
561
  @property
515
562
  def csv(self) -> csv:
516
- """Returns a csv-style representation of a table as list of lists of string. Cell content of cell with higher
517
- row or column spans will be shown at the upper left cell tile. All other tiles covered by the cell will be left
518
- as blank
563
+ """
564
+ Returns:
565
+ A csv-style representation of a table as list of lists of string. Cell content of cell with higher
566
+ row or column spans will be shown at the upper left cell tile. All other tiles covered by the cell
567
+ will be left as blank.
519
568
  """
520
569
  cells = self.cells
521
570
  table_list = [["" for _ in range(self.number_of_columns)] for _ in range(self.number_of_rows)] # type: ignore
@@ -575,8 +624,9 @@ class Table(Layout):
575
624
  @property
576
625
  def words(self) -> list[ImageAnnotationBaseView]:
577
626
  """
578
- Get a list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
579
- It will only select those among all annotations that have an entry in `Relationships.child` .
627
+ Returns:
628
+ A list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
629
+ It will only select those among all annotations that have an entry in `Relationships.child` .
580
630
  """
581
631
  all_words: list[ImageAnnotationBaseView] = []
582
632
  cells = self.cells
@@ -587,7 +637,10 @@ class Table(Layout):
587
637
  return all_words
588
638
 
589
639
  def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
590
- """Returns a list of words order by reading order. Words with no reading order will not be returned"""
640
+ """
641
+ Returns:
642
+ A list of `word`s ordered by `reading_order`. Words with no `reading_order` will not be returned
643
+ """
591
644
  try:
592
645
  cells = self.cells
593
646
  all_words = []
@@ -599,65 +652,82 @@ class Table(Layout):
599
652
  return super().get_ordered_words()
600
653
 
601
654
 
602
- IMAGE_ANNOTATION_TO_LAYOUTS: dict[ObjectTypes, Type[Union[Layout, Table, Word]]] = {
603
- **{i: Layout for i in LayoutType if (i not in {LayoutType.TABLE, LayoutType.WORD, LayoutType.CELL})},
604
- LayoutType.TABLE: Table,
605
- LayoutType.TABLE_ROTATED: Table,
606
- LayoutType.WORD: Word,
607
- LayoutType.CELL: Cell,
608
- LayoutType.LIST: List,
609
- CellType.SPANNING: Cell,
610
- CellType.ROW_HEADER: Cell,
611
- CellType.COLUMN_HEADER: Cell,
612
- CellType.PROJECTED_ROW_HEADER: Cell,
613
- }
655
+ @dataclass
656
+ class ImageDefaults:
657
+ """ImageDefaults"""
614
658
 
659
+ TEXT_CONTAINER: LayoutType = LayoutType.WORD
660
+ FLOATING_TEXT_BLOCK_CATEGORIES: Tuple[Union[LayoutType, CellType], ...] = field(
661
+ default_factory=lambda: (
662
+ LayoutType.TEXT,
663
+ LayoutType.TITLE,
664
+ LayoutType.LIST,
665
+ LayoutType.KEY_VALUE_AREA,
666
+ )
667
+ )
668
+ TEXT_BLOCK_CATEGORIES: Tuple[Union[LayoutType, CellType], ...] = field(
669
+ default_factory=lambda: (
670
+ LayoutType.TEXT,
671
+ LayoutType.TITLE,
672
+ LayoutType.LIST_ITEM,
673
+ LayoutType.LIST,
674
+ LayoutType.CAPTION,
675
+ LayoutType.PAGE_HEADER,
676
+ LayoutType.PAGE_FOOTER,
677
+ LayoutType.PAGE_NUMBER,
678
+ LayoutType.MARK,
679
+ LayoutType.KEY_VALUE_AREA,
680
+ LayoutType.FIGURE,
681
+ CellType.SPANNING,
682
+ LayoutType.CELL,
683
+ )
684
+ )
685
+ RESIDUAL_TEXT_BLOCK_CATEGORIES: Tuple[LayoutType, ...] = field(
686
+ default_factory=lambda: (
687
+ LayoutType.PAGE_HEADER,
688
+ LayoutType.PAGE_FOOTER,
689
+ LayoutType.MARK,
690
+ LayoutType.PAGE_NUMBER,
691
+ )
692
+ )
693
+ IMAGE_ANNOTATION_TO_LAYOUTS: Dict[ObjectTypes, Type[Union[Layout, Table, Word]]] = field(
694
+ default_factory=lambda: { # type: ignore
695
+ **{i: Layout for i in LayoutType if (i not in {LayoutType.TABLE, LayoutType.WORD, LayoutType.CELL})},
696
+ LayoutType.TABLE: Table,
697
+ LayoutType.TABLE_ROTATED: Table,
698
+ LayoutType.WORD: Word,
699
+ LayoutType.CELL: Cell,
700
+ LayoutType.LIST: List,
701
+ CellType.SPANNING: Cell,
702
+ CellType.ROW_HEADER: Cell,
703
+ CellType.COLUMN_HEADER: Cell,
704
+ CellType.PROJECTED_ROW_HEADER: Cell,
705
+ }
706
+ )
615
707
 
616
- class ImageDefaults(TypedDict):
617
- """ImageDefaults"""
618
708
 
619
- text_container: LayoutType
620
- floating_text_block_categories: tuple[Union[LayoutType, CellType], ...]
621
- text_block_categories: tuple[Union[LayoutType, CellType], ...]
622
- residual_layouts: tuple[LayoutType, ...]
623
-
624
-
625
- IMAGE_DEFAULTS: ImageDefaults = {
626
- "text_container": LayoutType.WORD,
627
- "floating_text_block_categories": (
628
- LayoutType.TEXT,
629
- LayoutType.TITLE,
630
- LayoutType.FIGURE,
631
- LayoutType.LIST,
632
- ),
633
- "text_block_categories": (
634
- LayoutType.TEXT,
635
- LayoutType.TITLE,
636
- LayoutType.LIST,
637
- LayoutType.CELL,
638
- LayoutType.FIGURE,
639
- CellType.SPANNING,
640
- ),
641
- "residual_layouts": (LayoutType.LINE,),
642
- }
709
+ IMAGE_DEFAULTS = ImageDefaults()
643
710
 
644
711
 
645
712
  @no_type_check
646
713
  def ann_obj_view_factory(annotation: ImageAnnotation, text_container: ObjectTypes) -> ImageAnnotationBaseView:
647
714
  """
648
- Create an `ImageAnnotationBaseView` sub class given the mapping `IMAGE_ANNOTATION_TO_LAYOUTS` .
715
+ Create an `ImageAnnotationBaseView` subclass given the mapping `IMAGE_ANNOTATION_TO_LAYOUTS`.
649
716
 
650
- :param annotation: The annotation to transform. Note, that we do not use the input annotation as base class
717
+ Args:
718
+ annotation: The annotation to transform. Note, that we do not use the input annotation as base class
651
719
  but create a whole new instance.
652
- :param text_container: `LayoutType` to create a list of `words` and eventually generate `text`
653
- :return: Transformed annotation
720
+ text_container: `LayoutType` to create a list of `words` and eventually generate `text`
721
+
722
+ Returns:
723
+ Transformed annotation
654
724
  """
655
725
 
656
726
  # We need to handle annotations that are text containers like words
657
727
  if annotation.category_name == text_container:
658
- layout_class = IMAGE_ANNOTATION_TO_LAYOUTS[LayoutType.WORD]
728
+ layout_class = IMAGE_DEFAULTS.IMAGE_ANNOTATION_TO_LAYOUTS[LayoutType.WORD]
659
729
  else:
660
- layout_class = IMAGE_ANNOTATION_TO_LAYOUTS[annotation.category_name]
730
+ layout_class = IMAGE_DEFAULTS.IMAGE_ANNOTATION_TO_LAYOUTS[annotation.category_name]
661
731
  ann_dict = annotation.as_dict()
662
732
  layout = layout_class.from_dict(**ann_dict)
663
733
  if image_dict := ann_dict.get("image"):
@@ -668,24 +738,27 @@ def ann_obj_view_factory(annotation: ImageAnnotation, text_container: ObjectType
668
738
 
669
739
  class Page(Image):
670
740
  """
671
- Consumer class for its super `Image` class. It comes with some handy `@property` as well as
741
+ Consumer class for its super `Image` class. It comes with some `@property`s as well as
672
742
  custom `__getattr__` to give easier access to various information that are stored in the base class
673
743
  as `ImageAnnotation` or `CategoryAnnotation`.
674
744
 
675
- Its factory function `Page().from_image(image, text_container, text_block_names)` creates for every
676
- `ImageAnnotation` a corresponding subclass of `ImageAnnotationBaseView` which drives the object towards
677
- less generic classes with custom attributes that are controlled some `ObjectTypes`.
678
-
679
- top_level_text_block_names: Top level layout objects, e.g. `LayoutType.text` or `LayoutType.table`.
680
-
681
- image_orig: Base image
682
-
683
- text_container: LayoutType to take the text from
745
+ Info:
746
+ Its factory function `Page().from_image(image, text_container, text_block_names)` creates for every
747
+ `ImageAnnotation` a corresponding subclass of `ImageAnnotationBaseView` which drives the object towards
748
+ less generic classes with custom attributes that are controlled by some `ObjectTypes`.
749
+
750
+ Attributes:
751
+ text_container: The `LayoutType` that is used to extract the text from.
752
+ floating_text_block_categories: Categories that are considered as floating text blocks, e.g. `LayoutType.TEXT`
753
+ image_orig: Base image
754
+ residual_text_block_categories: Categories that are considered as residual text blocks, e.g.
755
+ `LayoutType.page_header`
684
756
  """
685
757
 
686
758
  text_container: ObjectTypes
687
759
  floating_text_block_categories: list[ObjectTypes]
688
760
  image_orig: Image
761
+ residual_text_block_categories: list[ObjectTypes]
689
762
  _attribute_names: set[str] = {
690
763
  "text",
691
764
  "chunks",
@@ -699,6 +772,7 @@ class Page(Image):
699
772
  "angle",
700
773
  "figures",
701
774
  "residual_layouts",
775
+ "document_summary",
702
776
  }
703
777
  include_residual_text_container: bool = True
704
778
 
@@ -713,22 +787,25 @@ class Page(Image):
713
787
  ) -> list[ImageAnnotationBaseView]:
714
788
  """
715
789
  Selection of annotations from the annotation container. Filter conditions can be defined by specifying
716
- the annotation_id or the category name. (Since only image annotations are currently allowed in the container,
717
- annotation_type is a redundant filter condition.) Only annotations that have active = 'True' are
790
+ the annotation_id or the `category_name`. (Since only image annotations are currently allowed in the container,
791
+ annotation_type is a redundant filter condition.) Only annotations that have `active=True` are
718
792
  returned. If more than one condition is provided, only annotations will be returned that satisfy all conditions.
719
793
  If no condition is provided, it will return all active annotations.
720
794
 
721
- Identical to its base class method for having correct return types. If the base class changes, please
722
- change this method as well.
795
+ Note:
796
+ Identical to its base class method for having correct return types. If the base class changes, please
797
+ change this method as well.
723
798
 
724
- :param category_names: A single name or list of names
725
- :param annotation_ids: A single id or list of ids
726
- :param service_ids: A single service name or list of service names
727
- :param model_id: A single model name or list of model names
728
- :param session_ids: A single session id or list of session ids
729
- :param ignore_inactive: If set to `True` only active annotations are returned.
799
+ Args:
800
+ category_names: A single name or list of names
801
+ annotation_ids: A single id or list of ids
802
+ service_ids: A single service name or list of service names
803
+ model_id: A single model name or list of model names
804
+ session_ids: A single session id or list of session ids
805
+ ignore_inactive: If set to `True` only active annotations are returned.
730
806
 
731
- :return: A (possibly empty) list of Annotations
807
+ Returns:
808
+ A (possibly empty) list of `ImageAnnotationBaseView`
732
809
  """
733
810
 
734
811
  if category_names is not None:
@@ -779,47 +856,50 @@ class Page(Image):
779
856
  @property
780
857
  def layouts(self) -> list[ImageAnnotationBaseView]:
781
858
  """
782
- A list of a layouts. Layouts are all exactly all floating text block categories
859
+ Returns:
860
+ A list of layouts. Layouts are exactly the annotations whose category is one of the floating text block categories
783
861
  """
784
862
  return self.get_annotation(category_names=self.floating_text_block_categories)
785
863
 
786
864
  @property
787
865
  def words(self) -> list[ImageAnnotationBaseView]:
788
866
  """
789
- A list of a words. Word are all text containers
867
+ Returns:
868
+ A list of words. Words are all annotations whose category is the text container
790
869
  """
791
870
  return self.get_annotation(category_names=self.text_container)
792
871
 
793
872
  @property
794
873
  def tables(self) -> list[ImageAnnotationBaseView]:
795
874
  """
796
- A list of a tables.
875
+ Returns:
876
+ A list of tables.
797
877
  """
798
878
  return self.get_annotation(category_names=LayoutType.TABLE)
799
879
 
800
880
  @property
801
881
  def figures(self) -> list[ImageAnnotationBaseView]:
802
882
  """
803
- A list of a figures.
883
+ Returns:
884
+ A list of figures.
804
885
  """
805
886
  return self.get_annotation(category_names=LayoutType.FIGURE)
806
887
 
807
888
  @property
808
889
  def residual_layouts(self) -> list[ImageAnnotationBaseView]:
809
890
  """
810
- A list of all residual layouts. Residual layouts are all layouts that are
811
- - not floating text blocks,
812
- - not text containers,
813
- - not tables,
814
- - not figures
815
- - not cells
816
- - not rows
817
- - not columns
818
- """
819
- return self.get_annotation(category_names=self._get_residual_layout())
891
+ Returns:
892
+ A list of all residual layouts. Residual layouts are all layouts that are:
820
893
 
821
- def _get_residual_layout(self) -> tuple[LayoutType, ...]:
822
- return IMAGE_DEFAULTS["residual_layouts"]
894
+ - not floating text blocks,
895
+ - not text containers,
896
+ - not tables,
897
+ - not figures
898
+ - not cells
899
+ - not rows
900
+ - not columns
901
+ """
902
+ return self.get_annotation(category_names=self.residual_text_block_categories)
823
903
 
824
904
  @classmethod
825
905
  def from_image(
@@ -827,28 +907,37 @@ class Page(Image):
827
907
  image_orig: Image,
828
908
  text_container: Optional[ObjectTypes] = None,
829
909
  floating_text_block_categories: Optional[Sequence[ObjectTypes]] = None,
910
+ residual_text_block_categories: Optional[Sequence[ObjectTypes]] = None,
830
911
  include_residual_text_container: bool = True,
831
912
  base_page: Optional[Page] = None,
832
913
  ) -> Page:
833
914
  """
834
915
  Factory function for generating a `Page` instance from `image_orig` .
835
916
 
836
- :param image_orig: `Image` instance to convert
837
- :param text_container: A LayoutType to get the text from. It will steer the output of `Layout.words`.
838
- :param floating_text_block_categories: A list of top level layout objects
839
- :param include_residual_text_container: This will regard synthetic text line annotations as floating text
840
- blocks and therefore incorporate all image annotations of category
841
- `word` when building text strings.
842
- :param base_page: For top level objects that are images themselves, pass the page that encloses all objects.
843
- In doubt, do not populate this value.
844
- :return:
917
+ Args:
918
+ image_orig: `Image` instance to convert
919
+ text_container: A LayoutType to get the text from. It will steer the output of `Layout.words`.
920
+ floating_text_block_categories: A list of top level layout objects
921
+ residual_text_block_categories: A list of layout objects that are neither floating text blocks nor
922
+ tables but should be accessible via `Page.residual_layouts`.
923
+ include_residual_text_container: This will regard synthetic text line annotations as floating text
924
+ blocks and therefore incorporate all image annotations of category
925
+ `word` when building text strings.
926
+ base_page: For top level objects that are images themselves, pass the page that encloses all objects.
927
+ In doubt, do not populate this value.
928
+
929
+ Returns:
930
+ A `Page` instance with all annotations as `ImageAnnotationBaseView` subclasses.
845
931
  """
846
932
 
847
933
  if text_container is None:
848
- text_container = IMAGE_DEFAULTS["text_container"]
934
+ text_container = IMAGE_DEFAULTS.TEXT_CONTAINER
849
935
 
850
936
  if not floating_text_block_categories:
851
- floating_text_block_categories = IMAGE_DEFAULTS["floating_text_block_categories"]
937
+ floating_text_block_categories = IMAGE_DEFAULTS.FLOATING_TEXT_BLOCK_CATEGORIES
938
+
939
+ if not residual_text_block_categories:
940
+ residual_text_block_categories = IMAGE_DEFAULTS.RESIDUAL_TEXT_BLOCK_CATEGORIES
852
941
 
853
942
  if include_residual_text_container and LayoutType.LINE not in floating_text_block_categories:
854
943
  floating_text_block_categories = tuple(floating_text_block_categories) + (LayoutType.LINE,)
@@ -882,6 +971,7 @@ class Page(Image):
882
971
  image_orig=image,
883
972
  text_container=text_container,
884
973
  floating_text_block_categories=floating_text_block_categories,
974
+ residual_text_block_categories=residual_text_block_categories,
885
975
  include_residual_text_container=include_residual_text_container,
886
976
  base_page=page,
887
977
  )
@@ -891,6 +981,7 @@ class Page(Image):
891
981
  page.summary = CategoryAnnotation.from_dict(**summary_dict)
892
982
  page.summary.category_name = SummaryType.SUMMARY
893
983
  page.floating_text_block_categories = floating_text_block_categories # type: ignore
984
+ page.residual_text_block_categories = residual_text_block_categories # type: ignore
894
985
  page.text_container = text_container
895
986
  page.include_residual_text_container = include_residual_text_container
896
987
  return page
@@ -917,9 +1008,16 @@ class Page(Image):
917
1008
 
918
1009
  @property
919
1010
  def text_(self) -> Text_:
920
- """Returns a dict `{"text": text string,
921
- "text_list": list of single words,
922
- "annotation_ids": word annotation ids`"""
1011
+ """
1012
+ Returns:
1013
+ A dict
1014
+
1015
+ ```python
1016
+ {"text": text string,
1017
+ "text_list": list of single words,
1018
+ "annotation_ids": word annotation ids}
1019
+ ```
1020
+ """
923
1021
  block_with_order = self._order("layouts")
924
1022
  text: list[str] = []
925
1023
  words: list[str] = []
@@ -947,13 +1045,17 @@ class Page(Image):
947
1045
  }
948
1046
 
949
1047
  def get_layout_context(self, annotation_id: str, context_size: int = 3) -> list[ImageAnnotationBaseView]:
950
- """For a given `annotation_id` get a list of `ImageAnnotation` that are nearby in terms of reading order.
1048
+ """
1049
+ For a given `annotation_id` get a list of `ImageAnnotation` that are nearby in terms of `reading_order`.
951
1050
  For a given context_size it will return all layouts with reading_order between
952
- reading_order(annoation_id)-context_size and reading_order(annoation_id)-context_size.
1051
+ `reading_order(annotation_id)-context_size` and `reading_order(annotation_id)+context_size`.
1052
+
1053
+ Args:
1054
+ annotation_id: id of central layout element
1055
+ context_size: number of elements to the left and right of the central element
953
1056
 
954
- :param annotation_id: id of central layout element
955
- :param context_size: number of elements to the left and right of the central element
956
- :return: list of `ImageAnnotationBaseView` objects
1057
+ Returns:
1058
+ List of `ImageAnnotationBaseView` objects
957
1059
  """
958
1060
  ann = self.get_annotation(annotation_ids=annotation_id)[0]
959
1061
  if ann.category_name not in self.floating_text_block_categories:
@@ -971,15 +1073,16 @@ class Page(Image):
971
1073
  @property
972
1074
  def chunks(self) -> Chunks:
973
1075
  """
974
- :return: Returns a "chunk" of a layout element or a table as 6-tuple containing
1076
+ Returns:
1077
+ A `chunk` of a layout element or a table as 7-tuple containing
975
1078
 
976
- - document id
977
- - image id
978
- - page number
979
- - annotation_id
980
- - reading order
981
- - category name
982
- - text string
1079
+ - document id
1080
+ - image id
1081
+ - page number
1082
+ - annotation_id
1083
+ - reading order
1084
+ - category name
1085
+ - text string
983
1086
 
984
1087
  """
985
1088
  block_with_order = self._order("layouts")
@@ -1004,8 +1107,9 @@ class Page(Image):
1004
1107
  @property
1005
1108
  def text_no_line_break(self) -> str:
1006
1109
  """
1007
- Get text of all layouts. While `text` will do a line break for each layout block this here will return the
1008
- string in one single line.
1110
+ Returns:
1111
+ Text of all layouts. While `text` will do a line break for each layout block this here will return the
1112
+ string in one single line.
1009
1113
  """
1010
1114
  return self._make_text(False)
1011
1115
 
@@ -1038,33 +1142,40 @@ class Page(Image):
1038
1142
  """
1039
1143
  Display a page with detected bounding boxes of various types.
1040
1144
 
1041
- **Example:**
1145
+ Example:
1042
1146
 
1043
- from matplotlib import pyplot as plt
1147
+ ```python
1148
+ from matplotlib import pyplot as plt
1044
1149
 
1045
- img = page.viz()
1046
- plt.imshow(img)
1150
+ img = page.viz()
1151
+ plt.imshow(img)
1152
+ ```
1047
1153
 
1048
1154
  In interactive mode it will display the image in a separate window.
1049
1155
 
1050
- **Example:**
1051
-
1052
- page.viz(interactive='True') # will open a new window with the image. Can be closed by pressing 'q'
1156
+ Example:
1053
1157
 
1054
- :param show_tables: Will display all tables boxes as well as cells, rows and columns
1055
- :param show_layouts: Will display all other layout components.
1056
- :param show_figures: Will display all figures
1057
- :param show_residual_layouts: Will display all residual layouts
1058
- :param show_cells: Will display cells within tables. (Only available if `show_tables=True`)
1059
- :param show_table_structure: Will display rows and columns
1060
- :param show_words: Will display bounding boxes around words labeled with token class and bio tag (experimental)
1061
- :param show_token_class: Will display token class instead of token tags (i.e. token classes with tags)
1062
- :param interactive: If set to True will open an interactive image, otherwise it will return a numpy array that
1063
- can be displayed differently.
1064
- :param scaled_width: Width of the image to display
1065
- :param ignore_default_token_class: Will ignore displaying word bounding boxes with default or None token class
1066
- label
1067
- :return: If `interactive=False` will return a numpy array.
1158
+ ```python
1159
+ page.viz(interactive=True) # will open a new window with the image. Can be closed by pressing 'q'
1160
+ ```
1161
+
1162
+ Args:
1163
+ show_tables: Will display all tables boxes as well as cells, rows and columns
1164
+ show_layouts: Will display all other layout components.
1165
+ show_figures: Will display all figures
1166
+ show_residual_layouts: Will display all residual layouts
1167
+ show_cells: Will display cells within tables. (Only available if `show_tables=True`)
1168
+ show_table_structure: Will display rows and columns
1169
+ show_words: Will display bounding boxes around words labeled with token class and bio tag (experimental)
1170
+ show_token_class: Will display token class instead of token tags (i.e. token classes with tags)
1171
+ interactive: If set to `True` will open an interactive image, otherwise it will return a numpy array that
1172
+ can be displayed differently.
1173
+ scaled_width: Width of the image to display
1174
+ ignore_default_token_class: Will ignore displaying word bounding boxes with default or None token class
1175
+ label
1176
+
1177
+ Returns:
1178
+ If `interactive=False` will return a `np.array`.
1068
1179
  """
1069
1180
 
1070
1181
  category_names_list: list[Union[str, None]] = []
@@ -1126,6 +1237,10 @@ class Page(Image):
1126
1237
  all_words.extend(layout.words)
1127
1238
  for table in self.tables:
1128
1239
  all_words.extend(table.words)
1240
+ for figure in self.figures:
1241
+ all_words.extend(figure.words)
1242
+ for res_layout in self.residual_layouts:
1243
+ all_words.extend(res_layout.words)
1129
1244
  if not all_words:
1130
1245
  all_words = self.get_annotation(category_names=LayoutType.WORD)
1131
1246
  if not ignore_default_token_class:
@@ -1166,11 +1281,15 @@ class Page(Image):
1166
1281
  boxes=boxes,
1167
1282
  category_names_list=category_names_list,
1168
1283
  font_scale=1.0,
1169
- rectangle_thickness=4,
1284
+ rectangle_thickness=2,
1170
1285
  )
1171
1286
  else:
1172
1287
  img = draw_boxes(
1173
- np_image=img, boxes=boxes, category_names_list=category_names_list, show_palette=False
1288
+ np_image=img,
1289
+ boxes=boxes,
1290
+ category_names_list=category_names_list,
1291
+ show_palette=False,
1292
+ rectangle_thickness=2,
1174
1293
  )
1175
1294
 
1176
1295
  if interactive:
@@ -1182,25 +1301,31 @@ class Page(Image):
1182
1301
  @classmethod
1183
1302
  def get_attribute_names(cls) -> set[str]:
1184
1303
  """
1185
- :return: A set of registered attributes.
1304
+ Returns:
1305
+ A set of registered attributes.
1186
1306
  """
1187
- return set(PageType).union(cls._attribute_names)
1307
+ attr_names = set(PageType).union(cls._attribute_names)
1308
+ return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
1188
1309
 
1189
1310
  @classmethod
1190
1311
  def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
1191
1312
  """
1192
1313
  Adding a custom attribute name to a Page class.
1193
1314
 
1194
- **Example:**
1315
+ Example:
1195
1316
 
1196
- Page.add_attribute_name("foo")
1317
+ ```python
1318
+ Page.add_attribute_name("foo")
1197
1319
 
1198
- page = Page.from_image(...)
1199
- print(page.foo)
1320
+ page = Page.from_image(...)
1321
+ print(page.foo)
1322
+ ```
1200
1323
 
1201
- Note, that the attribute must be registered as a valid `ObjectTypes`
1324
+ Note:
1325
+ The attribute must be registered as a valid `ObjectTypes`
1202
1326
 
1203
- :param attribute_name: attribute name to add
1327
+ Args:
1328
+ attribute_name: attribute name to add
1204
1329
  """
1205
1330
 
1206
1331
  attribute_name = get_type(attribute_name)
@@ -1215,14 +1340,17 @@ class Page(Image):
1215
1340
  ) -> Optional[Union[ImageDict, str]]:
1216
1341
  """
1217
1342
  Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
1218
- base64 encodings.
1219
- :param image_to_json: If `True` will save the image as b64 encoded string in output
1220
- :param highest_hierarchy_only: If True it will remove all image attributes of ImageAnnotations
1221
- :param path: Path to save the .json file to. If `None` results will be saved in the folder of the original
1222
- document.
1223
- :param dry: Will run dry, i.e. without saving anything but returning the dict
1343
+ `base64` encodings.
1224
1344
 
1225
- :return: optional dict
1345
+ Args:
1346
+ image_to_json: If `True` will save the image as b64 encoded string in output
1347
+ highest_hierarchy_only: If `True` it will remove all image attributes of `ImageAnnotation`s
1348
+ path: Path to save the `.json` file to. If `None` results will be saved in the folder of the original
1349
+ document.
1350
+ dry: Will run dry, i.e. without saving anything but returning the dict
1351
+
1352
+ Returns:
1353
+ optional dict
1226
1354
  """
1227
1355
  return self.image_orig.save(image_to_json, highest_hierarchy_only, path, dry)
1228
1356
 
@@ -1233,21 +1361,39 @@ class Page(Image):
1233
1361
  file_path: str,
1234
1362
  text_container: Optional[ObjectTypes] = None,
1235
1363
  floating_text_block_categories: Optional[list[ObjectTypes]] = None,
1364
+ residual_text_block_categories: Optional[Sequence[ObjectTypes]] = None,
1236
1365
  include_residual_text_container: bool = True,
1237
1366
  ) -> Page:
1238
- """Reading JSON file and building a `Page` object with given config.
1239
- :param file_path: Path to file
1240
- :param text_container: A LayoutType to get the text from. It will steer the output of `Layout.words`.
1241
- :param floating_text_block_categories: A list of top level layout objects
1242
- :param include_residual_text_container: This will regard synthetic text line annotations as floating text
1243
- blocks and therefore incorporate all image annotations of category
1244
- `word` when building text strings.
1367
+ """
1368
+ Reading JSON file and building a `Page` object with given config.
1369
+
1370
+ Args:
1371
+ file_path: Path to file
1372
+ text_container: A `LayoutType` to get the text from. It will steer the output of `Layout.words`.
1373
+ floating_text_block_categories: A list of top level layout objects
1374
+ residual_text_block_categories: A list of layout objects that are neither floating text blocks nor
1375
+ tables but should be accessible via `Page.residual_layouts`.
1376
+ include_residual_text_container: This will regard synthetic text line annotations as floating text
1377
+ blocks and therefore incorporate all image annotations of category
1378
+ `word` when building text strings.
1379
+
1380
+ Returns:
1381
+ A `Page` instance with all annotations as `ImageAnnotationBaseView` subclasses.
1245
1382
  """
1246
1383
  image = Image.from_file(file_path)
1247
- return cls.from_image(image, text_container, floating_text_block_categories, include_residual_text_container)
1384
+ return cls.from_image(
1385
+ image_orig=image,
1386
+ text_container=text_container,
1387
+ floating_text_block_categories=floating_text_block_categories,
1388
+ residual_text_block_categories=residual_text_block_categories,
1389
+ include_residual_text_container=include_residual_text_container,
1390
+ )
1248
1391
 
1249
1392
  def get_token(self) -> list[Mapping[str, str]]:
1250
- """Return a list of tuples with word and non default token tags"""
1393
+ """
1394
+ Returns:
1395
+ A list of mappings with word and non-default token tags
1396
+ """
1251
1397
  block_with_order = self._order("layouts")
1252
1398
  all_words = []
1253
1399
  for block in block_with_order:
@@ -1263,5 +1409,6 @@ class Page(Image):
1263
1409
  self.image_orig,
1264
1410
  self.text_container,
1265
1411
  self.floating_text_block_categories,
1412
+ self.residual_text_block_categories,
1266
1413
  self.include_residual_text_container,
1267
1414
  )