deepdoctection 0.37.3__tar.gz → 0.38__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (154) hide show
  1. {deepdoctection-0.37.3 → deepdoctection-0.38}/PKG-INFO +12 -2
  2. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/analyzer/_config.py +2 -1
  4. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/analyzer/factory.py +9 -4
  5. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/configs/conf_dd_one.yaml +126 -85
  6. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/box.py +2 -4
  7. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/image.py +11 -4
  8. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/view.py +124 -36
  9. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/hfdetr.py +4 -3
  10. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/doctectionpipe.py +1 -1
  11. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/refine.py +6 -13
  12. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/segment.py +229 -46
  13. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/sub_layout.py +40 -22
  14. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection.egg-info/PKG-INFO +12 -2
  15. {deepdoctection-0.37.3 → deepdoctection-0.38}/setup.cfg +1 -1
  16. {deepdoctection-0.37.3 → deepdoctection-0.38}/setup.py +1 -0
  17. {deepdoctection-0.37.3 → deepdoctection-0.38}/LICENSE +0 -0
  18. {deepdoctection-0.37.3 → deepdoctection-0.38}/README.md +0 -0
  19. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/analyzer/__init__.py +0 -0
  20. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/analyzer/dd.py +0 -0
  21. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/configs/__init__.py +0 -0
  22. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  23. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/__init__.py +0 -0
  24. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/base.py +0 -0
  25. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/common.py +0 -0
  26. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/custom.py +0 -0
  27. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/custom_serialize.py +0 -0
  28. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/parallel_map.py +0 -0
  29. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/serialize.py +0 -0
  30. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/dataflow/stats.py +0 -0
  31. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/__init__.py +0 -0
  32. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/annotation.py +0 -0
  33. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datapoint/convert.py +0 -0
  34. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/__init__.py +0 -0
  35. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/adapter.py +0 -0
  36. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/base.py +0 -0
  37. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/dataflow_builder.py +0 -0
  38. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/info.py +0 -0
  39. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/__init__.py +0 -0
  40. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  41. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  42. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/funsd.py +0 -0
  43. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  44. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/layouttest.py +0 -0
  45. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/publaynet.py +0 -0
  46. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  47. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  48. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  49. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/xfund.py +0 -0
  50. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  51. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  52. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/registry.py +0 -0
  53. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/datasets/save.py +0 -0
  54. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/__init__.py +0 -0
  55. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/accmetric.py +0 -0
  56. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/base.py +0 -0
  57. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/cocometric.py +0 -0
  58. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/eval.py +0 -0
  59. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/registry.py +0 -0
  60. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/tedsmetric.py +0 -0
  61. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/eval/tp_eval_callback.py +0 -0
  62. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/__init__.py +0 -0
  63. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/base.py +0 -0
  64. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/d2detect.py +0 -0
  65. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/deskew.py +0 -0
  66. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/doctrocr.py +0 -0
  67. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/fastlang.py +0 -0
  68. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/hflayoutlm.py +0 -0
  69. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/hflm.py +0 -0
  70. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/model.py +0 -0
  71. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/pdftext.py +0 -0
  72. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/pt/__init__.py +0 -0
  73. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/pt/nms.py +0 -0
  74. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/pt/ptutils.py +0 -0
  75. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tessocr.py +0 -0
  76. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/texocr.py +0 -0
  77. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/__init__.py +0 -0
  78. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tfutils.py +0 -0
  79. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpcompat.py +0 -0
  80. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  81. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  82. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  83. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  84. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  85. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  86. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  87. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  88. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  89. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  90. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  91. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  92. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  93. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  94. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  95. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  96. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  97. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  98. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/extern/tpdetect.py +0 -0
  99. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/__init__.py +0 -0
  100. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/cats.py +0 -0
  101. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/cocostruct.py +0 -0
  102. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/d2struct.py +0 -0
  103. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/hfstruct.py +0 -0
  104. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/laylmstruct.py +0 -0
  105. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/maputils.py +0 -0
  106. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/match.py +0 -0
  107. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/misc.py +0 -0
  108. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/pascalstruct.py +0 -0
  109. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/prodigystruct.py +0 -0
  110. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/pubstruct.py +0 -0
  111. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/tpstruct.py +0 -0
  112. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/mapper/xfundstruct.py +0 -0
  113. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/__init__.py +0 -0
  114. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/anngen.py +0 -0
  115. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/base.py +0 -0
  116. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/common.py +0 -0
  117. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/concurrency.py +0 -0
  118. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/language.py +0 -0
  119. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/layout.py +0 -0
  120. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/lm.py +0 -0
  121. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/order.py +0 -0
  122. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/registry.py +0 -0
  123. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/text.py +0 -0
  124. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/pipe/transform.py +0 -0
  125. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/py.typed +0 -0
  126. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/train/__init__.py +0 -0
  127. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/train/d2_frcnn_train.py +0 -0
  128. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/train/hf_detr_train.py +0 -0
  129. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  130. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/train/tp_frcnn_train.py +0 -0
  131. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/__init__.py +0 -0
  132. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/concurrency.py +0 -0
  133. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/context.py +0 -0
  134. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/develop.py +0 -0
  135. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/env_info.py +0 -0
  136. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/error.py +0 -0
  137. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/file_utils.py +0 -0
  138. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/fs.py +0 -0
  139. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/identifier.py +0 -0
  140. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/logger.py +0 -0
  141. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/metacfg.py +0 -0
  142. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/mocks.py +0 -0
  143. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/pdf_utils.py +0 -0
  144. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/settings.py +0 -0
  145. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/tqdm.py +0 -0
  146. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/transform.py +0 -0
  147. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/types.py +0 -0
  148. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/utils.py +0 -0
  149. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection/utils/viz.py +0 -0
  150. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection.egg-info/SOURCES.txt +0 -0
  151. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection.egg-info/dependency_links.txt +0 -0
  152. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection.egg-info/requires.txt +0 -0
  153. {deepdoctection-0.37.3 → deepdoctection-0.38}/deepdoctection.egg-info/top_level.txt +0 -0
  154. {deepdoctection-0.37.3 → deepdoctection-0.38}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.37.3
3
+ Version: 0.38
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -127,6 +127,16 @@ Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
127
127
  Provides-Extra: test
128
128
  Requires-Dist: pytest==8.0.2; extra == "test"
129
129
  Requires-Dist: pytest-cov; extra == "test"
130
+ Dynamic: author
131
+ Dynamic: classifier
132
+ Dynamic: description
133
+ Dynamic: description-content-type
134
+ Dynamic: home-page
135
+ Dynamic: license
136
+ Dynamic: provides-extra
137
+ Dynamic: requires-dist
138
+ Dynamic: requires-python
139
+ Dynamic: summary
130
140
 
131
141
 
132
142
  <p align="center">
@@ -24,7 +24,7 @@ from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = "0.37.3"
27
+ __version__ = "0.38"
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -91,7 +91,8 @@ cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUM
91
91
  cfg.SEGMENTATION.CELL_NAMES = [CellType.HEADER, CellType.BODY, LayoutType.CELL]
92
92
  cfg.SEGMENTATION.ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
93
93
  cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
94
-
94
+ cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES = [CellType.COLUMN_HEADER, CellType.ROW_HEADER]
95
+ cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_THRESHOLDS = [0.6, 0.0001]
95
96
  cfg.SEGMENTATION.STRETCH_RULE = "equal"
96
97
 
97
98
  cfg.USE_TABLE_REFINEMENT = True
@@ -51,7 +51,7 @@ from ..pipe.transform import SimpleTransformService
51
51
  from ..utils.file_utils import detectron2_available
52
52
  from ..utils.fs import get_configs_dir_path
53
53
  from ..utils.metacfg import AttrDict
54
- from ..utils.settings import LayoutType, Relationships
54
+ from ..utils.settings import CellType, LayoutType, Relationships
55
55
  from ..utils.transform import PadTransform
56
56
 
57
57
  with try_import() as image_guard:
@@ -264,14 +264,17 @@ class ServiceFactory:
264
264
  :param mode: either `LAYOUT`,`CELL` or `ITEM`
265
265
  :return: `SubImageLayoutService` instance
266
266
  """
267
- exclude_category_ids = []
267
+ exclude_category_names = []
268
268
  padder = None
269
269
  if mode == "ITEM":
270
270
  if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
271
- exclude_category_ids.extend([1, 3, 4, 5, 6])
271
+ exclude_category_names.extend(
272
+ [LayoutType.TABLE, CellType.COLUMN_HEADER, CellType.PROJECTED_ROW_HEADER, CellType.SPANNING]
273
+ )
272
274
  padder = ServiceFactory.build_padder(config, mode)
273
275
  detect_result_generator = DetectResultGenerator(
274
- categories=detector.categories.categories, exclude_category_ids=exclude_category_ids
276
+ categories_name_as_key=detector.categories.get_categories(as_dict=True, name_as_key=True),
277
+ exclude_category_names=exclude_category_names,
275
278
  )
276
279
  return SubImageLayoutService(
277
280
  sub_image_detector=detector,
@@ -399,6 +402,8 @@ class ServiceFactory:
399
402
  spanning_cell_names=config.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES,
400
403
  item_names=config.SEGMENTATION.PUBTABLES_ITEM_NAMES,
401
404
  sub_item_names=config.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES,
405
+ item_header_cell_names=config.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES,
406
+ item_header_thresholds=config.SEGMENTATION.PUBTABLES_ITEM_HEADER_THRESHOLDS,
402
407
  stretch_rule=config.SEGMENTATION.STRETCH_RULE,
403
408
  )
404
409
 
@@ -1,104 +1,145 @@
1
- USE_ROTATOR: False
2
- USE_LAYOUT: True
3
- USE_TABLE_SEGMENTATION: True
4
- TF:
5
- LAYOUT:
6
- WEIGHTS: layout/model-800000_inf_only.data-00000-of-00001
7
- FILTER:
1
+ DEVICE: null
2
+ LANGUAGE: null
3
+ LAYOUT_LINK:
4
+ CHILD_CATEGORIES: []
5
+ PARENTAL_CATEGORIES: []
6
+ LAYOUT_NMS_PAIRS:
7
+ COMBINATIONS: null
8
+ PRIORITY: null
9
+ THRESHOLDS: null
10
+ LIB: null
11
+ OCR:
12
+ CONFIG:
13
+ TESSERACT: dd/conf_tesseract.yaml
14
+ USE_DOCTR: false
15
+ USE_TESSERACT: true
16
+ USE_TEXTRACT: false
17
+ WEIGHTS:
18
+ DOCTR_RECOGNITION:
19
+ PT: doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt
20
+ TF: doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip
21
+ DOCTR_WORD:
22
+ PT: doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt
23
+ TF: doctr/db_resnet50/tf/db_resnet50-adcafc63.zip
24
+ PDF_MINER:
25
+ X_TOLERANCE: 3
26
+ Y_TOLERANCE: 3
27
+ PT:
8
28
  CELL:
9
- WEIGHTS: cell/model-1800000_inf_only.data-00000-of-00001
10
- FILTER:
29
+ FILTER: null
30
+ WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
31
+ WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
11
32
  ITEM:
12
- WEIGHTS: item/model-1620000_inf_only.data-00000-of-00001
13
- FILTER:
14
- PT:
15
- LAYOUT:
16
- WEIGHTS: layout/d2_model_0829999_layout_inf_only.pt
17
- WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
18
- FILTER:
33
+ FILTER: null
19
34
  PAD:
20
- TOP: 60
21
- RIGHT: 60
22
35
  BOTTOM: 60
23
36
  LEFT: 60
24
- ITEM:
37
+ RIGHT: 60
38
+ TOP: 60
25
39
  WEIGHTS: item/d2_model_1639999_item_inf_only.pt
26
40
  WEIGHTS_TS: item/d2_model_1639999_item_inf_only.ts
27
- FILTER:
41
+ LAYOUT:
42
+ FILTER: null
28
43
  PAD:
29
- TOP: 60
30
- RIGHT: 60
31
44
  BOTTOM: 60
32
45
  LEFT: 60
33
- CELL:
34
- WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
35
- WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
36
- FILTER:
37
- LAYOUT_NMS_PAIRS:
38
- COMBINATIONS:
39
- THRESHOLDS:
40
- PRIORITY:
46
+ RIGHT: 60
47
+ TOP: 60
48
+ WEIGHTS: layout/d2_model_0829999_layout_inf_only.pt
49
+ WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
41
50
  SEGMENTATION:
42
51
  ASSIGNMENT_RULE: ioa
43
- THRESHOLD_ROWS: 0.4
44
- THRESHOLD_COLS: 0.4
45
- FULL_TABLE_TILING: True
46
- REMOVE_IOU_THRESHOLD_ROWS: 0.001
47
- REMOVE_IOU_THRESHOLD_COLS: 0.001
48
52
  CELL_CATEGORY_ID: 12
49
- STRETCH_RULE: equal
50
- USE_TABLE_REFINEMENT: True
51
- USE_PDF_MINER: False
52
- PDF_MINER:
53
- X_TOLERANCE: 3
54
- Y_TOLERANCE: 3
55
- USE_OCR: True
56
- OCR:
57
- USE_TESSERACT: True
58
- USE_DOCTR: False
59
- USE_TEXTRACT: False
60
- CONFIG:
61
- TESSERACT: dd/conf_tesseract.yaml
62
- WEIGHTS:
63
- DOCTR_WORD:
64
- TF: doctr/db_resnet50/tf/db_resnet50-adcafc63.zip
65
- PT: doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt
66
- DOCTR_RECOGNITION:
67
- TF: doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip
68
- PT: doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt
69
- WORD_MATCHING:
70
- PARENTAL_CATEGORIES:
71
- - text
72
- - title
73
- - list
74
- - cell
53
+ CELL_NAMES:
54
+ - header
55
+ - body
56
+ - cell
57
+ FULL_TABLE_TILING: true
58
+ ITEM_NAMES:
59
+ - row
60
+ - column
61
+ PUBTABLES_CELL_NAMES:
62
+ - spanning
63
+ - row_header
64
+ - column_header
65
+ - projected_row_header
66
+ - cell
67
+ PUBTABLES_ITEM_NAMES:
68
+ - row
69
+ - column
70
+ PUBTABLES_SPANNING_CELL_NAMES:
71
+ - spanning
72
+ - row_header
73
+ - column_header
74
+ - projected_row_header
75
+ PUBTABLES_SUB_ITEM_NAMES:
76
+ - row_number
77
+ - column_number
78
+ PUBTABLES_ITEM_HEADER_CELL_NAMES:
75
79
  - column_header
76
- - projected_row_header
77
- - spanning
78
80
  - row_header
79
- RULE: ioa
80
- THRESHOLD: 0.6
81
- MAX_PARENT_ONLY: True
81
+ PUBTABLES_ITEM_HEADER_THRESHOLDS:
82
+ - 0.6
83
+ - 0.0001
84
+ REMOVE_IOU_THRESHOLD_COLS: 0.001
85
+ REMOVE_IOU_THRESHOLD_ROWS: 0.001
86
+ STRETCH_RULE: equal
87
+ SUB_ITEM_NAMES:
88
+ - row_number
89
+ - column_number
90
+ TABLE_NAME: table
91
+ THRESHOLD_COLS: 0.4
92
+ THRESHOLD_ROWS: 0.4
93
+ TEXT_CONTAINER: word
82
94
  TEXT_ORDERING:
83
- TEXT_BLOCK_CATEGORIES:
84
- - title
85
- - text
86
- - list
87
- - cell
88
- - column_header
89
- - projected_row_header
90
- - spanning
91
- - row_header
92
- FLOATING_TEXT_BLOCK_CATEGORIES:
93
- - title
94
- - text
95
- - list
96
- INCLUDE_RESIDUAL_TEXT_CONTAINER: False
97
- STARTING_POINT_TOLERANCE: 0.005
98
95
  BROKEN_LINE_TOLERANCE: 0.003
96
+ FLOATING_TEXT_BLOCK_CATEGORIES:
97
+ - text
98
+ - title
99
+ - figure
100
+ - list
99
101
  HEIGHT_TOLERANCE: 2.0
102
+ INCLUDE_RESIDUAL_TEXT_CONTAINER: false
100
103
  PARAGRAPH_BREAK: 0.035
101
- USE_LAYOUT_LINK: False
102
- LAYOUT_LINK:
103
- PARENTAL_CATEGORIES:
104
- CHILD_CATEGORIES:
104
+ STARTING_POINT_TOLERANCE: 0.005
105
+ TEXT_BLOCK_CATEGORIES:
106
+ - text
107
+ - title
108
+ - list
109
+ - cell
110
+ - figure
111
+ - column_header
112
+ - projected_row_header
113
+ - spanning
114
+ - row_header
115
+ TF:
116
+ CELL:
117
+ FILTER: null
118
+ WEIGHTS: cell/model-1800000_inf_only.data-00000-of-00001
119
+ ITEM:
120
+ FILTER: null
121
+ WEIGHTS: item/model-1620000_inf_only.data-00000-of-00001
122
+ LAYOUT:
123
+ FILTER: null
124
+ WEIGHTS: layout/model-800000_inf_only.data-00000-of-00001
125
+ USE_LAYOUT: true
126
+ USE_LAYOUT_LINK: false
127
+ USE_LAYOUT_NMS: false
128
+ USE_OCR: true
129
+ USE_PDF_MINER: false
130
+ USE_ROTATOR: false
131
+ USE_TABLE_REFINEMENT: true
132
+ USE_TABLE_SEGMENTATION: true
133
+ WORD_MATCHING:
134
+ MAX_PARENT_ONLY: true
135
+ PARENTAL_CATEGORIES:
136
+ - text
137
+ - title
138
+ - list
139
+ - cell
140
+ - column_header
141
+ - projected_row_header
142
+ - spanning
143
+ - row_header
144
+ RULE: ioa
145
+ THRESHOLD: 0.6
@@ -491,10 +491,8 @@ def global_to_local_coords(global_box: BoundingBox, embedding_box: BoundingBox)
491
491
 
492
492
  def merge_boxes(*boxes: BoundingBox) -> BoundingBox:
493
493
  """
494
- Generating the smallest box containing an arbitrary tuple/list of boxes. This function is only implemented for boxes
495
- with absolute coords = "True".
496
-
497
- :param boxes: An arbitrary tuple/list of bounding boxes `BoundingBox` all having absolute_coords="True".
494
+ Generating the smallest box containing an arbitrary tuple/list of boxes.
495
+ :param boxes: An arbitrary tuple/list of bounding boxes `BoundingBox`.
498
496
  """
499
497
  absolute_coords = boxes[0].absolute_coords
500
498
  assert all(box.absolute_coords == absolute_coords for box in boxes), "all boxes must have same absolute_coords"
@@ -428,7 +428,7 @@ class Image:
428
428
  A list of attributes to suspend from as_dict creation.
429
429
  """
430
430
 
431
- return ["_image", "_annotation_ids", "_category_name"]
431
+ return ["_annotation_ids", "_category_name"]
432
432
 
433
433
  def define_annotation_id(self, annotation: Annotation) -> str:
434
434
  """
@@ -572,24 +572,31 @@ class Image:
572
572
  ann = self.get_annotation(annotation_ids=annotation_id)[0]
573
573
  if ann.image is None:
574
574
  raise ImageError("When adding sub images to ImageAnnotation then ImageAnnotation.image must not be None")
575
- assert ann.bounding_box is not None
576
- box = ann.bounding_box.to_list("xyxy")
575
+ box = ann.get_bounding_box(self.image_id).to_list("xyxy")
577
576
  proposals = self.get_annotation(category_names)
578
577
  points = np.array([prop.get_bounding_box(self.image_id).center for prop in proposals])
578
+ if not points.size:
579
+ return
579
580
  ann_ids = np.array([prop.annotation_id for prop in proposals])
580
581
  indices = np.where(
581
582
  (box[0] < points[:, 0]) & (box[1] < points[:, 1]) & (box[2] > points[:, 0]) & (box[3] > points[:, 1])
582
583
  )[0]
583
584
  selected_ids = ann_ids[indices]
584
585
  sub_images = self.get_annotation(annotation_ids=selected_ids.tolist())
586
+ ann_box = ann.get_bounding_box(self.image_id)
587
+ if not ann_box.absolute_coords:
588
+ ann_box = ann_box.transform(self.width, self.height, absolute_coords=True)
585
589
  for sub_image in sub_images:
586
590
  if sub_image.image is None:
587
591
  raise ImageError(
588
592
  "When setting an embedding to ImageAnnotation then ImageAnnotation.image must not be None"
589
593
  )
594
+ sub_image_box = sub_image.get_bounding_box(self.image_id)
595
+ if not sub_image_box.absolute_coords:
596
+ sub_image_box = sub_image_box.transform(self.width, self.height, absolute_coords=True)
590
597
  sub_image.image.set_embedding(
591
598
  annotation_id,
592
- global_to_local_coords(sub_image.get_bounding_box(self.image_id), ann.get_bounding_box(self.image_id)),
599
+ global_to_local_coords(sub_image_box, ann_box),
593
600
  )
594
601
  ann.image.dump(sub_image)
595
602
 
@@ -28,7 +28,7 @@ import numpy as np
28
28
  from typing_extensions import LiteralString
29
29
 
30
30
  from ..utils.error import AnnotationError, ImageError
31
- from ..utils.logger import LoggingRecord, logger
31
+ from ..utils.logger import LoggingRecord, log_once, logger
32
32
  from ..utils.settings import (
33
33
  CellType,
34
34
  LayoutType,
@@ -282,25 +282,103 @@ class Table(Layout):
282
282
  """
283
283
 
284
284
  @property
285
- def cells(self) -> list[ImageAnnotationBaseView]:
285
+ def cells(self) -> list[Cell]:
286
286
  """
287
287
  A list of a table cells.
288
288
  """
289
289
  all_relation_ids = self.get_relationship(Relationships.CHILD)
290
- cell_anns = self.base_page.get_annotation(
290
+ cell_anns: list[Cell] = self.base_page.get_annotation( # type: ignore
291
291
  annotation_ids=all_relation_ids,
292
292
  category_names=[
293
293
  LayoutType.CELL,
294
294
  CellType.HEADER,
295
295
  CellType.BODY,
296
- CellType.PROJECTED_ROW_HEADER,
297
296
  CellType.SPANNING,
298
- CellType.ROW_HEADER,
299
- CellType.COLUMN_HEADER,
300
297
  ],
301
298
  )
302
299
  return cell_anns
303
300
 
301
+ @property
302
+ def column_header_cells(self) -> list[Cell]:
303
+ """
304
+ Retrieve a list of cells that are column headers in the table.
305
+
306
+ This property filters and sorts the cells in the table to return only those that are column headers.
307
+ The cells are sorted by their column number.
308
+
309
+ :return: A list of `Cell` objects that are column headers.
310
+ """
311
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
312
+ all_cells: list[Cell] = self.base_page.get_annotation( # type: ignore
313
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
314
+ )
315
+ headers = list(filter(lambda cell: CellType.COLUMN_HEADER in cell.sub_categories, all_cells))
316
+ headers.sort(key=lambda x: x.column_number) # type: ignore
317
+ return headers
318
+
319
+ @property
320
+ def row_header_cells(self) -> list[Cell]:
321
+ """
322
+ Retrieve a list of cells that are row headers in the table.
323
+
324
+ This property filters and sorts the cells in the table to return only those that are row headers.
325
+ The cells are sorted by their column number.
326
+
327
+ :return: A list of `Cell` objects that are row headers.
328
+ """
329
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
330
+ all_cells: list[Cell] = self.base_page.get_annotation( # type: ignore
331
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
332
+ )
333
+ row_header_cells = list(filter(lambda cell: CellType.ROW_HEADER in cell.sub_categories, all_cells))
334
+ row_header_cells.sort(key=lambda x: x.column_number) # type: ignore
335
+ return row_header_cells
336
+
337
+ def kv_header_rows(self, row_number: int) -> Mapping[str, str]:
338
+ """
339
+ For a given row number, returns a dictionary mapping column headers to cell values in that row.
340
+
341
+ This method retrieves all cells in the specified row and matches them with their corresponding column headers.
342
+ It then creates a key-value pair where the key is a tuple containing the column number and header text,
343
+ and the value is the cell text.
344
+
345
+ :param row_number: The row number for which to retrieve the key-value pairs.
346
+ :return: A dictionary where keys are tuples of (column number, header text) and values are cell texts.
347
+
348
+ Example:
349
+ If the table has the following structure:
350
+ | Header1 | Header2 |
351
+ |---------|---------|
352
+ | Value1 | Value2 |
353
+ | Value3 | Value4 |
354
+
355
+ Calling kv_header_rows(1) would return:
356
+ {
357
+ (1, 'Header1'): 'Value1',
358
+ (2, 'Header2'): 'Value2'
359
+ }
360
+ """
361
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
362
+ all_cells = self.base_page.get_annotation(
363
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
364
+ )
365
+ row_cells = list(
366
+ filter(
367
+ lambda c: row_number in (c.row_number, c.row_number + c.row_span), all_cells # type: ignore
368
+ )
369
+ )
370
+ row_cells.sort(key=lambda c: c.column_number) # type: ignore
371
+ column_header_cells = self.column_header_cells
372
+
373
+ kv_dict: Mapping[str, str] = {}
374
+ for cell in row_cells:
375
+ for header in column_header_cells:
376
+ if (cell.column_number == header.column_number and # type: ignore
377
+ cell.annotation_id != header.annotation_id): # type: ignore
378
+ kv_dict[(header.column_number, header.text)] = cell.text # type: ignore
379
+ break
380
+ return kv_dict
381
+
304
382
  @property
305
383
  def rows(self) -> list[ImageAnnotationBaseView]:
306
384
  """
@@ -335,7 +413,7 @@ class Table(Layout):
335
413
  try:
336
414
  html_index = html_list.index(cell.annotation_id)
337
415
  html_list.pop(html_index)
338
- html_list.insert(html_index, cell.text) # type: ignore
416
+ html_list.insert(html_index, cell.text)
339
417
  except ValueError:
340
418
  logger.warning(LoggingRecord("html construction not possible", {"annotation_id": cell.annotation_id}))
341
419
 
@@ -357,6 +435,12 @@ class Table(Layout):
357
435
  cells = self.cells
358
436
  table_list = [["" for _ in range(self.number_of_columns)] for _ in range(self.number_of_rows)] # type: ignore
359
437
  for cell in cells:
438
+ if cell.category_name == CellType.SPANNING:
439
+ log_once(
440
+ "Table has spanning cells. This implies, that the .csv output will not be correct."
441
+ "To prevent spanning cell table creation set PT.ITEM.FILTER=['table','spanning'] ",
442
+ "error",
443
+ )
360
444
  table_list[cell.row_number - 1][cell.column_number - 1] = ( # type: ignore
361
445
  table_list[cell.row_number - 1][cell.column_number - 1] + cell.text + " " # type: ignore
362
446
  )
@@ -386,13 +470,13 @@ class Table(Layout):
386
470
  token_class_ids: list[str] = []
387
471
  token_tag_ids: list[str] = []
388
472
  for cell in cells:
389
- text.extend(cell.text_["text"]) # type: ignore
390
- words.extend(cell.text_["words"]) # type: ignore
391
- ann_ids.extend(cell.text_["ann_ids"]) # type: ignore
392
- token_classes.extend(cell.text_["token_classes"]) # type: ignore
393
- token_tags.extend(cell.text_["token_tags"]) # type: ignore
394
- token_class_ids.extend(cell.text_["token_class_ids"]) # type: ignore
395
- token_tag_ids.extend(cell.text_["token_tag_ids"]) # type: ignore
473
+ text.extend(cell.text_["text"])
474
+ words.extend(cell.text_["words"])
475
+ ann_ids.extend(cell.text_["ann_ids"])
476
+ token_classes.extend(cell.text_["token_classes"])
477
+ token_tags.extend(cell.text_["token_tags"])
478
+ token_class_ids.extend(cell.text_["token_class_ids"])
479
+ token_tag_ids.extend(cell.text_["token_tag_ids"])
396
480
  return {
397
481
  "text": " ".join(text),
398
482
  "words": words,
@@ -414,7 +498,7 @@ class Table(Layout):
414
498
  if not cells:
415
499
  return super().words
416
500
  for cell in cells:
417
- all_words.extend(cell.words) # type: ignore
501
+ all_words.extend(cell.words)
418
502
  return all_words
419
503
 
420
504
  def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
@@ -424,7 +508,7 @@ class Table(Layout):
424
508
  all_words = []
425
509
  cells.sort(key=lambda x: (x.ROW_NUMBER, x.COLUMN_NUMBER))
426
510
  for cell in cells:
427
- all_words.extend(cell.get_ordered_words()) # type: ignore
511
+ all_words.extend(cell.get_ordered_words())
428
512
  return all_words
429
513
  except (TypeError, AnnotationError):
430
514
  return super().get_ordered_words()
@@ -436,10 +520,10 @@ IMAGE_ANNOTATION_TO_LAYOUTS: dict[ObjectTypes, Type[Union[Layout, Table, Word]]]
436
520
  LayoutType.TABLE_ROTATED: Table,
437
521
  LayoutType.WORD: Word,
438
522
  LayoutType.CELL: Cell,
439
- CellType.PROJECTED_ROW_HEADER: Cell,
440
523
  CellType.SPANNING: Cell,
441
524
  CellType.ROW_HEADER: Cell,
442
525
  CellType.COLUMN_HEADER: Cell,
526
+ CellType.PROJECTED_ROW_HEADER: Cell,
443
527
  }
444
528
 
445
529
 
@@ -465,10 +549,7 @@ IMAGE_DEFAULTS: ImageDefaults = {
465
549
  LayoutType.LIST,
466
550
  LayoutType.CELL,
467
551
  LayoutType.FIGURE,
468
- CellType.COLUMN_HEADER,
469
- CellType.PROJECTED_ROW_HEADER,
470
552
  CellType.SPANNING,
471
- CellType.ROW_HEADER,
472
553
  ),
473
554
  }
474
555
 
@@ -851,6 +932,16 @@ class Page(Image):
851
932
  """
852
933
  return self._make_text(False)
853
934
 
935
def _ann_viz_bbox(self, ann: ImageAnnotationBaseView) -> list[float]:
    """
    Return the bounding box of an annotation with respect to this page as a list in `xyxy` mode and in
    absolute coordinates.

    :param ann: The annotation whose bounding box is requested for the image id of this page.
    :return: The box coordinates as list. Boxes that are not stored in absolute coordinates are converted
             using the width and height of this page.
    """
    box = ann.get_bounding_box(self.image_id)
    if box.absolute_coords:
        # already absolute, nothing to convert
        return box.to_list(mode="xyxy")
    absolute_box = box.transform(self.width, self.height, absolute_coords=True)
    return absolute_box.to_list(mode="xyxy")
944
+
854
945
  @no_type_check
855
946
  def viz(
856
947
  self,
@@ -886,6 +977,7 @@ class Page(Image):
886
977
  :param show_tables: Will display all tables boxes as well as cells, rows and columns
887
978
  :param show_layouts: Will display all other layout components.
888
979
  :param show_figures: Will display all figures
980
+ :param show_residual_layouts: Will display all residual layouts
889
981
  :param show_cells: Will display cells within tables. (Only available if `show_tables=True`)
890
982
  :param show_table_structure: Will display rows and columns
891
983
  :param show_words: Will display bounding boxes around words labeled with token class and bio tag (experimental)
@@ -910,50 +1002,46 @@ class Page(Image):
910
1002
  if debug_kwargs:
911
1003
  anns = self.get_annotation(category_names=list(debug_kwargs.keys()))
912
1004
  for ann in anns:
913
- box_stack.append(ann.bbox)
1005
+ box_stack.append(self._ann_viz_bbox(ann))
914
1006
  category_names_list.append(str(getattr(ann, debug_kwargs[ann.category_name])))
915
1007
 
916
1008
  if show_layouts and not debug_kwargs:
917
1009
  for item in self.layouts:
918
- box_stack.append(item.bbox)
1010
+ box_stack.append(self._ann_viz_bbox(item))
919
1011
  category_names_list.append(item.category_name.value)
920
1012
 
921
1013
  if show_figures and not debug_kwargs:
922
1014
  for item in self.figures:
923
- box_stack.append(item.bbox)
1015
+ box_stack.append(self._ann_viz_bbox(item))
924
1016
  category_names_list.append(item.category_name.value)
925
1017
 
926
1018
  if show_tables and not debug_kwargs:
927
1019
  for table in self.tables:
928
- box_stack.append(table.bbox)
1020
+ box_stack.append(self._ann_viz_bbox(table))
929
1021
  category_names_list.append(LayoutType.TABLE.value)
930
1022
  if show_cells:
931
1023
  for cell in table.cells:
932
1024
  if cell.category_name in {
933
1025
  LayoutType.CELL,
934
- CellType.PROJECTED_ROW_HEADER,
935
1026
  CellType.SPANNING,
936
- CellType.ROW_HEADER,
937
- CellType.COLUMN_HEADER,
938
1027
  }:
939
1028
  cells_found = True
940
- box_stack.append(cell.bbox)
1029
+ box_stack.append(self._ann_viz_bbox(cell))
941
1030
  category_names_list.append(None)
942
1031
  if show_table_structure:
943
1032
  rows = table.rows
944
1033
  cols = table.columns
945
1034
  for row in rows:
946
- box_stack.append(row.bbox)
1035
+ box_stack.append(self._ann_viz_bbox(row))
947
1036
  category_names_list.append(None)
948
1037
  for col in cols:
949
- box_stack.append(col.bbox)
1038
+ box_stack.append(self._ann_viz_bbox(col))
950
1039
  category_names_list.append(None)
951
1040
 
952
1041
  if show_cells and not cells_found and not debug_kwargs:
953
- for ann in self.annotations:
954
- if isinstance(ann, Cell) and ann.active:
955
- box_stack.append(ann.bbox)
956
- category_names_list.append(None)
1042
+ for ann in self.get_annotation(category_names=[LayoutType.CELL, CellType.SPANNING]):
1043
+ box_stack.append(self._ann_viz_bbox(ann))
1044
+ category_names_list.append(None)
957
1045
 
958
1046
  if show_words and not debug_kwargs:
959
1047
  all_words = []
@@ -965,7 +1053,7 @@ class Page(Image):
965
1053
  all_words = self.get_annotation(category_names=LayoutType.WORD)
966
1054
  if not ignore_default_token_class:
967
1055
  for word in all_words:
968
- box_stack.append(word.bbox)
1056
+ box_stack.append(self._ann_viz_bbox(word))
969
1057
  if show_token_class:
970
1058
  category_names_list.append(word.token_class.value if word.token_class is not None else None)
971
1059
  else:
@@ -973,7 +1061,7 @@ class Page(Image):
973
1061
  else:
974
1062
  for word in all_words:
975
1063
  if word.token_class is not None and word.token_class != TokenClasses.OTHER:
976
- box_stack.append(word.bbox)
1064
+ box_stack.append(self._ann_viz_bbox(word))
977
1065
  if show_token_class:
978
1066
  category_names_list.append(word.token_class.value if word.token_class is not None else None)
979
1067
  else: