deepdoctection 0.43.2__tar.gz → 0.43.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (155)
  1. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/PKG-INFO +1 -1
  2. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/analyzer/config.py +1 -0
  4. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/image.py +2 -2
  5. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/view.py +45 -6
  6. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/base.py +2 -0
  7. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/doctrocr.py +1 -4
  8. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/xfundstruct.py +1 -1
  9. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/anngen.py +14 -1
  10. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/order.py +7 -8
  11. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/develop.py +0 -1
  12. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/settings.py +2 -1
  13. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection.egg-info/PKG-INFO +1 -1
  14. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/LICENSE +0 -0
  15. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/README.md +0 -0
  16. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/analyzer/__init__.py +0 -0
  17. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/analyzer/dd.py +0 -0
  18. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/analyzer/factory.py +0 -0
  19. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/configs/__init__.py +0 -0
  20. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  21. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  22. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/configs/profiles.jsonl +0 -0
  23. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/__init__.py +0 -0
  24. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/base.py +0 -0
  25. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/common.py +0 -0
  26. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/custom.py +0 -0
  27. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/custom_serialize.py +0 -0
  28. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/parallel_map.py +0 -0
  29. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/serialize.py +0 -0
  30. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/dataflow/stats.py +0 -0
  31. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/__init__.py +0 -0
  32. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/annotation.py +0 -0
  33. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/box.py +0 -0
  34. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datapoint/convert.py +0 -0
  35. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/__init__.py +0 -0
  36. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/adapter.py +0 -0
  37. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/base.py +0 -0
  38. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/dataflow_builder.py +0 -0
  39. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/info.py +0 -0
  40. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/__init__.py +0 -0
  41. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  42. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  43. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/funsd.py +0 -0
  44. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  45. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/layouttest.py +0 -0
  46. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/publaynet.py +0 -0
  47. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  48. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  49. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  50. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/xfund.py +0 -0
  51. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  52. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  53. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/registry.py +0 -0
  54. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/datasets/save.py +0 -0
  55. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/__init__.py +0 -0
  56. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/accmetric.py +0 -0
  57. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/base.py +0 -0
  58. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/cocometric.py +0 -0
  59. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/eval.py +0 -0
  60. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/registry.py +0 -0
  61. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/tedsmetric.py +0 -0
  62. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/eval/tp_eval_callback.py +0 -0
  63. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/__init__.py +0 -0
  64. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/d2detect.py +0 -0
  65. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/deskew.py +0 -0
  66. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/fastlang.py +0 -0
  67. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/hfdetr.py +0 -0
  68. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/hflayoutlm.py +0 -0
  69. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/hflm.py +0 -0
  70. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/model.py +0 -0
  71. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/pdftext.py +0 -0
  72. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/pt/__init__.py +0 -0
  73. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/pt/nms.py +0 -0
  74. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/pt/ptutils.py +0 -0
  75. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tessocr.py +0 -0
  76. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/texocr.py +0 -0
  77. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/__init__.py +0 -0
  78. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tfutils.py +0 -0
  79. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpcompat.py +0 -0
  80. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  81. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  82. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  83. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  84. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  85. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  86. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  87. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  88. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  89. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  90. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  91. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  92. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  93. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  94. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  95. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  96. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  97. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  98. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/extern/tpdetect.py +0 -0
  99. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/__init__.py +0 -0
  100. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/cats.py +0 -0
  101. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/cocostruct.py +0 -0
  102. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/d2struct.py +0 -0
  103. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/hfstruct.py +0 -0
  104. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/laylmstruct.py +0 -0
  105. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/maputils.py +0 -0
  106. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/match.py +0 -0
  107. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/misc.py +0 -0
  108. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/pascalstruct.py +0 -0
  109. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/prodigystruct.py +0 -0
  110. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/pubstruct.py +0 -0
  111. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/mapper/tpstruct.py +0 -0
  112. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/__init__.py +0 -0
  113. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/base.py +0 -0
  114. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/common.py +0 -0
  115. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/concurrency.py +0 -0
  116. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/doctectionpipe.py +0 -0
  117. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/language.py +0 -0
  118. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/layout.py +0 -0
  119. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/lm.py +0 -0
  120. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/refine.py +0 -0
  121. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/registry.py +0 -0
  122. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/segment.py +0 -0
  123. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/sub_layout.py +0 -0
  124. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/text.py +0 -0
  125. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/pipe/transform.py +0 -0
  126. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/py.typed +0 -0
  127. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/train/__init__.py +0 -0
  128. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/train/d2_frcnn_train.py +0 -0
  129. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/train/hf_detr_train.py +0 -0
  130. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  131. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/train/tp_frcnn_train.py +0 -0
  132. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/__init__.py +0 -0
  133. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/concurrency.py +0 -0
  134. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/context.py +0 -0
  135. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/env_info.py +0 -0
  136. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/error.py +0 -0
  137. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/file_utils.py +0 -0
  138. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/fs.py +0 -0
  139. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/identifier.py +0 -0
  140. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/logger.py +0 -0
  141. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/metacfg.py +0 -0
  142. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/mocks.py +0 -0
  143. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/pdf_utils.py +0 -0
  144. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/tqdm.py +0 -0
  145. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/transform.py +0 -0
  146. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/setup.cfg +0 -0
  154. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/setup.py +0 -0
  155. {deepdoctection-0.43.2 → deepdoctection-0.43.4}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.2
3
+ Version: 0.43.4
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43.2"
28
+ __version__ = "0.43.4"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -903,6 +903,7 @@ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
903
903
  # This prevents accidental modification at runtime.
904
904
  cfg.freeze()
905
905
 
906
+
906
907
  def update_cfg_from_defaults() -> None:
907
908
  """
908
909
  Update the configuration with current values from IMAGE_DEFAULTS.
@@ -479,8 +479,8 @@ class Image:
479
479
 
480
480
  def remove(
481
481
  self,
482
- annotation_ids: Optional[Union[str, list[str]]] = None,
483
- service_ids: Optional[Union[str, list[str]]] = None,
482
+ annotation_ids: Optional[Union[str, Sequence[str]]] = None,
483
+ service_ids: Optional[Union[str, Sequence[str]]] = None,
484
484
  ) -> None:
485
485
  """
486
486
  Instead of removing consider deactivating annotations.
@@ -72,6 +72,18 @@ class ImageAnnotationBaseView(ImageAnnotation):
72
72
 
73
73
  base_page: Page
74
74
 
75
+ @property
76
+ def b64_image(self) -> Optional[str]:
77
+ """
78
+ Returns:
79
+ The base64 encoded image of the page if available, otherwise None.
80
+ """
81
+
82
+ if self.image is not None:
83
+ if self.image.image is not None:
84
+ return viz_handler.convert_np_to_b64(self.image.image)
85
+ return None
86
+
75
87
  @property
76
88
  def bbox(self) -> list[float]:
77
89
  """
@@ -157,7 +169,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
157
169
  """
158
170
 
159
171
  # sub categories and summary sub categories are valid attribute names
160
- attr_names = {"bbox", "np_image"}.union({cat.value for cat in self.sub_categories})
172
+ attr_names = {"bbox", "np_image", "b64_image"}.union({cat.value for cat in self.sub_categories})
161
173
  if self.image:
162
174
  attr_names = attr_names.union({cat.value for cat in self.image.summary.sub_categories.keys()})
163
175
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
@@ -183,7 +195,7 @@ class Word(ImageAnnotationBaseView):
183
195
  attr_names = (
184
196
  set(WordType)
185
197
  .union(super().get_attribute_names())
186
- .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
198
+ .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK})
187
199
  )
188
200
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
189
201
 
@@ -774,6 +786,7 @@ class Page(Image):
774
786
  "residual_layouts",
775
787
  "document_summary",
776
788
  "document_mapping",
789
+ "b64_image",
777
790
  }
778
791
  include_residual_text_container: bool = True
779
792
 
@@ -902,6 +915,17 @@ class Page(Image):
902
915
  """
903
916
  return self.get_annotation(category_names=self.residual_text_block_categories)
904
917
 
918
+ @property
919
+ def b64_image(self) -> Optional[str]:
920
+ """
921
+ Returns:
922
+ The base64 encoded image of the page if available, otherwise None.
923
+ """
924
+
925
+ if self.image_orig.image is not None:
926
+ return viz_handler.convert_np_to_b64(self.image_orig.image)
927
+ return None
928
+
905
929
  @classmethod
906
930
  def from_image(
907
931
  cls,
@@ -1390,19 +1414,34 @@ class Page(Image):
1390
1414
  include_residual_text_container=include_residual_text_container,
1391
1415
  )
1392
1416
 
1393
- def get_token(self) -> list[Mapping[str, str]]:
1417
+ def get_entities(self) -> list[Mapping[str, str]]:
1394
1418
  """
1395
1419
  Returns:
1396
- A list of tuples with word and non default token tags
1420
+ A list of dicts with the following structure:
1421
+
1422
+ ```python
1423
+ {
1424
+ "word": str, # word characters
1425
+ "entity": str, # token tag
1426
+ "annotation_id": str, # annotation id of the word
1427
+ "successor_annotation_id": Optional[str] # annotation_id of the successor word, if any
1428
+ }
1429
+ ```
1430
+
1397
1431
  """
1398
1432
  block_with_order = self._order("layouts")
1399
1433
  all_words = []
1400
1434
  for block in block_with_order:
1401
1435
  all_words.extend(block.get_ordered_words()) # type: ignore
1402
1436
  return [
1403
- {"word": word.CHARACTERS, "entity": word.TOKEN_TAG}
1437
+ {
1438
+ "word": word.characters,
1439
+ "entity": word.token_tag.value,
1440
+ "annotation_id": word.annotation_id,
1441
+ "successor_annotation_id": word.successor[0].annotation_id if word.successor else None,
1442
+ }
1404
1443
  for word in all_words
1405
- if word.TOKEN_TAG not in (TokenClasses.OTHER, None)
1444
+ if word.token_tag not in (TokenClasses.OTHER, None)
1406
1445
  ]
1407
1446
 
1408
1447
  def __copy__(self) -> Page:
@@ -502,6 +502,7 @@ class TokenClassResult:
502
502
  semantic_name: semantic name
503
503
  bio_tag: bio tag
504
504
  score: prediction score
505
+ successor_uuid: uuid of the next token in the sequence
505
506
  """
506
507
 
507
508
  uuid: str
@@ -512,6 +513,7 @@ class TokenClassResult:
512
513
  bio_tag: ObjectTypes = DefaultType.DEFAULT_TYPE
513
514
  score: Optional[float] = None
514
515
  token_id: Optional[int] = None
516
+ successor_uuid: Optional[str] = None
515
517
 
516
518
 
517
519
  @dataclass
@@ -269,10 +269,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
269
269
  if self.lib == "PT":
270
270
  self.device = get_torch_device(device)
271
271
 
272
- self.doctr_predictor = self.get_wrapped_model(self.architecture,
273
- self.path_weights,
274
- self.device,
275
- self.lib)
272
+ self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
276
273
 
277
274
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
278
275
  """
@@ -200,7 +200,7 @@ def xfund_to_image(
200
200
  ann_ids.extend(entity_id_to_ann_id[linked_entity])
201
201
  for ann_id in ann_ids:
202
202
  if ann_id != word.annotation_id:
203
- word.dump_relationship(Relationships.SEMANTIC_ENTITY_LINK, ann_id)
203
+ word.dump_relationship(Relationships.LINK, ann_id)
204
204
 
205
205
  if mapping_context.context_error:
206
206
  return None
@@ -19,7 +19,7 @@
19
19
  Datapoint manager
20
20
  """
21
21
  from dataclasses import asdict
22
- from typing import Optional, Union
22
+ from typing import Optional, Sequence, Union
23
23
 
24
24
  import numpy as np
25
25
 
@@ -372,6 +372,19 @@ class DatapointManager:
372
372
  return None
373
373
  return ann.annotation_id
374
374
 
375
+ def remove_annotations(self, annotation_ids: Sequence[str]) -> None:
376
+ """
377
+ Removes the annotation by the given `annotation_id`.
378
+
379
+ Args:
380
+ annotation_ids: The `annotation_id` to remove.
381
+ """
382
+ self.assert_datapoint_passed()
383
+ self.datapoint.remove(annotation_ids)
384
+ for ann_id in annotation_ids:
385
+ if ann_id in self._cache_anns:
386
+ self._cache_anns.pop(ann_id)
387
+
375
388
  def deactivate_annotation(self, annotation_id: str) -> None:
376
389
  """
377
390
  Deactivates the annotation by the given `annotation_id`.
@@ -801,14 +801,13 @@ class TextOrderService(TextLineServiceMixin):
801
801
  if self.include_residual_text_container:
802
802
  add_category.append(LayoutType.LINE)
803
803
 
804
- if set(self.floating_text_block_categories) <= set(
805
- self.text_block_categories + tuple(add_category)
806
- ):
807
- logger.warning("In most cases floating_text_block_categories must be a subset of text_block_categories. "
808
- "Adding categories to floating_text_block_categories, that do not belong to "
809
- "text_block_categories makes only sense for categories set have CHILD relationships with"
810
- " annotations that belong to text_block_categories.")
811
-
804
+ if set(self.floating_text_block_categories) <= set(self.text_block_categories + tuple(add_category)):
805
+ logger.warning(
806
+ "In most cases floating_text_block_categories must be a subset of text_block_categories. "
807
+ "Adding categories to floating_text_block_categories, that do not belong to "
808
+ "text_block_categories makes only sense for categories set have CHILD relationships with"
809
+ " annotations that belong to text_block_categories."
810
+ )
812
811
 
813
812
  def get_meta_annotation(self) -> MetaAnnotation:
814
813
  add_category = [self.text_container]
@@ -71,7 +71,6 @@ def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Option
71
71
  logger.info(LoggingRecord(f"[Deprecated] {info_msg}"))
72
72
 
73
73
 
74
-
75
74
  def deprecated(
76
75
  text: str = "", eos: str = "", max_num_warnings: Optional[int] = None
77
76
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
@@ -229,8 +229,9 @@ class Relationships(ObjectTypes):
229
229
 
230
230
  CHILD = "child"
231
231
  READING_ORDER = "reading_order"
232
- SEMANTIC_ENTITY_LINK = "semantic_entity_link"
232
+ LINK = "link"
233
233
  LAYOUT_LINK = "layout_link"
234
+ SUCCESSOR = "successor"
234
235
 
235
236
 
236
237
  @object_types_registry.register("Languages")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.2
3
+ Version: 0.43.4
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
File without changes