deepdoctection 0.39.1__tar.gz → 0.39.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (154)
  1. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/PKG-INFO +1 -1
  2. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/analyzer/_config.py +1 -0
  4. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/analyzer/factory.py +5 -1
  5. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/image.py +18 -6
  6. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/view.py +29 -0
  7. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/registry.py +5 -1
  8. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/cats.py +4 -3
  9. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/base.py +2 -3
  10. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/common.py +1 -1
  11. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/lm.py +5 -6
  12. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/segment.py +8 -6
  13. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/train/hf_layoutlm_train.py +1 -3
  14. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection.egg-info/PKG-INFO +1 -1
  15. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/setup.py +0 -1
  16. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/LICENSE +0 -0
  17. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/README.md +0 -0
  18. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/analyzer/__init__.py +0 -0
  19. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/analyzer/dd.py +0 -0
  20. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/configs/__init__.py +0 -0
  21. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  22. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  23. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/__init__.py +0 -0
  24. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/base.py +0 -0
  25. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/common.py +0 -0
  26. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/custom.py +0 -0
  27. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/custom_serialize.py +0 -0
  28. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/parallel_map.py +0 -0
  29. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/serialize.py +0 -0
  30. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/dataflow/stats.py +0 -0
  31. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/__init__.py +0 -0
  32. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/annotation.py +0 -0
  33. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/box.py +0 -0
  34. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datapoint/convert.py +0 -0
  35. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/__init__.py +0 -0
  36. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/adapter.py +0 -0
  37. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/base.py +0 -0
  38. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/dataflow_builder.py +0 -0
  39. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/info.py +0 -0
  40. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/__init__.py +0 -0
  41. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  42. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  43. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/funsd.py +0 -0
  44. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  45. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/layouttest.py +0 -0
  46. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/publaynet.py +0 -0
  47. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  48. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  49. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  50. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/xfund.py +0 -0
  51. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  52. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  53. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/datasets/save.py +0 -0
  54. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/__init__.py +0 -0
  55. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/accmetric.py +0 -0
  56. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/base.py +0 -0
  57. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/cocometric.py +0 -0
  58. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/eval.py +0 -0
  59. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/registry.py +0 -0
  60. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/tedsmetric.py +0 -0
  61. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/eval/tp_eval_callback.py +0 -0
  62. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/__init__.py +0 -0
  63. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/base.py +0 -0
  64. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/d2detect.py +0 -0
  65. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/deskew.py +0 -0
  66. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/doctrocr.py +0 -0
  67. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/fastlang.py +0 -0
  68. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/hfdetr.py +0 -0
  69. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/hflayoutlm.py +0 -0
  70. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/hflm.py +0 -0
  71. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/model.py +0 -0
  72. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/pdftext.py +0 -0
  73. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/pt/__init__.py +0 -0
  74. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/pt/nms.py +0 -0
  75. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/pt/ptutils.py +0 -0
  76. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tessocr.py +0 -0
  77. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/texocr.py +0 -0
  78. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/__init__.py +0 -0
  79. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tfutils.py +0 -0
  80. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpcompat.py +0 -0
  81. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  82. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  83. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  84. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  85. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  86. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  87. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  88. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  89. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  90. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  91. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  92. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  93. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  94. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  95. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  96. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  97. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  98. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  99. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/extern/tpdetect.py +0 -0
  100. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/__init__.py +0 -0
  101. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/cocostruct.py +0 -0
  102. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/d2struct.py +0 -0
  103. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/hfstruct.py +0 -0
  104. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/laylmstruct.py +0 -0
  105. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/maputils.py +0 -0
  106. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/match.py +0 -0
  107. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/misc.py +0 -0
  108. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/pascalstruct.py +0 -0
  109. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/prodigystruct.py +0 -0
  110. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/pubstruct.py +0 -0
  111. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/tpstruct.py +0 -0
  112. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/mapper/xfundstruct.py +0 -0
  113. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/__init__.py +0 -0
  114. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/anngen.py +0 -0
  115. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/concurrency.py +0 -0
  116. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/doctectionpipe.py +0 -0
  117. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/language.py +0 -0
  118. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/layout.py +0 -0
  119. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/order.py +0 -0
  120. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/refine.py +0 -0
  121. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/registry.py +0 -0
  122. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/sub_layout.py +0 -0
  123. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/text.py +0 -0
  124. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/pipe/transform.py +0 -0
  125. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/py.typed +0 -0
  126. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/train/__init__.py +0 -0
  127. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/train/d2_frcnn_train.py +0 -0
  128. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/train/hf_detr_train.py +0 -0
  129. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/train/tp_frcnn_train.py +0 -0
  130. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/__init__.py +0 -0
  131. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/concurrency.py +0 -0
  132. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/context.py +0 -0
  133. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/develop.py +0 -0
  134. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/env_info.py +0 -0
  135. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/error.py +0 -0
  136. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/file_utils.py +0 -0
  137. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/fs.py +0 -0
  138. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/identifier.py +0 -0
  139. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/logger.py +0 -0
  140. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/metacfg.py +0 -0
  141. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/mocks.py +0 -0
  142. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/pdf_utils.py +0 -0
  143. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/settings.py +0 -0
  144. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/tqdm.py +0 -0
  145. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/transform.py +0 -0
  146. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/setup.cfg +0 -0
  154. {deepdoctection-0.39.1 → deepdoctection-0.39.3}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39.1
3
+ Version: 0.39.3
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.39.1"
28
+ __version__ = "0.39.3"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -40,6 +40,7 @@ cfg.TF.CELL.FILTER = None
40
40
  cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
41
41
  cfg.TF.ITEM.FILTER = None
42
42
 
43
+ cfg.PT.ENFORCE_WEIGHTS = False
43
44
  cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
44
45
  cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
45
46
  cfg.PT.LAYOUT.FILTER = None
@@ -98,7 +98,11 @@ class ServiceFactory:
98
98
  weights = (
99
99
  getattr(config.TF, mode).WEIGHTS
100
100
  if config.LIB == "TF"
101
- else (getattr(config.PT, mode).WEIGHTS if detectron2_available() else getattr(config.PT, mode).WEIGHTS_TS)
101
+ else (
102
+ getattr(config.PT, mode).WEIGHTS
103
+ if detectron2_available() or config.PT.ENFORCE_WEIGHTS
104
+ else getattr(config.PT, mode).WEIGHTS_TS
105
+ )
102
106
  )
103
107
  filter_categories = (
104
108
  getattr(getattr(config.TF, mode), "FILTER")
@@ -702,11 +702,11 @@ class Image:
702
702
  return get_uuid(self.image_id, *container_ids)
703
703
 
704
704
  def save(
705
- self,
706
- image_to_json: bool = True,
707
- highest_hierarchy_only: bool = False,
708
- path: Optional[PathLikeOrStr] = None,
709
- dry: bool = False,
705
+ self,
706
+ image_to_json: bool = True,
707
+ highest_hierarchy_only: bool = False,
708
+ path: Optional[PathLikeOrStr] = None,
709
+ dry: bool = False,
710
710
  ) -> Optional[Union[ImageDict, str]]:
711
711
  """
712
712
  Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
@@ -719,6 +719,18 @@ class Image:
719
719
 
720
720
  :return: optional dict
721
721
  """
722
+
723
+ def set_image_keys_to_none(d): # type: ignore
724
+ if isinstance(d, dict):
725
+ for key, value in d.items():
726
+ if key == '_image':
727
+ d[key] = None
728
+ else:
729
+ set_image_keys_to_none(value)
730
+ elif isinstance(d, list):
731
+ for item in d:
732
+ set_image_keys_to_none(item)
733
+
722
734
  if path is None:
723
735
  path = Path(self.location)
724
736
  path = Path(path)
@@ -734,7 +746,7 @@ class Image:
734
746
  export_dict = self.as_dict()
735
747
  export_dict["location"] = fspath(export_dict["location"])
736
748
  if not image_to_json:
737
- export_dict["_image"] = None
749
+ set_image_keys_to_none(export_dict)
738
750
  if dry:
739
751
  return export_dict
740
752
  with open(path_json, "w", encoding="UTF-8") as file:
@@ -407,6 +407,35 @@ class Table(Layout):
407
407
  col_anns = self.base_page.get_annotation(annotation_ids=all_relation_ids, category_names=[LayoutType.COLUMN])
408
408
  return col_anns
409
409
 
410
+ def row(self, row_number: int) -> list[ImageAnnotationBaseView]:
411
+ """
412
+ Get a list of cells in a row.
413
+ """
414
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
415
+ all_cells = self.base_page.get_annotation(
416
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
417
+ )
418
+ row_cells = list(
419
+ filter(lambda c: row_number in (c.row_number, c.row_number + c.row_span - 1), all_cells) # type: ignore
420
+ )
421
+ row_cells.sort(key=lambda c: c.column_number) # type: ignore
422
+ return row_cells # type: ignore
423
+
424
+ def column(self, column_number: int) -> list[ImageAnnotationBaseView]:
425
+ """
426
+ Get a list of cells in a column.
427
+ """
428
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
429
+ all_cells = self.base_page.get_annotation(
430
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
431
+ )
432
+ column_cells = list(
433
+ filter(lambda c: column_number in # type: ignore
434
+ (c.column_number, c.column_number + c.column_span - 1), all_cells) # type: ignore
435
+ )
436
+ column_cells.sort(key=lambda c: c.row_number) # type: ignore
437
+ return column_cells # type: ignore
438
+
410
439
  @property
411
440
  def html(self) -> HTML:
412
441
  """
@@ -18,6 +18,7 @@
18
18
  """
19
19
  Module for DatasetRegistry
20
20
  """
21
+ import inspect
21
22
 
22
23
  import catalogue # type: ignore
23
24
  from tabulate import tabulate
@@ -47,7 +48,10 @@ def get_dataset(name: str) -> DatasetBase:
47
48
  :param name: A dataset name
48
49
  :return: An instance of a dataset
49
50
  """
50
- return dataset_registry.get(name)()
51
+ ds = dataset_registry.get(name)
52
+ if inspect.isclass(ds):
53
+ return ds()
54
+ return ds
51
55
 
52
56
 
53
57
  def print_dataset_infos(add_license: bool = True, add_info: bool = True) -> None:
@@ -32,7 +32,7 @@ from .maputils import LabelSummarizer, curry
32
32
  @curry
33
33
  def cat_to_sub_cat(
34
34
  dp: Image,
35
- categories_dict_names_as_key: dict[TypeOrStr, int],
35
+ categories_dict_names_as_key: Optional[dict[TypeOrStr, int]] = None,
36
36
  cat_to_sub_cat_dict: Optional[dict[TypeOrStr, TypeOrStr]] = None,
37
37
  ) -> Image:
38
38
  """
@@ -45,7 +45,8 @@ def cat_to_sub_cat(
45
45
  :param cat_to_sub_cat_dict: e.g. {'foo': 'sub_cat_1', 'bak': 'sub_cat_2'}
46
46
  :return: Image with updated Annotations
47
47
  """
48
-
48
+ if categories_dict_names_as_key is None:
49
+ categories_dict_names_as_key = {}
49
50
  if cat_to_sub_cat_dict is None:
50
51
  return dp
51
52
  cat_to_sub_cat_dict_obj_type = {get_type(key): get_type(value) for key, value in cat_to_sub_cat_dict.items()}
@@ -54,7 +55,7 @@ def cat_to_sub_cat(
54
55
  sub_cat = ann.get_sub_category(sub_cat_type)
55
56
  if sub_cat:
56
57
  ann.category_name = sub_cat.category_name
57
- ann.category_id = categories_dict_names_as_key[ann.category_name]
58
+ ann.category_id = categories_dict_names_as_key.get(ann.category_name,DEFAULT_CATEGORY_ID)
58
59
 
59
60
  return dp
60
61
 
@@ -24,7 +24,7 @@ from __future__ import annotations
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
26
  from dataclasses import dataclass, field
27
- from typing import Any, Mapping, Optional, Union, Callable
27
+ from typing import Any, Callable, Mapping, Optional, Union
28
28
  from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
@@ -100,7 +100,7 @@ class PipelineComponent(ABC):
100
100
 
101
101
  :param filter_func: A function that takes an image datapoint and returns a boolean value
102
102
  """
103
- self.filter_func = filter_func # type: ignore
103
+ self.filter_func = filter_func # type: ignore
104
104
 
105
105
  @abstractmethod
106
106
  def serve(self, dp: Image) -> None:
@@ -122,7 +122,6 @@ class PipelineComponent(ABC):
122
122
  if not self.filter_func(dp):
123
123
  self.serve(dp)
124
124
 
125
-
126
125
  def pass_datapoint(self, dp: Image) -> Image:
127
126
  """
128
127
  Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
@@ -362,7 +362,7 @@ class AnnotationNmsService(PipelineComponent):
362
362
  self.threshold = [thresholds for _ in self.nms_pairs]
363
363
  else:
364
364
  assert len(self.nms_pairs) == len(thresholds), "Sequences of nms_pairs and thresholds must have same length"
365
- self.threshold = thresholds # type: ignore
365
+ self.threshold = thresholds # type: ignore
366
366
  if priority:
367
367
  assert len(self.nms_pairs) == len(priority), "Sequences of nms_pairs and priority must have same length"
368
368
 
@@ -265,7 +265,7 @@ class LMSequenceClassifierService(PipelineComponent):
265
265
  padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
266
266
  truncation: bool = True,
267
267
  return_overflowing_tokens: bool = False,
268
- use_other_as_default_category: bool = False
268
+ use_other_as_default_category: bool = False,
269
269
  ) -> None:
270
270
  """
271
271
  :param tokenizer: Tokenizer, typing allows currently anything. This will be changed in the future
@@ -309,11 +309,10 @@ class LMSequenceClassifierService(PipelineComponent):
309
309
  lm_output = None
310
310
  if lm_input is None:
311
311
  if self.use_other_as_default_category:
312
- class_id = self.language_model.categories.get_categories(as_dict=True,
313
- name_as_key=True).get(TokenClasses.OTHER, 1)
314
- lm_output = SequenceClassResult(class_name=TokenClasses.OTHER,
315
- class_id = class_id,
316
- score=-1.)
312
+ class_id = self.language_model.categories.get_categories(as_dict=True, name_as_key=True).get(
313
+ TokenClasses.OTHER, 1
314
+ )
315
+ lm_output = SequenceClassResult(class_name=TokenClasses.OTHER, class_id=class_id, score=-1.0)
317
316
  else:
318
317
  lm_output = self.language_model.predict(**lm_input)
319
318
  if lm_output:
@@ -1190,14 +1190,16 @@ class PubtablesSegmentationService(PipelineComponent):
1190
1190
  for key, value in cell_rn_cn_to_ann_id.items():
1191
1191
  if key[idx] == item_number:
1192
1192
  cell_ann = dp.get_annotation(annotation_ids=value)[0]
1193
- self.dp_manager.set_category_annotation(
1194
- item_header_cell_name, None, item_header_cell_name, cell_ann.annotation_id
1195
- )
1193
+ if item_header_cell_name not in cell_ann.sub_categories:
1194
+ self.dp_manager.set_category_annotation(
1195
+ item_header_cell_name, None, item_header_cell_name, cell_ann.annotation_id
1196
+ )
1196
1197
  else:
1197
1198
  cell_ann = dp.get_annotation(annotation_ids=value)[0]
1198
- self.dp_manager.set_category_annotation(
1199
- item_header_cell_name, None, CellType.BODY, cell_ann.annotation_id
1200
- )
1199
+ if CellType.BODY not in cell_ann.sub_categories:
1200
+ self.dp_manager.set_category_annotation(
1201
+ item_header_cell_name, None, CellType.BODY, cell_ann.annotation_id
1202
+ )
1201
1203
 
1202
1204
  # TODO: the summaries should be sub categories of the underlying ann
1203
1205
  self.dp_manager.set_summary_annotation(
@@ -499,9 +499,7 @@ def train_hf_layoutlm(
499
499
  )
500
500
  pipeline_component_cls = pipeline_component_registry.get(pipeline_component_name)
501
501
  if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
502
- pipeline_component = pipeline_component_cls(tokenizer_fast,
503
- dd_model,
504
- use_other_as_default_category=True)
502
+ pipeline_component = pipeline_component_cls(tokenizer_fast, dd_model, use_other_as_default_category=True)
505
503
  else:
506
504
  pipeline_component = pipeline_component_cls(
507
505
  tokenizer_fast,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39.1
3
+ Version: 0.39.3
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -90,7 +90,6 @@ _DEPS = [
90
90
  "tensorflow-addons>=0.17.1",
91
91
  "tf2onnx>=1.9.2",
92
92
  "python-doctr==0.8.1",
93
- "fasttext==0.9.2",
94
93
  "fasttext-wheel",
95
94
  # dev dependencies
96
95
  "python-dotenv==1.0.0",
File without changes