deepdoctection 0.37__tar.gz → 0.37.2__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (154)
  1. {deepdoctection-0.37 → deepdoctection-0.37.2}/PKG-INFO +1 -1
  2. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/analyzer/factory.py +3 -3
  4. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/image.py +9 -3
  5. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/hflayoutlm.py +1 -1
  6. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/base.py +29 -9
  7. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/doctectionpipe.py +6 -5
  8. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/train/hf_layoutlm_train.py +4 -3
  9. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection.egg-info/PKG-INFO +1 -1
  10. {deepdoctection-0.37 → deepdoctection-0.37.2}/LICENSE +0 -0
  11. {deepdoctection-0.37 → deepdoctection-0.37.2}/README.md +0 -0
  12. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/analyzer/__init__.py +0 -0
  13. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/analyzer/_config.py +0 -0
  14. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/analyzer/dd.py +0 -0
  15. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/configs/__init__.py +0 -0
  16. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  17. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  18. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/__init__.py +0 -0
  19. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/base.py +0 -0
  20. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/common.py +0 -0
  21. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/custom.py +0 -0
  22. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/custom_serialize.py +0 -0
  23. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/parallel_map.py +0 -0
  24. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/serialize.py +0 -0
  25. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/dataflow/stats.py +0 -0
  26. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/__init__.py +0 -0
  27. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/annotation.py +0 -0
  28. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/box.py +0 -0
  29. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/convert.py +0 -0
  30. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datapoint/view.py +0 -0
  31. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/__init__.py +0 -0
  32. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/adapter.py +0 -0
  33. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/base.py +0 -0
  34. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/dataflow_builder.py +0 -0
  35. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/info.py +0 -0
  36. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/__init__.py +0 -0
  37. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  38. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  39. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/funsd.py +0 -0
  40. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  41. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/layouttest.py +0 -0
  42. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/publaynet.py +0 -0
  43. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  44. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  45. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  46. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/xfund.py +0 -0
  47. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  48. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  49. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/registry.py +0 -0
  50. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/datasets/save.py +0 -0
  51. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/__init__.py +0 -0
  52. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/accmetric.py +0 -0
  53. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/base.py +0 -0
  54. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/cocometric.py +0 -0
  55. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/eval.py +0 -0
  56. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/registry.py +0 -0
  57. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/tedsmetric.py +0 -0
  58. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/eval/tp_eval_callback.py +0 -0
  59. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/__init__.py +0 -0
  60. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/base.py +0 -0
  61. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/d2detect.py +0 -0
  62. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/deskew.py +0 -0
  63. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/doctrocr.py +0 -0
  64. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/fastlang.py +0 -0
  65. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/hfdetr.py +0 -0
  66. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/hflm.py +0 -0
  67. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/model.py +0 -0
  68. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/pdftext.py +0 -0
  69. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/pt/__init__.py +0 -0
  70. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/pt/nms.py +0 -0
  71. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/pt/ptutils.py +0 -0
  72. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tessocr.py +0 -0
  73. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/texocr.py +0 -0
  74. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/__init__.py +0 -0
  75. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tfutils.py +0 -0
  76. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpcompat.py +0 -0
  77. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  78. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  79. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  80. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  81. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  82. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  83. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  84. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  85. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  86. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  87. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  88. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  89. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  90. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  91. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  92. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  93. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  94. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  95. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/extern/tpdetect.py +0 -0
  96. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/__init__.py +0 -0
  97. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/cats.py +0 -0
  98. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/cocostruct.py +0 -0
  99. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/d2struct.py +0 -0
  100. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/hfstruct.py +0 -0
  101. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/laylmstruct.py +0 -0
  102. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/maputils.py +0 -0
  103. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/match.py +0 -0
  104. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/misc.py +0 -0
  105. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/pascalstruct.py +0 -0
  106. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/prodigystruct.py +0 -0
  107. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/pubstruct.py +0 -0
  108. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/tpstruct.py +0 -0
  109. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/mapper/xfundstruct.py +0 -0
  110. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/__init__.py +0 -0
  111. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/anngen.py +0 -0
  112. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/common.py +0 -0
  113. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/concurrency.py +0 -0
  114. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/language.py +0 -0
  115. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/layout.py +0 -0
  116. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/lm.py +0 -0
  117. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/order.py +0 -0
  118. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/refine.py +0 -0
  119. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/registry.py +0 -0
  120. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/segment.py +0 -0
  121. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/sub_layout.py +0 -0
  122. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/text.py +0 -0
  123. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/pipe/transform.py +0 -0
  124. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/py.typed +0 -0
  125. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/train/__init__.py +0 -0
  126. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/train/d2_frcnn_train.py +0 -0
  127. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/train/hf_detr_train.py +0 -0
  128. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/train/tp_frcnn_train.py +0 -0
  129. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/__init__.py +0 -0
  130. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/concurrency.py +0 -0
  131. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/context.py +0 -0
  132. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/develop.py +0 -0
  133. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/env_info.py +0 -0
  134. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/error.py +0 -0
  135. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/file_utils.py +0 -0
  136. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/fs.py +0 -0
  137. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/identifier.py +0 -0
  138. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/logger.py +0 -0
  139. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/metacfg.py +0 -0
  140. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/mocks.py +0 -0
  141. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/pdf_utils.py +0 -0
  142. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/settings.py +0 -0
  143. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/tqdm.py +0 -0
  144. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/transform.py +0 -0
  145. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/types.py +0 -0
  146. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/utils.py +0 -0
  147. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection/utils/viz.py +0 -0
  148. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection.egg-info/SOURCES.txt +0 -0
  149. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection.egg-info/dependency_links.txt +0 -0
  150. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection.egg-info/requires.txt +0 -0
  151. {deepdoctection-0.37 → deepdoctection-0.37.2}/deepdoctection.egg-info/top_level.txt +0 -0
  152. {deepdoctection-0.37 → deepdoctection-0.37.2}/setup.cfg +0 -0
  153. {deepdoctection-0.37 → deepdoctection-0.37.2}/setup.py +0 -0
  154. {deepdoctection-0.37 → deepdoctection-0.37.2}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.37
3
+ Version: 0.37.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -24,7 +24,7 @@ from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = 0.37
27
+ __version__ = "0.37.2"
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -327,9 +327,9 @@ class ServiceFactory:
327
327
  )
328
328
  if config.OCR.USE_TEXTRACT:
329
329
  credentials_kwargs = {
330
- "aws_access_key_id": environ.get("ACCESS_KEY", None),
331
- "aws_secret_access_key": environ.get("SECRET_KEY", None),
332
- "config": Config(region_name=environ.get("REGION", None)),
330
+ "aws_access_key_id": environ.get("AWS_ACCESS_KEY", None),
331
+ "aws_secret_access_key": environ.get("AWS_SECRET_KEY", None),
332
+ "config": Config(region_name=environ.get("AWS_REGION", None)),
333
333
  }
334
334
  return TextractOcrDetector(**credentials_kwargs)
335
335
  raise ValueError("You have set USE_OCR=True but any of USE_TESSERACT, USE_DOCTR, USE_TEXTRACT is set to False")
@@ -32,6 +32,7 @@ from numpy import uint8
32
32
 
33
33
  from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
34
34
  from ..utils.identifier import get_uuid, is_uuid_like
35
+ from ..utils.logger import LoggingRecord, logger
35
36
  from ..utils.settings import ObjectTypes, SummaryType, get_type
36
37
  from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
37
38
  from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
@@ -474,8 +475,13 @@ class Image:
474
475
 
475
476
  for service_id in service_ids:
476
477
  if service_id not in service_id_to_annotation_id:
477
- raise ImageError(f"Service id {service_id} not found")
478
- annotation_ids = service_id_to_annotation_id[service_id]
478
+ logger.info(
479
+ LoggingRecord(
480
+ f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."
481
+ )
482
+ )
483
+
484
+ annotation_ids = service_id_to_annotation_id.get(service_id, [])
479
485
 
480
486
  for ann_id in annotation_ids:
481
487
  if ann_id not in ann_id_to_annotation_maps:
@@ -747,7 +753,7 @@ class Image:
747
753
  if sub_cat.service_id:
748
754
  service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
749
755
  if ann.image is not None:
750
- for summary_cat_key in ann.image.summary:
756
+ for summary_cat_key in ann.image.summary.sub_categories:
751
757
  summary_cat = ann.get_summary(summary_cat_key)
752
758
  if summary_cat.service_id:
753
759
  service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
48
48
  import torch.nn.functional as F
49
49
 
50
50
  with try_import() as tr_import_guard:
51
- from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type: ignore
51
+ from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type:ignore
52
52
  from transformers import (
53
53
  LayoutLMForSequenceClassification,
54
54
  LayoutLMForTokenClassification,
@@ -29,6 +29,7 @@ from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
31
31
  from ..datapoint.image import Image
32
+ from ..mapper.misc import curry
32
33
  from ..utils.context import timed_operation
33
34
  from ..utils.identifier import get_uuid_from_str
34
35
  from ..utils.settings import ObjectTypes
@@ -247,17 +248,24 @@ class Pipeline(ABC):
247
248
  """
248
249
  raise NotImplementedError()
249
250
 
250
- def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
251
+ @staticmethod
252
+ @curry
253
+ def _undo(dp: Image, service_ids: Optional[list[str]] = None) -> Image:
251
254
  """
252
- Composition of the backbone
255
+ Remove annotations from a datapoint
253
256
  """
254
- if session_id is None and self.set_session_id:
255
- session_id = self.get_session_id()
256
- for component in self.pipe_component_list:
257
- component.timer_on = True
258
- component.dp_manager.session_id = session_id
259
- df = component.predict_dataflow(df)
260
- return df
257
+ dp.remove(service_ids=service_ids)
258
+ return dp
259
+
260
+ def undo(self, df: DataFlow, service_ids: Optional[set[str]] = None) -> DataFlow:
261
+ """
262
+ Mapping a datapoint via `_undo` within a dataflow pipeline
263
+
264
+ :param df: An input dataflow of Images
265
+ :param service_ids: A set of service ids to remove
266
+ :return: A output dataflow of Images
267
+ """
268
+ return MapData(df, self._undo(service_ids=service_ids))
261
269
 
262
270
  @abstractmethod
263
271
  def analyze(self, **kwargs: Any) -> DataFlow:
@@ -273,6 +281,18 @@ class Pipeline(ABC):
273
281
  """
274
282
  raise NotImplementedError()
275
283
 
284
+ def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
285
+ """
286
+ Composition of the backbone
287
+ """
288
+ if session_id is None and self.set_session_id:
289
+ session_id = self.get_session_id()
290
+ for component in self.pipe_component_list:
291
+ component.timer_on = True
292
+ component.dp_manager.session_id = session_id
293
+ df = component.predict_dataflow(df)
294
+ return df
295
+
276
296
  def get_meta_annotation(self) -> MetaAnnotation:
277
297
  """
278
298
  Collects meta annotations from all pipeline components and summarizes the returned results
@@ -161,16 +161,17 @@ class DoctectionPipe(Pipeline):
161
161
 
162
162
  super().__init__(pipeline_component_list)
163
163
 
164
- def _entry(self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) \
165
- -> DataFlow:
164
+ def _entry(
165
+ self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
166
+ ) -> DataFlow:
166
167
  path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow, b_bytes = _collect_from_kwargs(**kwargs)
167
168
 
168
169
  df: DataFlow
169
170
 
170
171
  if isinstance(b_bytes, bytes):
171
- df = DoctectionPipe.bytes_to_dataflow(path=doc_path if path is None else path,
172
- b_bytes=b_bytes,
173
- file_type=file_type)
172
+ df = DoctectionPipe.bytes_to_dataflow(
173
+ path=doc_path if path is None else path, b_bytes=b_bytes, file_type=file_type
174
+ )
174
175
 
175
176
  elif isinstance(path, (str, Path)):
176
177
  if not isinstance(file_type, (str, list)):
@@ -161,11 +161,12 @@ class LayoutLMTrainer(Trainer):
161
161
  model: Union[PreTrainedModel, nn.Module],
162
162
  args: TrainingArguments,
163
163
  data_collator: LayoutLMDataCollator,
164
- train_dataset: Dataset[Any],
164
+ train_dataset: DatasetAdapter,
165
+ eval_dataset: Optional[DatasetBase] = None,
165
166
  ):
166
167
  self.evaluator: Optional[Evaluator] = None
167
168
  self.build_eval_kwargs: Optional[dict[str, Any]] = None
168
- super().__init__(model, args, data_collator, train_dataset)
169
+ super().__init__(model, args, data_collator, train_dataset, eval_dataset=eval_dataset)
169
170
 
170
171
  def setup_evaluator(
171
172
  self,
@@ -472,7 +473,7 @@ def train_hf_layoutlm(
472
473
  max_batch_size=max_batch_size, # type: ignore
473
474
  remove_bounding_box_features=remove_box_features,
474
475
  )
475
- trainer = LayoutLMTrainer(model, arguments, data_collator, dataset)
476
+ trainer = LayoutLMTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)
476
477
 
477
478
  if arguments.evaluation_strategy in (IntervalStrategy.STEPS,):
478
479
  assert metric is not None # silence mypy
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.37
3
+ Version: 0.37.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
File without changes
File without changes
File without changes
File without changes