deepdoctection 0.37__py3-none-any.whl → 0.37.1__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -24,7 +24,7 @@ from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = 0.37
27
+ __version__ = "0.37.1"
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -327,9 +327,9 @@ class ServiceFactory:
327
327
  )
328
328
  if config.OCR.USE_TEXTRACT:
329
329
  credentials_kwargs = {
330
- "aws_access_key_id": environ.get("ACCESS_KEY", None),
331
- "aws_secret_access_key": environ.get("SECRET_KEY", None),
332
- "config": Config(region_name=environ.get("REGION", None)),
330
+ "aws_access_key_id": environ.get("AWS_ACCESS_KEY", None),
331
+ "aws_secret_access_key": environ.get("AWS_SECRET_KEY", None),
332
+ "config": Config(region_name=environ.get("AWS_REGION", None)),
333
333
  }
334
334
  return TextractOcrDetector(**credentials_kwargs)
335
335
  raise ValueError("You have set USE_OCR=True but any of USE_TESSERACT, USE_DOCTR, USE_TEXTRACT is set to False")
@@ -34,6 +34,7 @@ from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDErr
34
34
  from ..utils.identifier import get_uuid, is_uuid_like
35
35
  from ..utils.settings import ObjectTypes, SummaryType, get_type
36
36
  from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
37
+ from ..utils.logger import LoggingRecord, logger
37
38
  from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
38
39
  from .box import crop_box_from_image, global_to_local_coords, intersection_box
39
40
  from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
@@ -474,8 +475,11 @@ class Image:
474
475
 
475
476
  for service_id in service_ids:
476
477
  if service_id not in service_id_to_annotation_id:
477
- raise ImageError(f"Service id {service_id} not found")
478
- annotation_ids = service_id_to_annotation_id[service_id]
478
+ logger.info(
479
+ LoggingRecord(
480
+ f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."))
481
+
482
+ annotation_ids = service_id_to_annotation_id.get(service_id, [])
479
483
 
480
484
  for ann_id in annotation_ids:
481
485
  if ann_id not in ann_id_to_annotation_maps:
@@ -747,7 +751,7 @@ class Image:
747
751
  if sub_cat.service_id:
748
752
  service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
749
753
  if ann.image is not None:
750
- for summary_cat_key in ann.image.summary:
754
+ for summary_cat_key in ann.image.summary.sub_categories:
751
755
  summary_cat = ann.get_summary(summary_cat_key)
752
756
  if summary_cat.service_id:
753
757
  service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
48
48
  import torch.nn.functional as F
49
49
 
50
50
  with try_import() as tr_import_guard:
51
- from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type: ignore
51
+ from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
52
52
  from transformers import (
53
53
  LayoutLMForSequenceClassification,
54
54
  LayoutLMForTokenClassification,
@@ -29,6 +29,7 @@ from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
31
31
  from ..datapoint.image import Image
32
+ from ..mapper.misc import curry
32
33
  from ..utils.context import timed_operation
33
34
  from ..utils.identifier import get_uuid_from_str
34
35
  from ..utils.settings import ObjectTypes
@@ -247,17 +248,24 @@ class Pipeline(ABC):
247
248
  """
248
249
  raise NotImplementedError()
249
250
 
250
- def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
251
+ @staticmethod
252
+ @curry
253
+ def _undo(dp: Image, service_ids: Optional[list[str]] = None) -> Image:
251
254
  """
252
- Composition of the backbone
255
+ Remove annotations from a datapoint
253
256
  """
254
- if session_id is None and self.set_session_id:
255
- session_id = self.get_session_id()
256
- for component in self.pipe_component_list:
257
- component.timer_on = True
258
- component.dp_manager.session_id = session_id
259
- df = component.predict_dataflow(df)
260
- return df
257
+ dp.remove(service_ids=service_ids)
258
+ return dp
259
+
260
+ def undo(self, df: DataFlow, service_ids: Optional[set[str]] = None) -> DataFlow:
261
+ """
262
+ Mapping a datapoint via `_undo` within a dataflow pipeline
263
+
264
+ :param df: An input dataflow of Images
265
+ :param service_ids: A set of service ids to remove
266
+ :return: A output dataflow of Images
267
+ """
268
+ return MapData(df, self._undo(service_ids=service_ids))
261
269
 
262
270
  @abstractmethod
263
271
  def analyze(self, **kwargs: Any) -> DataFlow:
@@ -273,6 +281,18 @@ class Pipeline(ABC):
273
281
  """
274
282
  raise NotImplementedError()
275
283
 
284
+ def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
285
+ """
286
+ Composition of the backbone
287
+ """
288
+ if session_id is None and self.set_session_id:
289
+ session_id = self.get_session_id()
290
+ for component in self.pipe_component_list:
291
+ component.timer_on = True
292
+ component.dp_manager.session_id = session_id
293
+ df = component.predict_dataflow(df)
294
+ return df
295
+
276
296
  def get_meta_annotation(self) -> MetaAnnotation:
277
297
  """
278
298
  Collects meta annotations from all pipeline components and summarizes the returned results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.37
3
+ Version: 0.37.1
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -1,9 +1,9 @@
1
- deepdoctection/__init__.py,sha256=7VELexCFRaBTCXHQpBoKhVi4hqUUgpcsLTqvHXHjufQ,12651
1
+ deepdoctection/__init__.py,sha256=i23UZBqMlkcvUILJxvUQAdj-3d2yV9edzxFsC5RoMHA,12655
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
4
  deepdoctection/analyzer/_config.py,sha256=NZl_REM8Ge2xfxvHN-mZR5KURcHfZii3xfMlKQwckbA,4864
5
5
  deepdoctection/analyzer/dd.py,sha256=DUOhOtwipHw5nabYqn3WGR9aZcgP0ma_bi_tjf9xscw,5973
6
- deepdoctection/analyzer/factory.py,sha256=xmo5F9X7I6lp0ZWJv8QavpMyG8UWYLvMi4qogsZV1_s,31507
6
+ deepdoctection/analyzer/factory.py,sha256=dEUOtdBS3yQGLqMqLR_kq5EYCR3IE30DjHNzE0spoQE,31519
7
7
  deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
8
8
  deepdoctection/configs/conf_dd_one.yaml,sha256=td7XsyVhdXkhh5Pie7sT_WNjGTaxBOWgpxhkobHd1H0,2325
9
9
  deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -19,7 +19,7 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
19
19
  deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
20
  deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
21
21
  deepdoctection/datapoint/convert.py,sha256=O7920pIomyEkzXwxpFsrzfhn7Pl6UzVGhNzv90VcuKU,7099
22
- deepdoctection/datapoint/image.py,sha256=DIXXXD2yKsacg47Wt_GEYEIe1MQkrd06Yr5xAWv_n64,33047
22
+ deepdoctection/datapoint/image.py,sha256=AM34br9eM1syTIUXcJIrAaP7pEnejbUl-w-CK5pr9z8,33233
23
23
  deepdoctection/datapoint/view.py,sha256=1rVMuqucCrI5zlwyXMADJQBV38V_zSNFqFyBi3cMA1E,44914
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
@@ -56,7 +56,7 @@ deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1
56
56
  deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
57
57
  deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
58
58
  deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0oM,12055
59
- deepdoctection/extern/hflayoutlm.py,sha256=KfoWx9_Rpa1Y2L51HLrYvenfWaTB4SVTmVJH00Cqb-s,56510
59
+ deepdoctection/extern/hflayoutlm.py,sha256=_OUeQsbNgfjbV7TPYBjkqc4HoTBQqkOINnwpewPJpl8,56494
60
60
  deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
61
61
  deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
62
62
  deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
@@ -103,7 +103,7 @@ deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_
103
103
  deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
104
104
  deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
105
105
  deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
106
- deepdoctection/pipe/base.py,sha256=Davjkf3D837y9AIITcx7yXdebmVaz6Moyw_5Wi3nfmg,13561
106
+ deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
107
107
  deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
108
108
  deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
109
  deepdoctection/pipe/doctectionpipe.py,sha256=uhsrSuwaHcOMj8b8i6wCpPaZlSxCTaeHVhMokJ8vRSI,11835
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
141
141
  deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
143
  deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.37.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.37.dist-info/METADATA,sha256=0qGgmf07xmNRJx55yfMagHcfAoQG6GO9KTw6b0tv0uA,19543
146
- deepdoctection-0.37.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
- deepdoctection-0.37.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.37.dist-info/RECORD,,
144
+ deepdoctection-0.37.1.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.37.1.dist-info/METADATA,sha256=M-HjpJpxuM4tHN0ld8DscsZPgKRUoNmsbx9slFkj6tg,19545
146
+ deepdoctection-0.37.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
+ deepdoctection-0.37.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.37.1.dist-info/RECORD,,