deepdoctection-0.37-py3-none-any.whl → deepdoctection-0.37.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +1 -1
- deepdoctection/analyzer/factory.py +3 -3
- deepdoctection/datapoint/image.py +7 -3
- deepdoctection/extern/hflayoutlm.py +1 -1
- deepdoctection/pipe/base.py +29 -9
- {deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/METADATA +1 -1
- {deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/RECORD +10 -10
- {deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/LICENSE +0 -0
- {deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/top_level.txt +0 -0
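deepdoctection/__init__.py
CHANGED
(The +1 -1 hunk for this file is not included in this extract.)
deepdoctection/analyzer/factory.py
CHANGED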
|
@@ -327,9 +327,9 @@ class ServiceFactory:
|
|
|
327
327
|
)
|
|
328
328
|
if config.OCR.USE_TEXTRACT:
|
|
329
329
|
credentials_kwargs = {
|
|
330
|
-
"aws_access_key_id": environ.get("
|
|
331
|
-
"aws_secret_access_key": environ.get("
|
|
332
|
-
"config": Config(region_name=environ.get("
|
|
330
|
+
"aws_access_key_id": environ.get("AWS_ACCESS_KEY", None),
|
|
331
|
+
"aws_secret_access_key": environ.get("AWS_SECRET_KEY", None),
|
|
332
|
+
"config": Config(region_name=environ.get("AWS_REGION", None)),
|
|
333
333
|
}
|
|
334
334
|
return TextractOcrDetector(**credentials_kwargs)
|
|
335
335
|
raise ValueError("You have set USE_OCR=True but any of USE_TESSERACT, USE_DOCTR, USE_TEXTRACT is set to False")
|
|
deepdoctection/datapoint/image.py
CHANGED
|
@@ -34,6 +34,7 @@ from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDErr
|
|
|
34
34
|
from ..utils.identifier import get_uuid, is_uuid_like
|
|
35
35
|
from ..utils.settings import ObjectTypes, SummaryType, get_type
|
|
36
36
|
from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
|
|
37
|
+
from ..utils.logger import LoggingRecord, logger
|
|
37
38
|
from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
|
|
38
39
|
from .box import crop_box_from_image, global_to_local_coords, intersection_box
|
|
39
40
|
from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
|
|
@@ -474,8 +475,11 @@ class Image:
|
|
|
474
475
|
|
|
475
476
|
for service_id in service_ids:
|
|
476
477
|
if service_id not in service_id_to_annotation_id:
|
|
477
|
-
|
|
478
|
-
|
|
478
|
+
logger.info(
|
|
479
|
+
LoggingRecord(
|
|
480
|
+
f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."))
|
|
481
|
+
|
|
482
|
+
annotation_ids = service_id_to_annotation_id.get(service_id, [])
|
|
479
483
|
|
|
480
484
|
for ann_id in annotation_ids:
|
|
481
485
|
if ann_id not in ann_id_to_annotation_maps:
|
|
@@ -747,7 +751,7 @@ class Image:
|
|
|
747
751
|
if sub_cat.service_id:
|
|
748
752
|
service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
|
|
749
753
|
if ann.image is not None:
|
|
750
|
-
for summary_cat_key in ann.image.summary:
|
|
754
|
+
for summary_cat_key in ann.image.summary.sub_categories:
|
|
751
755
|
summary_cat = ann.get_summary(summary_cat_key)
|
|
752
756
|
if summary_cat.service_id:
|
|
753
757
|
service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
|
|
deepdoctection/extern/hflayoutlm.py
CHANGED
|
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
|
|
|
48
48
|
import torch.nn.functional as F
|
|
49
49
|
|
|
50
50
|
with try_import() as tr_import_guard:
|
|
51
|
-
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
|
|
51
|
+
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
|
|
52
52
|
from transformers import (
|
|
53
53
|
LayoutLMForSequenceClassification,
|
|
54
54
|
LayoutLMForTokenClassification,
|
deepdoctection/pipe/base.py
CHANGED
|
@@ -29,6 +29,7 @@ from uuid import uuid1
|
|
|
29
29
|
|
|
30
30
|
from ..dataflow import DataFlow, MapData
|
|
31
31
|
from ..datapoint.image import Image
|
|
32
|
+
from ..mapper.misc import curry
|
|
32
33
|
from ..utils.context import timed_operation
|
|
33
34
|
from ..utils.identifier import get_uuid_from_str
|
|
34
35
|
from ..utils.settings import ObjectTypes
|
|
@@ -247,17 +248,24 @@ class Pipeline(ABC):
|
|
|
247
248
|
"""
|
|
248
249
|
raise NotImplementedError()
|
|
249
250
|
|
|
250
|
-
|
|
251
|
+
@staticmethod
|
|
252
|
+
@curry
|
|
253
|
+
def _undo(dp: Image, service_ids: Optional[list[str]] = None) -> Image:
|
|
251
254
|
"""
|
|
252
|
-
|
|
255
|
+
Remove annotations from a datapoint
|
|
253
256
|
"""
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
257
|
+
dp.remove(service_ids=service_ids)
|
|
258
|
+
return dp
|
|
259
|
+
|
|
260
|
+
def undo(self, df: DataFlow, service_ids: Optional[set[str]] = None) -> DataFlow:
|
|
261
|
+
"""
|
|
262
|
+
Mapping a datapoint via `_undo` within a dataflow pipeline
|
|
263
|
+
|
|
264
|
+
:param df: An input dataflow of Images
|
|
265
|
+
:param service_ids: A set of service ids to remove
|
|
266
|
+
:return: A output dataflow of Images
|
|
267
|
+
"""
|
|
268
|
+
return MapData(df, self._undo(service_ids=service_ids))
|
|
261
269
|
|
|
262
270
|
@abstractmethod
|
|
263
271
|
def analyze(self, **kwargs: Any) -> DataFlow:
|
|
@@ -273,6 +281,18 @@ class Pipeline(ABC):
|
|
|
273
281
|
"""
|
|
274
282
|
raise NotImplementedError()
|
|
275
283
|
|
|
284
|
+
def _build_pipe(self, df: DataFlow, session_id: Optional[str] = None) -> DataFlow:
|
|
285
|
+
"""
|
|
286
|
+
Composition of the backbone
|
|
287
|
+
"""
|
|
288
|
+
if session_id is None and self.set_session_id:
|
|
289
|
+
session_id = self.get_session_id()
|
|
290
|
+
for component in self.pipe_component_list:
|
|
291
|
+
component.timer_on = True
|
|
292
|
+
component.dp_manager.session_id = session_id
|
|
293
|
+
df = component.predict_dataflow(df)
|
|
294
|
+
return df
|
|
295
|
+
|
|
276
296
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
277
297
|
"""
|
|
278
298
|
Collects meta annotations from all pipeline components and summarizes the returned results
|
|
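{deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/METADATA
CHANGED
(The +1 -1 hunk for this file is not included in this extract.)
{deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/RECORD
CHANGED
|
@@ -1,9 +1,9 @@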
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=i23UZBqMlkcvUILJxvUQAdj-3d2yV9edzxFsC5RoMHA,12655
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
|
|
4
4
|
deepdoctection/analyzer/_config.py,sha256=NZl_REM8Ge2xfxvHN-mZR5KURcHfZii3xfMlKQwckbA,4864
|
|
5
5
|
deepdoctection/analyzer/dd.py,sha256=DUOhOtwipHw5nabYqn3WGR9aZcgP0ma_bi_tjf9xscw,5973
|
|
6
|
-
deepdoctection/analyzer/factory.py,sha256=
|
|
6
|
+
deepdoctection/analyzer/factory.py,sha256=dEUOtdBS3yQGLqMqLR_kq5EYCR3IE30DjHNzE0spoQE,31519
|
|
7
7
|
deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
8
8
|
deepdoctection/configs/conf_dd_one.yaml,sha256=td7XsyVhdXkhh5Pie7sT_WNjGTaxBOWgpxhkobHd1H0,2325
|
|
9
9
|
deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
|
|
@@ -19,7 +19,7 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
|
|
|
19
19
|
deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
|
|
20
20
|
deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
|
|
21
21
|
deepdoctection/datapoint/convert.py,sha256=O7920pIomyEkzXwxpFsrzfhn7Pl6UzVGhNzv90VcuKU,7099
|
|
22
|
-
deepdoctection/datapoint/image.py,sha256=
|
|
22
|
+
deepdoctection/datapoint/image.py,sha256=AM34br9eM1syTIUXcJIrAaP7pEnejbUl-w-CK5pr9z8,33233
|
|
23
23
|
deepdoctection/datapoint/view.py,sha256=1rVMuqucCrI5zlwyXMADJQBV38V_zSNFqFyBi3cMA1E,44914
|
|
24
24
|
deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
|
|
25
25
|
deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
|
|
@@ -56,7 +56,7 @@ deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1
|
|
|
56
56
|
deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
|
|
57
57
|
deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
|
|
58
58
|
deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0oM,12055
|
|
59
|
-
deepdoctection/extern/hflayoutlm.py,sha256=
|
|
59
|
+
deepdoctection/extern/hflayoutlm.py,sha256=_OUeQsbNgfjbV7TPYBjkqc4HoTBQqkOINnwpewPJpl8,56494
|
|
60
60
|
deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
|
|
61
61
|
deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
|
|
62
62
|
deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
|
|
@@ -103,7 +103,7 @@ deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_
|
|
|
103
103
|
deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
|
|
104
104
|
deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
|
|
105
105
|
deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac,16393
|
|
106
|
-
deepdoctection/pipe/base.py,sha256=
|
|
106
|
+
deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
|
|
107
107
|
deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
|
|
108
108
|
deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
|
|
109
109
|
deepdoctection/pipe/doctectionpipe.py,sha256=uhsrSuwaHcOMj8b8i6wCpPaZlSxCTaeHVhMokJ8vRSI,11835
|
|
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
|
|
|
141
141
|
deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
|
|
142
142
|
deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
|
|
143
143
|
deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
|
|
144
|
-
deepdoctection-0.37.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
145
|
-
deepdoctection-0.37.dist-info/METADATA,sha256=
|
|
146
|
-
deepdoctection-0.37.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
147
|
-
deepdoctection-0.37.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
148
|
-
deepdoctection-0.37.dist-info/RECORD,,
|
|
144
|
+
deepdoctection-0.37.1.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
145
|
+
deepdoctection-0.37.1.dist-info/METADATA,sha256=M-HjpJpxuM4tHN0ld8DscsZPgKRUoNmsbx9slFkj6tg,19545
|
|
146
|
+
deepdoctection-0.37.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
147
|
+
deepdoctection-0.37.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
148
|
+
deepdoctection-0.37.1.dist-info/RECORD,,
|
|
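{deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/LICENSE
File without changes
|
|
{deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/WHEEL
File without changes
|
|
{deepdoctection-0.37.dist-info → deepdoctection-0.37.1.dist-info}/top_level.txt
File without changes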
|