deepdoctection 0.39__tar.gz → 0.39.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.39 → deepdoctection-0.39.1}/PKG-INFO +5 -5
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/__init__.py +1 -1
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/base.py +38 -5
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/common.py +3 -3
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/hf_detr_train.py +1 -1
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/PKG-INFO +5 -5
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/requires.txt +4 -4
- {deepdoctection-0.39 → deepdoctection-0.39.1}/setup.py +3 -2
- {deepdoctection-0.39 → deepdoctection-0.39.1}/LICENSE +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/README.md +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/_config.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/factory.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/image.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/view.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/doctrocr.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/segment.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/settings.py +1 -1
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/setup.cfg +0 -0
- {deepdoctection-0.39 → deepdoctection-0.39.1}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.39
|
|
3
|
+
Version: 0.39.1
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
|
|
|
61
61
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
62
62
|
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
63
63
|
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
64
|
-
Requires-Dist: fasttext
|
|
64
|
+
Requires-Dist: fasttext-wheel; extra == "tf"
|
|
65
65
|
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
66
66
|
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
67
67
|
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
|
86
86
|
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
87
87
|
Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
88
88
|
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
89
|
-
Requires-Dist: transformers>=4.
|
|
89
|
+
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
90
90
|
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
91
91
|
Requires-Dist: python-doctr==0.8.1; extra == "pt"
|
|
92
92
|
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
93
93
|
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
94
|
-
Requires-Dist: fasttext
|
|
94
|
+
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
95
95
|
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
96
96
|
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
97
97
|
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
|
99
99
|
Provides-Extra: docs
|
|
100
100
|
Requires-Dist: tensorpack==0.11; extra == "docs"
|
|
101
101
|
Requires-Dist: boto3==1.34.102; extra == "docs"
|
|
102
|
-
Requires-Dist: transformers>=4.
|
|
102
|
+
Requires-Dist: transformers>=4.48.0; extra == "docs"
|
|
103
103
|
Requires-Dist: accelerate>=0.29.1; extra == "docs"
|
|
104
104
|
Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
|
|
105
105
|
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
@@ -24,7 +24,7 @@ from __future__ import annotations
|
|
|
24
24
|
from abc import ABC, abstractmethod
|
|
25
25
|
from collections import defaultdict
|
|
26
26
|
from dataclasses import dataclass, field
|
|
27
|
-
from typing import Any, Mapping, Optional, Union
|
|
27
|
+
from typing import Any, Mapping, Optional, Union, Callable
|
|
28
28
|
from uuid import uuid1
|
|
29
29
|
|
|
30
30
|
from ..dataflow import DataFlow, MapData
|
|
@@ -33,6 +33,7 @@ from ..mapper.misc import curry
|
|
|
33
33
|
from ..utils.context import timed_operation
|
|
34
34
|
from ..utils.identifier import get_uuid_from_str
|
|
35
35
|
from ..utils.settings import ObjectTypes
|
|
36
|
+
from ..utils.types import DP
|
|
36
37
|
from .anngen import DatapointManager
|
|
37
38
|
|
|
38
39
|
|
|
@@ -76,6 +77,30 @@ class PipelineComponent(ABC):
|
|
|
76
77
|
self.service_id = self.get_service_id()
|
|
77
78
|
self.dp_manager = DatapointManager(self.service_id, model_id)
|
|
78
79
|
self.timer_on = False
|
|
80
|
+
self.filter_func: Callable[[DP], bool] = lambda dp: False
|
|
81
|
+
|
|
82
|
+
def set_inbound_filter(self, filter_func: Callable[[DP], bool]) -> None:
|
|
83
|
+
"""
|
|
84
|
+
Set a filter function to decide whether an image of the inbound dataflow should be passed to self.serve.
|
|
85
|
+
The filter function should return a boolean value. If the function returns True, the image will not be processed
|
|
86
|
+
by this pipeline component.
|
|
87
|
+
|
|
88
|
+
**Example:**
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
def do_not_process_tables(dp: Image) -> bool:
|
|
92
|
+
if "table" not in dp.get_categories_from_current_state():
|
|
93
|
+
return True
|
|
94
|
+
return False
|
|
95
|
+
|
|
96
|
+
layout_component = ImageLayoutService(...)
|
|
97
|
+
layout_component.set_inbound_filter(do_not_process_tables)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
:param filter_func: A function that takes an image datapoint and returns a boolean value
|
|
102
|
+
"""
|
|
103
|
+
self.filter_func = filter_func # type: ignore
|
|
79
104
|
|
|
80
105
|
@abstractmethod
|
|
81
106
|
def serve(self, dp: Image) -> None:
|
|
@@ -92,6 +117,12 @@ class PipelineComponent(ABC):
|
|
|
92
117
|
"""
|
|
93
118
|
raise NotImplementedError()
|
|
94
119
|
|
|
120
|
+
def _pass_datapoint(self, dp: Image) -> None:
|
|
121
|
+
self.dp_manager.datapoint = dp
|
|
122
|
+
if not self.filter_func(dp):
|
|
123
|
+
self.serve(dp)
|
|
124
|
+
|
|
125
|
+
|
|
95
126
|
def pass_datapoint(self, dp: Image) -> Image:
|
|
96
127
|
"""
|
|
97
128
|
Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
|
|
@@ -103,11 +134,9 @@ class PipelineComponent(ABC):
|
|
|
103
134
|
"""
|
|
104
135
|
if self.timer_on:
|
|
105
136
|
with timed_operation(self.__class__.__name__):
|
|
106
|
-
self.dp_manager.datapoint = dp
|
|
107
|
-
self.serve(dp)
|
|
137
|
+
self._pass_datapoint(dp)
|
|
108
138
|
else:
|
|
109
|
-
self.dp_manager.datapoint = dp
|
|
110
|
-
self.serve(dp)
|
|
139
|
+
self._pass_datapoint(dp)
|
|
111
140
|
return self.dp_manager.datapoint
|
|
112
141
|
|
|
113
142
|
def predict_dataflow(self, df: DataFlow) -> DataFlow:
|
|
@@ -205,6 +234,7 @@ class Pipeline(ABC):
|
|
|
205
234
|
|
|
206
235
|
**Example:**
|
|
207
236
|
|
|
237
|
+
```python
|
|
208
238
|
layout = LayoutPipeComponent(layout_detector ...)
|
|
209
239
|
text = TextExtractPipeComponent(text_detector ...)
|
|
210
240
|
simple_pipe = MyPipeline(pipeline_component = [layout, text])
|
|
@@ -212,6 +242,7 @@ class Pipeline(ABC):
|
|
|
212
242
|
|
|
213
243
|
for page in doc_dataflow:
|
|
214
244
|
print(page)
|
|
245
|
+
```
|
|
215
246
|
|
|
216
247
|
In doing so, page contains all document structures determined via the pipeline (either directly from the Image core
|
|
217
248
|
model or already processed further).
|
|
@@ -225,10 +256,12 @@ class Pipeline(ABC):
|
|
|
225
256
|
|
|
226
257
|
**Example:**
|
|
227
258
|
|
|
259
|
+
```python
|
|
228
260
|
pipe = MyPipeline(pipeline_component = [layout, text])
|
|
229
261
|
pipe.set_session_id = True
|
|
230
262
|
|
|
231
263
|
df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
|
|
264
|
+
```
|
|
232
265
|
"""
|
|
233
266
|
|
|
234
267
|
def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
|
|
@@ -349,8 +349,8 @@ class AnnotationNmsService(PipelineComponent):
|
|
|
349
349
|
def __init__(
|
|
350
350
|
self,
|
|
351
351
|
nms_pairs: Sequence[Sequence[TypeOrStr]],
|
|
352
|
-
thresholds: Union[float,
|
|
353
|
-
priority: Optional[
|
|
352
|
+
thresholds: Union[float, Sequence[float]],
|
|
353
|
+
priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
|
|
354
354
|
):
|
|
355
355
|
"""
|
|
356
356
|
:param nms_pairs: Groups of categories, either as string or by `ObjectType`.
|
|
@@ -362,7 +362,7 @@ class AnnotationNmsService(PipelineComponent):
|
|
|
362
362
|
self.threshold = [thresholds for _ in self.nms_pairs]
|
|
363
363
|
else:
|
|
364
364
|
assert len(self.nms_pairs) == len(thresholds), "Sequences of nms_pairs and thresholds must have same length"
|
|
365
|
-
self.threshold = thresholds
|
|
365
|
+
self.threshold = thresholds # type: ignore
|
|
366
366
|
if priority:
|
|
367
367
|
assert len(self.nms_pairs) == len(priority), "Sequences of nms_pairs and priority must have same length"
|
|
368
368
|
|
|
@@ -73,7 +73,7 @@ class DetrDerivedTrainer(Trainer):
|
|
|
73
73
|
model: Union[PreTrainedModel, nn.Module],
|
|
74
74
|
args: TrainingArguments,
|
|
75
75
|
data_collator: DetrDataCollator,
|
|
76
|
-
train_dataset:
|
|
76
|
+
train_dataset: DatasetAdapter,
|
|
77
77
|
):
|
|
78
78
|
self.evaluator: Optional[Evaluator] = None
|
|
79
79
|
self.build_eval_kwargs: Optional[dict[str, Any]] = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.39
|
|
3
|
+
Version: 0.39.1
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
|
|
|
61
61
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
62
62
|
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
63
63
|
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
64
|
-
Requires-Dist: fasttext
|
|
64
|
+
Requires-Dist: fasttext-wheel; extra == "tf"
|
|
65
65
|
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
66
66
|
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
67
67
|
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
|
86
86
|
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
87
87
|
Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
88
88
|
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
89
|
-
Requires-Dist: transformers>=4.
|
|
89
|
+
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
90
90
|
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
91
91
|
Requires-Dist: python-doctr==0.8.1; extra == "pt"
|
|
92
92
|
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
93
93
|
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
94
|
-
Requires-Dist: fasttext
|
|
94
|
+
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
95
95
|
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
96
96
|
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
97
97
|
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
|
99
99
|
Provides-Extra: docs
|
|
100
100
|
Requires-Dist: tensorpack==0.11; extra == "docs"
|
|
101
101
|
Requires-Dist: boto3==1.34.102; extra == "docs"
|
|
102
|
-
Requires-Dist: transformers>=4.
|
|
102
|
+
Requires-Dist: transformers>=4.48.0; extra == "docs"
|
|
103
103
|
Requires-Dist: accelerate>=0.29.1; extra == "docs"
|
|
104
104
|
Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
|
|
105
105
|
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
@@ -36,7 +36,7 @@ types-urllib3>=1.26.25.14
|
|
|
36
36
|
[docs]
|
|
37
37
|
tensorpack==0.11
|
|
38
38
|
boto3==1.34.102
|
|
39
|
-
transformers>=4.
|
|
39
|
+
transformers>=4.48.0
|
|
40
40
|
accelerate>=0.29.1
|
|
41
41
|
pdfplumber>=0.11.0
|
|
42
42
|
lxml>=4.9.1
|
|
@@ -67,12 +67,12 @@ termcolor>=1.1
|
|
|
67
67
|
tabulate>=0.7.7
|
|
68
68
|
tqdm==4.64.0
|
|
69
69
|
timm>=0.9.16
|
|
70
|
-
transformers>=4.
|
|
70
|
+
transformers>=4.48.0
|
|
71
71
|
accelerate>=0.29.1
|
|
72
72
|
python-doctr==0.8.1
|
|
73
73
|
boto3==1.34.102
|
|
74
74
|
pdfplumber>=0.11.0
|
|
75
|
-
fasttext
|
|
75
|
+
fasttext-wheel
|
|
76
76
|
jdeskew>=0.2.2
|
|
77
77
|
apted==1.0.3
|
|
78
78
|
distance==0.1.3
|
|
@@ -109,7 +109,7 @@ python-doctr==0.8.1
|
|
|
109
109
|
pycocotools>=2.0.2
|
|
110
110
|
boto3==1.34.102
|
|
111
111
|
pdfplumber>=0.11.0
|
|
112
|
-
fasttext
|
|
112
|
+
fasttext-wheel
|
|
113
113
|
jdeskew>=0.2.2
|
|
114
114
|
apted==1.0.3
|
|
115
115
|
distance==0.1.3
|
|
@@ -78,7 +78,7 @@ _DEPS = [
|
|
|
78
78
|
"tensorpack==0.11",
|
|
79
79
|
# PyTorch related dependencies
|
|
80
80
|
"timm>=0.9.16",
|
|
81
|
-
"transformers>=4.
|
|
81
|
+
"transformers>=4.48.0",
|
|
82
82
|
"accelerate>=0.29.1",
|
|
83
83
|
# As maintenance of Detectron2 decreases, we will now use our own fork to keep updating after rigorous testing.
|
|
84
84
|
# This will hopefully prevent issues like 233
|
|
@@ -91,6 +91,7 @@ _DEPS = [
|
|
|
91
91
|
"tf2onnx>=1.9.2",
|
|
92
92
|
"python-doctr==0.8.1",
|
|
93
93
|
"fasttext==0.9.2",
|
|
94
|
+
"fasttext-wheel",
|
|
94
95
|
# dev dependencies
|
|
95
96
|
"python-dotenv==1.0.0",
|
|
96
97
|
"click", # version will not break black
|
|
@@ -147,7 +148,7 @@ dist_deps = deps_list(
|
|
|
147
148
|
additional_deps = deps_list(
|
|
148
149
|
"boto3",
|
|
149
150
|
"pdfplumber",
|
|
150
|
-
"fasttext",
|
|
151
|
+
"fasttext-wheel",
|
|
151
152
|
"jdeskew",
|
|
152
153
|
"apted",
|
|
153
154
|
"distance",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/doclaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/fintabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/iiitar13k.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/layouttest.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/publaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtables1m.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/config.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py
RENAMED
|
File without changes
|
{deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -101,7 +101,6 @@ class DocumentType(ObjectTypes):
|
|
|
101
101
|
GOVERNMENT_TENDERS = "government_tenders"
|
|
102
102
|
MANUALS = "manuals"
|
|
103
103
|
PATENTS = "patents"
|
|
104
|
-
MARK = "mark"
|
|
105
104
|
|
|
106
105
|
|
|
107
106
|
@object_types_registry.register("LayoutType")
|
|
@@ -132,6 +131,7 @@ class LayoutType(ObjectTypes):
|
|
|
132
131
|
PAGE_NUMBER = "page_number"
|
|
133
132
|
KEY_VALUE_AREA = "key_value_area"
|
|
134
133
|
LIST_ITEM = "list_item"
|
|
134
|
+
MARK = "mark"
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
@object_types_registry.register("TableType")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|