deepdoctection 0.41.0__tar.gz → 0.42.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/PKG-INFO +1 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/__init__.py +3 -2
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/analyzer/_config.py +4 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/analyzer/dd.py +7 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/analyzer/factory.py +42 -5
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/view.py +52 -15
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/doctrocr.py +2 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/d2struct.py +64 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/tpstruct.py +30 -6
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/common.py +56 -31
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/segment.py +4 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/train/hf_detr_train.py +1 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/PKG-INFO +1 -1
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/LICENSE +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/README.md +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datapoint/image.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/settings.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/requires.txt +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/setup.cfg +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/setup.py +0 -0
- {deepdoctection-0.41.0 → deepdoctection-0.42.0}/tests/test_utils.py +0 -0
|
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
|
|
|
25
25
|
|
|
26
26
|
# pylint: enable=wrong-import-position
|
|
27
27
|
|
|
28
|
-
__version__ = "0.41.0"
|
|
28
|
+
__version__ = "0.42.0"
|
|
29
29
|
|
|
30
30
|
_IMPORT_STRUCTURE = {
|
|
31
31
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
|
|
@@ -95,6 +95,7 @@ _IMPORT_STRUCTURE = {
|
|
|
95
95
|
"Image",
|
|
96
96
|
"Word",
|
|
97
97
|
"Layout",
|
|
98
|
+
"List",
|
|
98
99
|
"Cell",
|
|
99
100
|
"Table",
|
|
100
101
|
"Page",
|
|
@@ -440,7 +441,7 @@ if TYPE_CHECKING:
|
|
|
440
441
|
from .eval import *
|
|
441
442
|
from .extern import * # type: ignore
|
|
442
443
|
from .mapper import * # type: ignore
|
|
443
|
-
from .pipe import *
|
|
444
|
+
from .pipe import * # type: ignore
|
|
444
445
|
from .train import *
|
|
445
446
|
from .utils import *
|
|
446
447
|
|
|
@@ -40,7 +40,7 @@ cfg.TF.CELL.FILTER = None
|
|
|
40
40
|
cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
|
|
41
41
|
cfg.TF.ITEM.FILTER = None
|
|
42
42
|
|
|
43
|
-
cfg.PT.ENFORCE_WEIGHTS =
|
|
43
|
+
cfg.PT.ENFORCE_WEIGHTS.LAYOUT = True
|
|
44
44
|
cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
|
|
45
45
|
cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
|
|
46
46
|
cfg.PT.LAYOUT.FILTER = None
|
|
@@ -49,6 +49,7 @@ cfg.PT.LAYOUT.PAD.RIGHT = 60
|
|
|
49
49
|
cfg.PT.LAYOUT.PAD.BOTTOM = 60
|
|
50
50
|
cfg.PT.LAYOUT.PAD.LEFT = 60
|
|
51
51
|
|
|
52
|
+
cfg.PT.ENFORCE_WEIGHTS.ITEM = True
|
|
52
53
|
cfg.PT.ITEM.WEIGHTS = "item/d2_model_1639999_item_inf_only.pt"
|
|
53
54
|
cfg.PT.ITEM.WEIGHTS_TS = "item/d2_model_1639999_item_inf_only.ts"
|
|
54
55
|
cfg.PT.ITEM.FILTER = None
|
|
@@ -57,6 +58,7 @@ cfg.PT.ITEM.PAD.RIGHT = 60
|
|
|
57
58
|
cfg.PT.ITEM.PAD.BOTTOM = 60
|
|
58
59
|
cfg.PT.ITEM.PAD.LEFT = 60
|
|
59
60
|
|
|
61
|
+
cfg.PT.ENFORCE_WEIGHTS.CELL = True
|
|
60
62
|
cfg.PT.CELL.WEIGHTS = "cell/d2_model_1849999_cell_inf_only.pt"
|
|
61
63
|
cfg.PT.CELL.WEIGHTS_TS = "cell/d2_model_1849999_cell_inf_only.ts"
|
|
62
64
|
cfg.PT.CELL.FILTER = None
|
|
@@ -137,6 +139,7 @@ cfg.TEXT_ORDERING.HEIGHT_TOLERANCE = 2.0
|
|
|
137
139
|
cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
|
|
138
140
|
|
|
139
141
|
cfg.USE_LAYOUT_LINK = False
|
|
142
|
+
cfg.USE_LINE_MATCHER = False
|
|
140
143
|
cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = []
|
|
141
144
|
cfg.LAYOUT_LINK.CHILD_CATEGORIES = []
|
|
142
145
|
|
|
@@ -32,7 +32,7 @@ from ..extern.pt.ptutils import get_torch_device
|
|
|
32
32
|
from ..extern.tp.tfutils import disable_tp_layer_logging, get_tf_device
|
|
33
33
|
from ..pipe.doctectionpipe import DoctectionPipe
|
|
34
34
|
from ..utils.env_info import ENV_VARS_TRUE
|
|
35
|
-
from ..utils.file_utils import tensorpack_available
|
|
35
|
+
from ..utils.file_utils import tensorpack_available, detectron2_available
|
|
36
36
|
from ..utils.fs import get_configs_dir_path, get_package_path, maybe_copy_config_to_cache
|
|
37
37
|
from ..utils.logger import LoggingRecord, logger
|
|
38
38
|
from ..utils.metacfg import set_config_by_yaml
|
|
@@ -140,6 +140,12 @@ def get_dd_analyzer(
|
|
|
140
140
|
cfg.LANGUAGE = None
|
|
141
141
|
cfg.LIB = lib
|
|
142
142
|
cfg.DEVICE = device
|
|
143
|
+
if not detectron2_available() or cfg.PT.LAYOUT.WEIGHTS is None:
|
|
144
|
+
cfg.PT.ENFORCE_WEIGHTS.LAYOUT=False
|
|
145
|
+
if not detectron2_available() or cfg.PT.ITEM.WEIGHTS is None:
|
|
146
|
+
cfg.PT.ENFORCE_WEIGHTS.ITEM=False
|
|
147
|
+
if not detectron2_available() or cfg.PT.CELL.WEIGHTS is None:
|
|
148
|
+
cfg.PT.ENFORCE_WEIGHTS.CELL=False
|
|
143
149
|
cfg.freeze()
|
|
144
150
|
|
|
145
151
|
if config_overwrite:
|
|
@@ -50,7 +50,6 @@ from ..pipe.sub_layout import DetectResultGenerator, SubImageLayoutService
|
|
|
50
50
|
from ..pipe.text import TextExtractionService
|
|
51
51
|
from ..pipe.transform import SimpleTransformService
|
|
52
52
|
from ..utils.error import DependencyError
|
|
53
|
-
from ..utils.file_utils import detectron2_available
|
|
54
53
|
from ..utils.fs import get_configs_dir_path
|
|
55
54
|
from ..utils.metacfg import AttrDict
|
|
56
55
|
from ..utils.settings import CellType, LayoutType, Relationships
|
|
@@ -96,12 +95,13 @@ class ServiceFactory:
|
|
|
96
95
|
"""
|
|
97
96
|
if config.LIB is None:
|
|
98
97
|
raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
|
|
98
|
+
|
|
99
99
|
weights = (
|
|
100
100
|
getattr(config.TF, mode).WEIGHTS
|
|
101
101
|
if config.LIB == "TF"
|
|
102
102
|
else (
|
|
103
103
|
getattr(config.PT, mode).WEIGHTS
|
|
104
|
-
if
|
|
104
|
+
if getattr(config.PT.ENFORCE_WEIGHTS,mode)
|
|
105
105
|
else getattr(config.PT, mode).WEIGHTS_TS
|
|
106
106
|
)
|
|
107
107
|
)
|
|
@@ -240,8 +240,6 @@ class ServiceFactory:
|
|
|
240
240
|
|
|
241
241
|
:param config: configuration object
|
|
242
242
|
"""
|
|
243
|
-
if not detectron2_available() and config.LIB == "PT":
|
|
244
|
-
raise ModuleNotFoundError("LAYOUT_NMS_PAIRS is only available for detectron2")
|
|
245
243
|
if not isinstance(config.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
|
|
246
244
|
config.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
|
|
247
245
|
):
|
|
@@ -577,7 +575,14 @@ class ServiceFactory:
|
|
|
577
575
|
parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
|
|
578
576
|
child_categories=config.TEXT_CONTAINER,
|
|
579
577
|
relationship_key=Relationships.CHILD,
|
|
580
|
-
)
|
|
578
|
+
),
|
|
579
|
+
FamilyCompound(
|
|
580
|
+
parent_categories=[LayoutType.LIST],
|
|
581
|
+
child_categories=[LayoutType.LIST_ITEM],
|
|
582
|
+
relationship_key=Relationships.CHILD,
|
|
583
|
+
create_synthetic_parent=True,
|
|
584
|
+
synthetic_parent=LayoutType.LIST,
|
|
585
|
+
),
|
|
581
586
|
]
|
|
582
587
|
return MatchingService(
|
|
583
588
|
family_compounds=family_compounds,
|
|
@@ -622,6 +627,34 @@ class ServiceFactory:
|
|
|
622
627
|
"""
|
|
623
628
|
return ServiceFactory._build_layout_link_matching_service(config)
|
|
624
629
|
|
|
630
|
+
@staticmethod
|
|
631
|
+
def _build_line_matching_service(config: AttrDict) -> MatchingService:
|
|
632
|
+
matcher = IntersectionMatcher(
|
|
633
|
+
matching_rule=config.WORD_MATCHING.RULE,
|
|
634
|
+
threshold=config.WORD_MATCHING.THRESHOLD,
|
|
635
|
+
max_parent_only=config.WORD_MATCHING.MAX_PARENT_ONLY,
|
|
636
|
+
)
|
|
637
|
+
family_compounds = [
|
|
638
|
+
FamilyCompound(
|
|
639
|
+
parent_categories=[LayoutType.LIST],
|
|
640
|
+
child_categories=[LayoutType.LINE],
|
|
641
|
+
relationship_key=Relationships.CHILD,
|
|
642
|
+
),
|
|
643
|
+
]
|
|
644
|
+
return MatchingService(
|
|
645
|
+
family_compounds=family_compounds,
|
|
646
|
+
matcher=matcher,
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
@staticmethod
|
|
650
|
+
def build_line_matching_service(config: AttrDict) -> MatchingService:
|
|
651
|
+
"""Building a word matching service
|
|
652
|
+
|
|
653
|
+
:param config: configuration object
|
|
654
|
+
:return: MatchingService
|
|
655
|
+
"""
|
|
656
|
+
return ServiceFactory._build_line_matching_service(config)
|
|
657
|
+
|
|
625
658
|
@staticmethod
|
|
626
659
|
def _build_text_order_service(config: AttrDict) -> TextOrderService:
|
|
627
660
|
"""Building a text order service
|
|
@@ -748,6 +781,10 @@ class ServiceFactory:
|
|
|
748
781
|
layout_link_matching_service = ServiceFactory.build_layout_link_matching_service(config)
|
|
749
782
|
pipe_component_list.append(layout_link_matching_service)
|
|
750
783
|
|
|
784
|
+
if config.USE_LINE_MATCHER:
|
|
785
|
+
line_list_matching_service = ServiceFactory.build_line_matching_service(config)
|
|
786
|
+
pipe_component_list.append(line_list_matching_service)
|
|
787
|
+
|
|
751
788
|
page_parsing_service = ServiceFactory.build_page_parsing_service(config)
|
|
752
789
|
|
|
753
790
|
return DoctectionPipe(pipeline_component_list=pipe_component_list, page_parsing_service=page_parsing_service)
|
|
@@ -25,7 +25,6 @@ from copy import copy
|
|
|
25
25
|
from typing import Any, Mapping, Optional, Sequence, Type, TypedDict, Union, no_type_check
|
|
26
26
|
|
|
27
27
|
import numpy as np
|
|
28
|
-
from typing_extensions import LiteralString
|
|
29
28
|
|
|
30
29
|
from ..utils.error import AnnotationError, ImageError
|
|
31
30
|
from ..utils.logger import LoggingRecord, log_once, logger
|
|
@@ -285,6 +284,52 @@ class Cell(Layout):
|
|
|
285
284
|
return set(CellType).union(super().get_attribute_names())
|
|
286
285
|
|
|
287
286
|
|
|
287
|
+
class List(Layout):
|
|
288
|
+
"""
|
|
289
|
+
List specific subclass of `ImageAnnotationBaseView` modelled by `LayoutType`.
|
|
290
|
+
"""
|
|
291
|
+
|
|
292
|
+
@property
|
|
293
|
+
def words(self) -> list[ImageAnnotationBaseView]:
|
|
294
|
+
"""
|
|
295
|
+
Get a list of `ImageAnnotationBaseView` objects with `LayoutType` defined by `text_container`.
|
|
296
|
+
It will only select those among all annotations that have an entry in `Relationships.child` .
|
|
297
|
+
"""
|
|
298
|
+
all_words: list[ImageAnnotationBaseView] = []
|
|
299
|
+
|
|
300
|
+
for list_item in self.list_items:
|
|
301
|
+
all_words.extend(list_item.words) # type: ignore
|
|
302
|
+
return all_words
|
|
303
|
+
|
|
304
|
+
def get_ordered_words(self) -> list[ImageAnnotationBaseView]:
|
|
305
|
+
"""Returns a list of words order by reading order. Words with no reading order will not be returned"""
|
|
306
|
+
try:
|
|
307
|
+
list_items = self.list_items
|
|
308
|
+
all_words = []
|
|
309
|
+
list_items.sort(key=lambda x: x.bbox[1])
|
|
310
|
+
for list_item in list_items:
|
|
311
|
+
all_words.extend(list_item.get_ordered_words()) # type: ignore
|
|
312
|
+
return all_words
|
|
313
|
+
except (TypeError, AnnotationError):
|
|
314
|
+
return super().get_ordered_words()
|
|
315
|
+
|
|
316
|
+
@property
|
|
317
|
+
def list_items(self) -> list[ImageAnnotationBaseView]:
|
|
318
|
+
"""
|
|
319
|
+
A list of a list items.
|
|
320
|
+
"""
|
|
321
|
+
all_relation_ids = self.get_relationship(Relationships.CHILD)
|
|
322
|
+
list_items = self.base_page.get_annotation(
|
|
323
|
+
annotation_ids=all_relation_ids,
|
|
324
|
+
category_names=(
|
|
325
|
+
LayoutType.LIST_ITEM,
|
|
326
|
+
LayoutType.LINE,
|
|
327
|
+
),
|
|
328
|
+
)
|
|
329
|
+
list_items.sort(key=lambda x: x.bbox[1])
|
|
330
|
+
return list_items
|
|
331
|
+
|
|
332
|
+
|
|
288
333
|
class Table(Layout):
|
|
289
334
|
"""
|
|
290
335
|
Table specific sub class of `ImageAnnotationBaseView` modelled by `TableType`.
|
|
@@ -372,7 +417,7 @@ class Table(Layout):
|
|
|
372
417
|
category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
|
|
373
418
|
)
|
|
374
419
|
row_cells = list(
|
|
375
|
-
filter(lambda c: row_number
|
|
420
|
+
filter(lambda c: c.row_number <= row_number <= c.row_number + c.row_span - 1, all_cells) # type: ignore
|
|
376
421
|
)
|
|
377
422
|
row_cells.sort(key=lambda c: c.column_number) # type: ignore
|
|
378
423
|
column_header_cells = self.column_header_cells
|
|
@@ -560,6 +605,7 @@ IMAGE_ANNOTATION_TO_LAYOUTS: dict[ObjectTypes, Type[Union[Layout, Table, Word]]]
|
|
|
560
605
|
LayoutType.TABLE_ROTATED: Table,
|
|
561
606
|
LayoutType.WORD: Word,
|
|
562
607
|
LayoutType.CELL: Cell,
|
|
608
|
+
LayoutType.LIST: List,
|
|
563
609
|
CellType.SPANNING: Cell,
|
|
564
610
|
CellType.ROW_HEADER: Cell,
|
|
565
611
|
CellType.COLUMN_HEADER: Cell,
|
|
@@ -573,6 +619,7 @@ class ImageDefaults(TypedDict):
|
|
|
573
619
|
text_container: LayoutType
|
|
574
620
|
floating_text_block_categories: tuple[Union[LayoutType, CellType], ...]
|
|
575
621
|
text_block_categories: tuple[Union[LayoutType, CellType], ...]
|
|
622
|
+
residual_layouts: tuple[LayoutType, ...]
|
|
576
623
|
|
|
577
624
|
|
|
578
625
|
IMAGE_DEFAULTS: ImageDefaults = {
|
|
@@ -591,6 +638,7 @@ IMAGE_DEFAULTS: ImageDefaults = {
|
|
|
591
638
|
LayoutType.FIGURE,
|
|
592
639
|
CellType.SPANNING,
|
|
593
640
|
),
|
|
641
|
+
"residual_layouts": (LayoutType.LINE,),
|
|
594
642
|
}
|
|
595
643
|
|
|
596
644
|
|
|
@@ -770,19 +818,8 @@ class Page(Image):
|
|
|
770
818
|
"""
|
|
771
819
|
return self.get_annotation(category_names=self._get_residual_layout())
|
|
772
820
|
|
|
773
|
-
def _get_residual_layout(self) ->
|
|
774
|
-
|
|
775
|
-
layouts.extend(
|
|
776
|
-
[
|
|
777
|
-
LayoutType.TABLE,
|
|
778
|
-
LayoutType.FIGURE,
|
|
779
|
-
self.text_container,
|
|
780
|
-
LayoutType.CELL,
|
|
781
|
-
LayoutType.ROW,
|
|
782
|
-
LayoutType.COLUMN,
|
|
783
|
-
]
|
|
784
|
-
)
|
|
785
|
-
return [layout for layout in LayoutType if layout not in layouts]
|
|
821
|
+
def _get_residual_layout(self) -> tuple[LayoutType, ...]:
|
|
822
|
+
return IMAGE_DEFAULTS["residual_layouts"]
|
|
786
823
|
|
|
787
824
|
@classmethod
|
|
788
825
|
def from_image(
|
|
@@ -424,7 +424,8 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
424
424
|
custom_configs.pop("task", None)
|
|
425
425
|
recognition_configs["mean"] = custom_configs.pop("mean")
|
|
426
426
|
recognition_configs["std"] = custom_configs.pop("std")
|
|
427
|
-
batch_size
|
|
427
|
+
if "batch_size" in custom_configs:
|
|
428
|
+
batch_size = custom_configs.pop("batch_size")
|
|
428
429
|
recognition_configs["batch_size"] = batch_size
|
|
429
430
|
|
|
430
431
|
if isinstance(architecture, str):
|
|
@@ -102,7 +102,7 @@ def image_to_d2_frcnn_training(
|
|
|
102
102
|
return output
|
|
103
103
|
|
|
104
104
|
|
|
105
|
-
def pt_nms_image_annotations(
|
|
105
|
+
def pt_nms_image_annotations_depr(
|
|
106
106
|
anns: Sequence[ImageAnnotation], threshold: float, image_id: Optional[str] = None, prio: str = ""
|
|
107
107
|
) -> Sequence[str]:
|
|
108
108
|
"""
|
|
@@ -147,6 +147,69 @@ def pt_nms_image_annotations(
|
|
|
147
147
|
return []
|
|
148
148
|
|
|
149
149
|
|
|
150
|
+
def pt_nms_image_annotations(
|
|
151
|
+
anns: Sequence[ImageAnnotation], threshold: float, image_id: Optional[str] = None, prio: str = ""
|
|
152
|
+
) -> Sequence[str]:
|
|
153
|
+
"""
|
|
154
|
+
Processing given image annotations through NMS. This is useful, if you want to supress some specific image
|
|
155
|
+
annotation, e.g. given by name or returned through different predictors. This is the pt version, for tf check
|
|
156
|
+
`mapper.tpstruct`
|
|
157
|
+
|
|
158
|
+
:param anns: A sequence of ImageAnnotations. All annotations will be treated as if they belong to one category
|
|
159
|
+
:param threshold: NMS threshold
|
|
160
|
+
:param image_id: id in order to get the embedding bounding box
|
|
161
|
+
:param prio: If an annotation has prio, it will overwrite its given score to 1 so that it will never be suppressed
|
|
162
|
+
:return: A list of annotation_ids that belong to the given input sequence and that survive the NMS process
|
|
163
|
+
"""
|
|
164
|
+
if len(anns) == 1:
|
|
165
|
+
return [anns[0].annotation_id]
|
|
166
|
+
|
|
167
|
+
if not anns:
|
|
168
|
+
return []
|
|
169
|
+
|
|
170
|
+
# First, identify priority annotations that should always be kept
|
|
171
|
+
priority_ann_ids = []
|
|
172
|
+
|
|
173
|
+
if prio:
|
|
174
|
+
for ann in anns:
|
|
175
|
+
if ann.category_name == prio:
|
|
176
|
+
priority_ann_ids.append(ann.annotation_id)
|
|
177
|
+
|
|
178
|
+
# If all annotations are priority or none are left for NMS, return all priority IDs
|
|
179
|
+
if len(priority_ann_ids) == len(anns):
|
|
180
|
+
return priority_ann_ids
|
|
181
|
+
|
|
182
|
+
def priority_to_confidence(ann: ImageAnnotation, priority: str) -> float:
|
|
183
|
+
if ann.category_name == priority:
|
|
184
|
+
return 1.0
|
|
185
|
+
if ann.score:
|
|
186
|
+
return ann.score
|
|
187
|
+
raise ValueError("score cannot be None")
|
|
188
|
+
|
|
189
|
+
# Perform NMS only on non-priority annotations
|
|
190
|
+
ann_ids = np.array([ann.annotation_id for ann in anns], dtype="object")
|
|
191
|
+
|
|
192
|
+
# Get boxes for non-priority annotations
|
|
193
|
+
boxes = torch.tensor(
|
|
194
|
+
[ann.get_bounding_box(image_id).to_list(mode="xyxy") for ann in anns if ann.bounding_box is not None]
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
scores = torch.tensor([priority_to_confidence(ann, prio) for ann in anns])
|
|
198
|
+
class_mask = torch.ones(len(boxes), dtype=torch.uint8)
|
|
199
|
+
|
|
200
|
+
keep = batched_nms(boxes, scores, class_mask, threshold)
|
|
201
|
+
kept_ids = ann_ids[keep]
|
|
202
|
+
|
|
203
|
+
# Convert to list if necessary
|
|
204
|
+
if isinstance(kept_ids, str):
|
|
205
|
+
kept_ids = [kept_ids]
|
|
206
|
+
elif not isinstance(kept_ids, list):
|
|
207
|
+
kept_ids = kept_ids.tolist()
|
|
208
|
+
|
|
209
|
+
# Combine priority annotations with surviving non-priority annotations
|
|
210
|
+
return list(set(priority_ann_ids + kept_ids))
|
|
211
|
+
|
|
212
|
+
|
|
150
213
|
def _get_category_attributes(
|
|
151
214
|
ann: ImageAnnotation, cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None
|
|
152
215
|
) -> tuple[ObjectTypes, int, Optional[float]]:
|
|
@@ -95,11 +95,21 @@ def tf_nms_image_annotations(
|
|
|
95
95
|
"""
|
|
96
96
|
if len(anns) == 1:
|
|
97
97
|
return [anns[0].annotation_id]
|
|
98
|
+
|
|
98
99
|
if not anns:
|
|
99
100
|
return []
|
|
100
|
-
ann_ids = np.array([ann.annotation_id for ann in anns], dtype="object")
|
|
101
101
|
|
|
102
|
-
|
|
102
|
+
# First, identify priority annotations that should always be kept
|
|
103
|
+
priority_ann_ids = []
|
|
104
|
+
|
|
105
|
+
if prio:
|
|
106
|
+
for ann in anns:
|
|
107
|
+
if ann.category_name == prio:
|
|
108
|
+
priority_ann_ids.append(ann.annotation_id)
|
|
109
|
+
|
|
110
|
+
# If all annotations are priority or none are left for NMS, return all priority IDs
|
|
111
|
+
if len(priority_ann_ids) == len(anns):
|
|
112
|
+
return priority_ann_ids
|
|
103
113
|
|
|
104
114
|
def priority_to_confidence(ann: ImageAnnotation, priority: str) -> float:
|
|
105
115
|
if ann.category_name == priority:
|
|
@@ -108,10 +118,24 @@ def tf_nms_image_annotations(
|
|
|
108
118
|
return ann.score
|
|
109
119
|
raise ValueError("score cannot be None")
|
|
110
120
|
|
|
121
|
+
# Perform NMS only on non-priority annotations
|
|
122
|
+
ann_ids = np.array([ann.annotation_id for ann in anns], dtype="object")
|
|
123
|
+
|
|
124
|
+
# Get boxes for non-priority annotations
|
|
125
|
+
boxes = convert_to_tensor([ann.get_bounding_box(image_id).to_list(mode="xyxy") for ann in anns if ann.bounding_box
|
|
126
|
+
is not None])
|
|
127
|
+
|
|
111
128
|
scores = convert_to_tensor([priority_to_confidence(ann, prio) for ann in anns])
|
|
112
129
|
class_mask = convert_to_tensor(len(boxes), dtype=uint8)
|
|
130
|
+
|
|
113
131
|
keep = non_max_suppression(boxes, scores, class_mask, iou_threshold=threshold)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
132
|
+
kept_ids = ann_ids[keep]
|
|
133
|
+
|
|
134
|
+
# Convert to list if necessary
|
|
135
|
+
if isinstance(kept_ids, str):
|
|
136
|
+
kept_ids = [kept_ids]
|
|
137
|
+
elif not isinstance(kept_ids, list):
|
|
138
|
+
kept_ids = kept_ids.tolist()
|
|
139
|
+
|
|
140
|
+
# Combine priority annotations with surviving non-priority annotations
|
|
141
|
+
return list(set(priority_ann_ids + kept_ids))
|
|
@@ -30,6 +30,7 @@ import numpy as np
|
|
|
30
30
|
from ..dataflow import DataFlow, MapData
|
|
31
31
|
from ..datapoint.image import Image
|
|
32
32
|
from ..datapoint.view import IMAGE_DEFAULTS, Page
|
|
33
|
+
from ..extern.base import DetectionResult
|
|
33
34
|
from ..mapper.match import match_anns_by_distance, match_anns_by_intersection
|
|
34
35
|
from ..mapper.misc import to_image
|
|
35
36
|
from ..utils.settings import LayoutType, ObjectTypes, Relationships, TypeOrStr, get_type
|
|
@@ -51,9 +52,9 @@ class ImageCroppingService(PipelineComponent):
|
|
|
51
52
|
"""
|
|
52
53
|
|
|
53
54
|
def __init__(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
self,
|
|
56
|
+
category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
57
|
+
service_ids: Optional[Sequence[str]] = None,
|
|
57
58
|
) -> None:
|
|
58
59
|
"""
|
|
59
60
|
:param category_names: A single name or a list of category names to crop
|
|
@@ -106,11 +107,11 @@ class IntersectionMatcher:
|
|
|
106
107
|
"""
|
|
107
108
|
|
|
108
109
|
def __init__(
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
110
|
+
self,
|
|
111
|
+
matching_rule: Literal["iou", "ioa"],
|
|
112
|
+
threshold: float,
|
|
113
|
+
use_weighted_intersections: bool = False,
|
|
114
|
+
max_parent_only: bool = False,
|
|
114
115
|
) -> None:
|
|
115
116
|
"""
|
|
116
117
|
:param matching_rule: "iou" or "ioa"
|
|
@@ -130,12 +131,12 @@ class IntersectionMatcher:
|
|
|
130
131
|
self.max_parent_only = max_parent_only
|
|
131
132
|
|
|
132
133
|
def match(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
134
|
+
self,
|
|
135
|
+
dp: Image,
|
|
136
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
137
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
138
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
139
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
139
140
|
) -> list[tuple[str, str]]:
|
|
140
141
|
"""
|
|
141
142
|
The matching algorithm
|
|
@@ -188,12 +189,12 @@ class NeighbourMatcher:
|
|
|
188
189
|
"""
|
|
189
190
|
|
|
190
191
|
def match(
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
192
|
+
self,
|
|
193
|
+
dp: Image,
|
|
194
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
195
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
196
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
197
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
197
198
|
) -> list[tuple[str, str]]:
|
|
198
199
|
"""
|
|
199
200
|
The matching algorithm
|
|
@@ -233,6 +234,8 @@ class FamilyCompound:
|
|
|
233
234
|
child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
|
|
234
235
|
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
235
236
|
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
237
|
+
create_synthetic_parent: bool = field(default=False)
|
|
238
|
+
synthetic_parent: Optional[ObjectTypes] = field(default=None)
|
|
236
239
|
|
|
237
240
|
def __post_init__(self) -> None:
|
|
238
241
|
if isinstance(self.parent_categories, str):
|
|
@@ -257,9 +260,9 @@ class MatchingService(PipelineComponent):
|
|
|
257
260
|
"""
|
|
258
261
|
|
|
259
262
|
def __init__(
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
+
self,
|
|
264
|
+
family_compounds: Sequence[FamilyCompound],
|
|
265
|
+
matcher: Union[IntersectionMatcher, NeighbourMatcher],
|
|
263
266
|
) -> None:
|
|
264
267
|
"""
|
|
265
268
|
:param family_compounds: A list of FamilyCompounds
|
|
@@ -287,6 +290,28 @@ class MatchingService(PipelineComponent):
|
|
|
287
290
|
|
|
288
291
|
for pair in matched_pairs:
|
|
289
292
|
self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
|
|
293
|
+
if family_compound.synthetic_parent:
|
|
294
|
+
parent_anns = dp.get_annotation(category_names=family_compound.parent_categories)
|
|
295
|
+
child_anns = dp.get_annotation(category_names=family_compound.child_categories)
|
|
296
|
+
child_ann_ids = []
|
|
297
|
+
for parent in parent_anns:
|
|
298
|
+
if family_compound.relationship_key in parent.relationships:
|
|
299
|
+
child_ann_ids.extend(parent.get_relationship(family_compound.relationship_key))
|
|
300
|
+
detect_result_list = []
|
|
301
|
+
for child_ann in child_anns:
|
|
302
|
+
if child_ann.annotation_id not in child_ann_ids:
|
|
303
|
+
detect_result_list.append(DetectionResult(
|
|
304
|
+
class_name=family_compound.synthetic_parent,
|
|
305
|
+
box=child_ann.get_bounding_box(dp.image_id).to_list(mode="xyxy"),
|
|
306
|
+
absolute_coords=child_ann.get_bounding_box(dp.image_id).absolute_coords,
|
|
307
|
+
relationships={family_compound.relationship_key: child_ann.annotation_id}))
|
|
308
|
+
for detect_result in detect_result_list:
|
|
309
|
+
annotation_id = self.dp_manager.set_image_annotation(detect_result)
|
|
310
|
+
if annotation_id is not None and detect_result.relationships is not None:
|
|
311
|
+
self.dp_manager.set_relationship_annotation(family_compound.relationship_key,
|
|
312
|
+
annotation_id,
|
|
313
|
+
detect_result.relationships.get(
|
|
314
|
+
family_compound.relationship_key, None))
|
|
290
315
|
|
|
291
316
|
def clone(self) -> PipelineComponent:
|
|
292
317
|
return self.__class__(self.family_compounds, self.matcher)
|
|
@@ -316,10 +341,10 @@ class PageParsingService(PipelineComponent):
|
|
|
316
341
|
"""
|
|
317
342
|
|
|
318
343
|
def __init__(
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
344
|
+
self,
|
|
345
|
+
text_container: TypeOrStr,
|
|
346
|
+
floating_text_block_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
347
|
+
include_residual_text_container: bool = True,
|
|
323
348
|
):
|
|
324
349
|
"""
|
|
325
350
|
:param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
|
|
@@ -401,10 +426,10 @@ class AnnotationNmsService(PipelineComponent):
|
|
|
401
426
|
"""
|
|
402
427
|
|
|
403
428
|
def __init__(
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
429
|
+
self,
|
|
430
|
+
nms_pairs: Sequence[Sequence[TypeOrStr]],
|
|
431
|
+
thresholds: Union[float, Sequence[float]],
|
|
432
|
+
priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
|
|
408
433
|
):
|
|
409
434
|
"""
|
|
410
435
|
:param nms_pairs: Groups of categories, either as string or by `ObjectType`.
|
|
@@ -441,6 +441,7 @@ def segment_table(
|
|
|
441
441
|
matching_rule=segment_rule,
|
|
442
442
|
threshold=threshold_rows,
|
|
443
443
|
use_weighted_intersections=True,
|
|
444
|
+
# Rows and columns are child annotations of the table.
|
|
444
445
|
parent_ann_ids=child_ann_ids,
|
|
445
446
|
child_ann_ids=child_ann_ids,
|
|
446
447
|
)
|
|
@@ -452,6 +453,7 @@ def segment_table(
|
|
|
452
453
|
matching_rule=segment_rule,
|
|
453
454
|
threshold=threshold_cols,
|
|
454
455
|
use_weighted_intersections=True,
|
|
456
|
+
# Rows and columns are child annotations of the table.
|
|
455
457
|
parent_ann_ids=child_ann_ids,
|
|
456
458
|
child_ann_ids=child_ann_ids,
|
|
457
459
|
)
|
|
@@ -624,6 +626,7 @@ def segment_pubtables(
|
|
|
624
626
|
matching_rule=segment_rule,
|
|
625
627
|
threshold=threshold_rows,
|
|
626
628
|
use_weighted_intersections=True,
|
|
629
|
+
# Rows and columns are child annotations of the table.
|
|
627
630
|
parent_ann_ids=child_ann_ids,
|
|
628
631
|
child_ann_ids=child_ann_ids,
|
|
629
632
|
)
|
|
@@ -635,6 +638,7 @@ def segment_pubtables(
|
|
|
635
638
|
matching_rule=segment_rule,
|
|
636
639
|
threshold=threshold_cols,
|
|
637
640
|
use_weighted_intersections=True,
|
|
641
|
+
# Rows and columns are child annotations of the table.
|
|
638
642
|
parent_ann_ids=child_ann_ids,
|
|
639
643
|
child_ann_ids=child_ann_ids,
|
|
640
644
|
)
|
|
@@ -276,7 +276,7 @@ def train_hf_detr(
|
|
|
276
276
|
|
|
277
277
|
if path_weights != "":
|
|
278
278
|
model = TableTransformerForObjectDetection.from_pretrained(
|
|
279
|
-
pretrained_model_name_or_path=path_weights, config=config
|
|
279
|
+
pretrained_model_name_or_path=path_weights, config=config, ignore_mismatched_sizes=True
|
|
280
280
|
)
|
|
281
281
|
else:
|
|
282
282
|
model = TableTransformerForObjectDetection(config)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/doclaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/fintabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/iiitar13k.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/layouttest.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/publaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/pubtables1m.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/pubtabnet.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/rvlcdip.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/xsl/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py
RENAMED
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.41.0 → deepdoctection-0.42.0}/deepdoctection.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|