deepdoctection 1.0.7__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/PKG-INFO +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/pyproject.toml +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/__init__.py +2 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/config.py +37 -37
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/factory.py +13 -13
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/eval.py +7 -7
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/tedsmetric.py +4 -4
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/deskew.py +2 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/doctrocr.py +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hfdetr.py +20 -10
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hflm.py +1 -1
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/pdftext.py +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/tessocr.py +6 -6
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/texocr.py +4 -4
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/anngen.py +6 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/base.py +3 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/common.py +5 -5
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/doctectionpipe.py +31 -16
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/language.py +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/lm.py +24 -24
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/order.py +24 -24
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/refine.py +46 -46
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/segment.py +143 -130
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/sub_layout.py +2 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/text.py +6 -7
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/hf_detr_train.py +2 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/hf_layoutlm_train.py +21 -21
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/PKG-INFO +3 -3
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/requires.txt +2 -2
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/README.md +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/setup.cfg +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/profiles.jsonl +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/base.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/base.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/model.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/py.typed +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Repository for Document AI - server/inference core package
|
|
5
5
|
Author: Dr. Janis Meyer
|
|
6
6
|
License: Apache License 2.0
|
|
@@ -18,7 +18,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
18
18
|
Requires-Python: >=3.10
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
Requires-Dist: dd-core[full]>=1.0.1
|
|
21
|
-
Requires-Dist: huggingface_hub>=
|
|
21
|
+
Requires-Dist: huggingface_hub>=1.0
|
|
22
22
|
Provides-Extra: full
|
|
23
23
|
Requires-Dist: dd-datasets[full]>=1.0.1; extra == "full"
|
|
24
24
|
Requires-Dist: boto3==1.34.102; extra == "full"
|
|
@@ -30,7 +30,7 @@ Requires-Dist: distance==0.1.3; extra == "full"
|
|
|
30
30
|
Requires-Dist: lxml>=4.9.1; extra == "full"
|
|
31
31
|
Requires-Dist: pycocotools>=2.0.2; extra == "full"
|
|
32
32
|
Requires-Dist: timm>=0.9.16; extra == "full"
|
|
33
|
-
Requires-Dist: transformers
|
|
33
|
+
Requires-Dist: transformers>=5.2.0; extra == "full"
|
|
34
34
|
Requires-Dist: accelerate>=0.29.1; extra == "full"
|
|
35
35
|
Requires-Dist: python-doctr>=1.0.0; extra == "full"
|
|
36
36
|
Provides-Extra: types
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "deepdoctection"
|
|
7
|
-
version = "1.0
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
authors = [
|
|
9
9
|
{name = "Dr. Janis Meyer"}
|
|
10
10
|
]
|
|
@@ -25,7 +25,7 @@ classifiers = [
|
|
|
25
25
|
|
|
26
26
|
dependencies = [
|
|
27
27
|
"dd-core[full]>=1.0.1",
|
|
28
|
-
"huggingface_hub>=
|
|
28
|
+
"huggingface_hub>=1.0",
|
|
29
29
|
]
|
|
30
30
|
|
|
31
31
|
[project.optional-dependencies]
|
|
@@ -43,7 +43,7 @@ full = [
|
|
|
43
43
|
"pycocotools>=2.0.2",
|
|
44
44
|
# DL dependencies
|
|
45
45
|
"timm>=0.9.16",
|
|
46
|
-
"transformers>=
|
|
46
|
+
"transformers>=5.2.0",
|
|
47
47
|
"accelerate>=0.29.1",
|
|
48
48
|
"python-doctr>=1.0.0",
|
|
49
49
|
]
|
|
@@ -6,13 +6,13 @@ Init file for deepdoctection package. This file is used to import all submodules
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import sys
|
|
9
|
-
from typing import TYPE_CHECKING, Dict
|
|
9
|
+
from typing import TYPE_CHECKING, Dict
|
|
10
10
|
|
|
11
11
|
from dd_core.utils.env_info import collect_env_info
|
|
12
12
|
from dd_core.utils.file_utils import _LazyModule
|
|
13
13
|
from dd_core.utils.logger import LoggingRecord, logger
|
|
14
14
|
|
|
15
|
-
__version__ = "1.0
|
|
15
|
+
__version__ = "1.2.0"
|
|
16
16
|
_IMPORT_STRUCTURE = {
|
|
17
17
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
|
|
18
18
|
"eval": [
|
|
@@ -423,7 +423,7 @@ The distance is calculated using the center points of the layout elements.
|
|
|
423
423
|
|
|
424
424
|
from dd_core.datapoint.view import IMAGE_DEFAULTS
|
|
425
425
|
from dd_core.utils.metacfg import AttrDict
|
|
426
|
-
from dd_core.utils.object_types import
|
|
426
|
+
from dd_core.utils.object_types import CellKey, CellLabel, LayoutLabel
|
|
427
427
|
|
|
428
428
|
cfg = AttrDict()
|
|
429
429
|
|
|
@@ -551,33 +551,33 @@ cfg.LAYOUT.PAD.LEFT = 0
|
|
|
551
551
|
# LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01]
|
|
552
552
|
# LAYOUT_NMS_PAIRS.PRIORITY = ['table', None]
|
|
553
553
|
cfg.LAYOUT_NMS_PAIRS.COMBINATIONS = [
|
|
554
|
-
[
|
|
555
|
-
[
|
|
556
|
-
[
|
|
557
|
-
[
|
|
558
|
-
[
|
|
559
|
-
[
|
|
560
|
-
[
|
|
561
|
-
[
|
|
562
|
-
[
|
|
563
|
-
[
|
|
564
|
-
[
|
|
565
|
-
[
|
|
554
|
+
[LayoutLabel.TABLE, LayoutLabel.TITLE],
|
|
555
|
+
[LayoutLabel.TABLE, LayoutLabel.TEXT],
|
|
556
|
+
[LayoutLabel.TABLE, LayoutLabel.KEY_VALUE_AREA],
|
|
557
|
+
[LayoutLabel.TABLE, LayoutLabel.LIST_ITEM],
|
|
558
|
+
[LayoutLabel.TABLE, LayoutLabel.LIST],
|
|
559
|
+
[LayoutLabel.TABLE, LayoutLabel.FIGURE],
|
|
560
|
+
[LayoutLabel.TITLE, LayoutLabel.TEXT],
|
|
561
|
+
[LayoutLabel.TEXT, LayoutLabel.KEY_VALUE_AREA],
|
|
562
|
+
[LayoutLabel.TEXT, LayoutLabel.LIST_ITEM],
|
|
563
|
+
[LayoutLabel.TEXT, LayoutLabel.CAPTION],
|
|
564
|
+
[LayoutLabel.KEY_VALUE_AREA, LayoutLabel.LIST_ITEM],
|
|
565
|
+
[LayoutLabel.FIGURE, LayoutLabel.CAPTION],
|
|
566
566
|
]
|
|
567
567
|
cfg.LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01, 0.01, 0.001, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.01, 0.001]
|
|
568
568
|
cfg.LAYOUT_NMS_PAIRS.PRIORITY = [
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
569
|
+
LayoutLabel.TABLE,
|
|
570
|
+
LayoutLabel.TABLE,
|
|
571
|
+
LayoutLabel.TABLE,
|
|
572
|
+
LayoutLabel.TABLE,
|
|
573
|
+
LayoutLabel.TABLE,
|
|
574
|
+
LayoutLabel.TABLE,
|
|
575
|
+
LayoutLabel.TEXT,
|
|
576
|
+
LayoutLabel.TEXT,
|
|
577
577
|
None,
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
578
|
+
LayoutLabel.CAPTION,
|
|
579
|
+
LayoutLabel.KEY_VALUE_AREA,
|
|
580
|
+
LayoutLabel.FIGURE,
|
|
581
581
|
]
|
|
582
582
|
|
|
583
583
|
# Relevant when LIB = PT. Use either TorchScript weights via ITEM.WEIGHTS_TS
|
|
@@ -688,43 +688,43 @@ cfg.SEGMENTATION.STRETCH_RULE = "equal"
|
|
|
688
688
|
|
|
689
689
|
# Specifies the layout category used to identify tables.
|
|
690
690
|
# Used in both Deepdoctection and Table Transformer approaches.
|
|
691
|
-
cfg.SEGMENTATION.TABLE_NAME =
|
|
691
|
+
cfg.SEGMENTATION.TABLE_NAME = LayoutLabel.TABLE
|
|
692
692
|
|
|
693
693
|
# Lists the layout or cell types used in the original Deepdoctection approach.
|
|
694
694
|
# Used by TableSegmentationService for cell assignments.
|
|
695
|
-
cfg.SEGMENTATION.CELL_NAMES = [
|
|
695
|
+
cfg.SEGMENTATION.CELL_NAMES = [CellLabel.COLUMN_HEADER, CellLabel.BODY, LayoutLabel.CELL]
|
|
696
696
|
|
|
697
697
|
# Lists all cell types used by the Table Transformer approach (PubtablesSegmentationService).
|
|
698
698
|
# LayoutType.CELL is synthetically generated and not predicted by the structure recognition model.
|
|
699
699
|
cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
|
|
700
|
-
|
|
700
|
+
LayoutLabel.CELL,
|
|
701
701
|
]
|
|
702
702
|
|
|
703
703
|
# Subset of PUBTABLES_CELL_NAMES that represent spanning/header cells.
|
|
704
704
|
# These need to be matched with row or column elements.
|
|
705
705
|
cfg.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES = [
|
|
706
|
-
|
|
706
|
+
CellLabel.SPANNING,
|
|
707
707
|
]
|
|
708
708
|
|
|
709
709
|
# Lists the layout categories used to identify row and column elements.
|
|
710
710
|
# Used by TableSegmentationService.
|
|
711
|
-
cfg.SEGMENTATION.ITEM_NAMES = [
|
|
711
|
+
cfg.SEGMENTATION.ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
|
|
712
712
|
|
|
713
713
|
# Equivalent to ITEM_NAMES but used in the Table Transformer approach.
|
|
714
|
-
cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [
|
|
714
|
+
cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
|
|
715
715
|
|
|
716
716
|
# Used in TableSegmentationService to specify sub-category annotations for row and column numbers.
|
|
717
|
-
cfg.SEGMENTATION.SUB_ITEM_NAMES = [
|
|
717
|
+
cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
|
|
718
718
|
|
|
719
719
|
# Equivalent to SUB_ITEM_NAMES, but used with the Table Transformer approach.
|
|
720
|
-
cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [
|
|
720
|
+
cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
|
|
721
721
|
|
|
722
722
|
# Used in PubtablesSegmentationService.
|
|
723
723
|
# Specifies which cells should be treated as header cells that need to be linked to row/column elements.
|
|
724
724
|
cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES = [
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
725
|
+
CellLabel.COLUMN_HEADER,
|
|
726
|
+
CellLabel.ROW_HEADER,
|
|
727
|
+
CellLabel.PROJECTED_ROW_HEADER,
|
|
728
728
|
]
|
|
729
729
|
|
|
730
730
|
# Defines the threshold values for matching column/row header cells to their respective rows/columns
|
|
@@ -840,11 +840,11 @@ cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
|
|
|
840
840
|
|
|
841
841
|
# Specifies the parent layout categories in the link relationship.
|
|
842
842
|
# These are the elements to which related components (e.g., captions) should be linked.
|
|
843
|
-
cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [
|
|
843
|
+
cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [LayoutLabel.FIGURE, LayoutLabel.TABLE]
|
|
844
844
|
|
|
845
845
|
# Specifies the child layout categories in the link relationship.
|
|
846
846
|
# These are typically smaller or subordinate elements (e.g., captions).
|
|
847
|
-
cfg.LAYOUT_LINK.CHILD_CATEGORIES = [
|
|
847
|
+
cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutLabel.CAPTION]
|
|
848
848
|
|
|
849
849
|
# Weights configuration for language detection model.
|
|
850
850
|
cfg.LM_LANGUAGE_DETECT_CLASS.WEIGHTS = None
|
|
@@ -28,7 +28,7 @@ from lazy_imports import try_import
|
|
|
28
28
|
from dd_core.utils.env_info import SETTINGS
|
|
29
29
|
from dd_core.utils.error import DependencyError
|
|
30
30
|
from dd_core.utils.metacfg import AttrDict
|
|
31
|
-
from dd_core.utils.object_types import
|
|
31
|
+
from dd_core.utils.object_types import CellLabel, LayoutLabel, ObjectTypes, RelationshipKey
|
|
32
32
|
from dd_core.utils.transform import PadTransform
|
|
33
33
|
|
|
34
34
|
from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner
|
|
@@ -427,7 +427,7 @@ class ServiceFactory:
|
|
|
427
427
|
if mode == "ITEM":
|
|
428
428
|
if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
|
|
429
429
|
exclude_category_names.extend(
|
|
430
|
-
[
|
|
430
|
+
[LayoutLabel.TABLE, CellLabel.COLUMN_HEADER, CellLabel.PROJECTED_ROW_HEADER, CellLabel.SPANNING]
|
|
431
431
|
)
|
|
432
432
|
return {"exclude_category_names": exclude_category_names}
|
|
433
433
|
|
|
@@ -452,7 +452,7 @@ class ServiceFactory:
|
|
|
452
452
|
)
|
|
453
453
|
return SubImageLayoutService(
|
|
454
454
|
sub_image_detector=detector,
|
|
455
|
-
sub_image_names=[
|
|
455
|
+
sub_image_names=[LayoutLabel.TABLE, LayoutLabel.TABLE_ROTATED],
|
|
456
456
|
detect_result_generator=detect_result_generator,
|
|
457
457
|
padder=padder,
|
|
458
458
|
)
|
|
@@ -1031,14 +1031,14 @@ class ServiceFactory:
|
|
|
1031
1031
|
FamilyCompound(
|
|
1032
1032
|
parent_categories=parental_categories,
|
|
1033
1033
|
child_categories=text_container,
|
|
1034
|
-
relationship_key=
|
|
1034
|
+
relationship_key=RelationshipKey.CHILD,
|
|
1035
1035
|
),
|
|
1036
1036
|
FamilyCompound(
|
|
1037
|
-
parent_categories=[
|
|
1038
|
-
child_categories=[
|
|
1039
|
-
relationship_key=
|
|
1037
|
+
parent_categories=[LayoutLabel.LIST],
|
|
1038
|
+
child_categories=[LayoutLabel.LIST_ITEM],
|
|
1039
|
+
relationship_key=RelationshipKey.CHILD,
|
|
1040
1040
|
create_synthetic_parent=True,
|
|
1041
|
-
synthetic_parent=
|
|
1041
|
+
synthetic_parent=LayoutLabel.LIST,
|
|
1042
1042
|
),
|
|
1043
1043
|
]
|
|
1044
1044
|
return MatchingService(
|
|
@@ -1093,7 +1093,7 @@ class ServiceFactory:
|
|
|
1093
1093
|
FamilyCompound(
|
|
1094
1094
|
parent_categories=parental_categories,
|
|
1095
1095
|
child_categories=child_categories,
|
|
1096
|
-
relationship_key=
|
|
1096
|
+
relationship_key=RelationshipKey.LAYOUT_LINK,
|
|
1097
1097
|
)
|
|
1098
1098
|
]
|
|
1099
1099
|
return MatchingService(
|
|
@@ -1153,9 +1153,9 @@ class ServiceFactory:
|
|
|
1153
1153
|
)
|
|
1154
1154
|
family_compounds = [
|
|
1155
1155
|
FamilyCompound(
|
|
1156
|
-
parent_categories=[
|
|
1157
|
-
child_categories=[
|
|
1158
|
-
relationship_key=
|
|
1156
|
+
parent_categories=[LayoutLabel.LIST],
|
|
1157
|
+
child_categories=[LayoutLabel.LINE],
|
|
1158
|
+
relationship_key=RelationshipKey.CHILD,
|
|
1159
1159
|
),
|
|
1160
1160
|
]
|
|
1161
1161
|
return MatchingService(
|
|
@@ -1612,7 +1612,7 @@ class ServiceFactory:
|
|
|
1612
1612
|
token_classifier: Union[LayoutTokenModels, LmTokenModels],
|
|
1613
1613
|
tokenizer_fast: Any,
|
|
1614
1614
|
use_other_as_default_category: bool,
|
|
1615
|
-
segment_positions: Union[
|
|
1615
|
+
segment_positions: Union[LayoutLabel, Sequence[LayoutLabel], None],
|
|
1616
1616
|
sliding_window_stride: int,
|
|
1617
1617
|
) -> LMTokenClassifierService:
|
|
1618
1618
|
"""
|
|
@@ -34,7 +34,7 @@ from dd_core.mapper import filter_cat, remove_cats
|
|
|
34
34
|
from dd_core.mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
|
|
35
35
|
from dd_core.mapper.wandbstruct import to_wandb_image
|
|
36
36
|
from dd_core.utils.logger import LoggingRecord, logger
|
|
37
|
-
from dd_core.utils.object_types import
|
|
37
|
+
from dd_core.utils.object_types import DatasetKind, LayoutLabel, TypeOrStr, get_type
|
|
38
38
|
from dd_core.utils.types import PixelValues
|
|
39
39
|
from dd_core.utils.viz import interactive_imshow
|
|
40
40
|
|
|
@@ -146,14 +146,14 @@ class Evaluator:
|
|
|
146
146
|
|
|
147
147
|
self.wandb_table_agent: Optional[WandbTableAgent]
|
|
148
148
|
if run is not None:
|
|
149
|
-
if self.dataset.dataset_info.type ==
|
|
149
|
+
if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
|
|
150
150
|
self.wandb_table_agent = WandbTableAgent(
|
|
151
151
|
run,
|
|
152
152
|
self.dataset.dataset_info.name,
|
|
153
153
|
50,
|
|
154
154
|
self.dataset.dataflow.categories.get_categories(filtered=True),
|
|
155
155
|
)
|
|
156
|
-
elif self.dataset.dataset_info.type ==
|
|
156
|
+
elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
|
|
157
157
|
if hasattr(self.metric, "sub_cats"):
|
|
158
158
|
sub_cat_key, sub_cat_val_list = list(self.metric.sub_cats.items())[0]
|
|
159
159
|
sub_cat_val = sub_cat_val_list[0]
|
|
@@ -253,7 +253,7 @@ class Evaluator:
|
|
|
253
253
|
possible_cats_in_datapoint = self.dataset.dataflow.categories.get_categories(as_dict=False, filtered=True)
|
|
254
254
|
|
|
255
255
|
# clean-up procedure depends on the dataset type
|
|
256
|
-
if self.dataset.dataset_info.type ==
|
|
256
|
+
if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
|
|
257
257
|
# we keep all image annotations that will not be generated through processing
|
|
258
258
|
anns_to_keep = {ann for ann in possible_cats_in_datapoint if ann not in meta_anns.image_annotations}
|
|
259
259
|
sub_cats_to_remove = meta_anns.sub_categories
|
|
@@ -269,11 +269,11 @@ class Evaluator:
|
|
|
269
269
|
remove_cats(sub_categories=sub_cats_to_remove, relationships=relationships_to_remove),
|
|
270
270
|
)
|
|
271
271
|
|
|
272
|
-
elif self.dataset.dataset_info.type ==
|
|
272
|
+
elif self.dataset.dataset_info.type == DatasetKind.SEQUENCE_CLASSIFICATION:
|
|
273
273
|
summary_sub_cats_to_remove = meta_anns.summaries
|
|
274
274
|
df_pr = MapData(df_pr, remove_cats(summary_sub_categories=summary_sub_cats_to_remove))
|
|
275
275
|
|
|
276
|
-
elif self.dataset.dataset_info.type ==
|
|
276
|
+
elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
|
|
277
277
|
sub_cats_to_remove = meta_anns.sub_categories
|
|
278
278
|
df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
|
|
279
279
|
else:
|
|
@@ -313,7 +313,7 @@ class Evaluator:
|
|
|
313
313
|
df_pr = self._clean_up_predict_dataflow_annotations(df_pr)
|
|
314
314
|
|
|
315
315
|
page_parsing_component = PageParsingService(
|
|
316
|
-
text_container=
|
|
316
|
+
text_container=LayoutLabel.WORD,
|
|
317
317
|
floating_text_block_categories=floating_text_block_categories, # type: ignore
|
|
318
318
|
include_residual_text_container=bool(include_residual_text_containers),
|
|
319
319
|
)
|
|
@@ -31,7 +31,7 @@ from dd_core.datapoint.image import Image
|
|
|
31
31
|
from dd_core.datapoint.view import Page
|
|
32
32
|
from dd_core.utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
|
|
33
33
|
from dd_core.utils.logger import LoggingRecord, logger
|
|
34
|
-
from dd_core.utils.object_types import
|
|
34
|
+
from dd_core.utils.object_types import LayoutLabel
|
|
35
35
|
from dd_core.utils.types import MetricResults
|
|
36
36
|
|
|
37
37
|
from .base import MetricBase
|
|
@@ -238,9 +238,9 @@ class TedsMetric(MetricBase):
|
|
|
238
238
|
"""
|
|
239
239
|
|
|
240
240
|
metric = teds_metric
|
|
241
|
-
mapper: Callable[[Image,
|
|
242
|
-
text_container:
|
|
243
|
-
floating_text_block_categories = [
|
|
241
|
+
mapper: Callable[[Image, LayoutLabel, list[LayoutLabel]], Page] = Page.from_image
|
|
242
|
+
text_container: LayoutLabel = LayoutLabel.WORD
|
|
243
|
+
floating_text_block_categories = [LayoutLabel.TABLE]
|
|
244
244
|
|
|
245
245
|
structure_only = False
|
|
246
246
|
|
|
@@ -24,7 +24,7 @@ from __future__ import annotations
|
|
|
24
24
|
from lazy_imports import try_import
|
|
25
25
|
|
|
26
26
|
from dd_core.utils.file_utils import get_jdeskew_requirement
|
|
27
|
-
from dd_core.utils.object_types import ObjectTypes,
|
|
27
|
+
from dd_core.utils.object_types import ObjectTypes, PageKey
|
|
28
28
|
from dd_core.utils.types import PixelValues, Requirement
|
|
29
29
|
from dd_core.utils.viz import viz_handler
|
|
30
30
|
|
|
@@ -90,4 +90,4 @@ class Jdeskewer(ImageTransformer):
|
|
|
90
90
|
return self.__class__(self.min_angle_rotation)
|
|
91
91
|
|
|
92
92
|
def get_category_names(self) -> tuple[ObjectTypes, ...]:
|
|
93
|
-
return (
|
|
93
|
+
return (PageKey.ANGLE,)
|
|
@@ -35,7 +35,7 @@ from dd_core.utils.file_utils import (
|
|
|
35
35
|
get_pytorch_requirement,
|
|
36
36
|
)
|
|
37
37
|
from dd_core.utils.fs import load_json
|
|
38
|
-
from dd_core.utils.object_types import
|
|
38
|
+
from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey, TypeOrStr
|
|
39
39
|
from dd_core.utils.transform import RotationTransform
|
|
40
40
|
from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
|
|
41
41
|
from dd_core.utils.viz import viz_handler
|
|
@@ -84,7 +84,7 @@ def doctr_predict_text_lines(np_img: PixelValues, predictor: DetectionPredictor)
|
|
|
84
84
|
|
|
85
85
|
detection_results = [
|
|
86
86
|
DetectionResult(
|
|
87
|
-
box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=
|
|
87
|
+
box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutLabel.WORD
|
|
88
88
|
)
|
|
89
89
|
for box in raw_output[0]["words"]
|
|
90
90
|
]
|
|
@@ -507,4 +507,4 @@ class DocTrRotationTransformer(ImageTransformer):
|
|
|
507
507
|
return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
|
|
508
508
|
|
|
509
509
|
def get_category_names(self) -> tuple[ObjectTypes, ...]:
|
|
510
|
-
return (
|
|
510
|
+
return (PageKey.ANGLE,)
|
|
@@ -22,6 +22,7 @@ HF Detr and DeformableDetr models.
|
|
|
22
22
|
from __future__ import annotations
|
|
23
23
|
|
|
24
24
|
import os
|
|
25
|
+
import warnings
|
|
25
26
|
from abc import ABC
|
|
26
27
|
from pathlib import Path
|
|
27
28
|
from typing import TYPE_CHECKING, Literal, Mapping, Optional, Sequence, Union
|
|
@@ -41,6 +42,7 @@ with try_import() as pt_import_guard:
|
|
|
41
42
|
|
|
42
43
|
with try_import() as tr_import_guard:
|
|
43
44
|
from transformers import (
|
|
45
|
+
AutoConfig,
|
|
44
46
|
DeformableDetrForObjectDetection,
|
|
45
47
|
DeformableDetrImageProcessorFast,
|
|
46
48
|
DetrImageProcessorFast,
|
|
@@ -265,14 +267,16 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
|
|
|
265
267
|
Raises:
|
|
266
268
|
ValueError: If model architecture is not eligible.
|
|
267
269
|
"""
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
270
|
+
with warnings.catch_warnings():
|
|
271
|
+
warnings.filterwarnings("ignore", message=".*copying from a non-meta parameter.*")
|
|
272
|
+
if "TableTransformerForObjectDetection" in config.architectures:
|
|
273
|
+
return TableTransformerForObjectDetection.from_pretrained(
|
|
274
|
+
pretrained_model_name_or_path=os.fspath(path_weights), config=config
|
|
275
|
+
)
|
|
276
|
+
if "DeformableDetrForObjectDetection" in config.architectures:
|
|
277
|
+
return DeformableDetrForObjectDetection.from_pretrained(
|
|
278
|
+
pretrained_model_name_or_path=os.fspath(path_weights), config=config
|
|
279
|
+
)
|
|
276
280
|
raise ValueError(
|
|
277
281
|
f"Model architecture {config.architectures} not eligible. Please use either "
|
|
278
282
|
"TableTransformerForObjectDetection or DeformableDetrForObjectDetection."
|
|
@@ -317,8 +321,14 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
|
|
|
317
321
|
Returns:
|
|
318
322
|
`PretrainedConfig` instance.
|
|
319
323
|
"""
|
|
320
|
-
|
|
321
|
-
config
|
|
324
|
+
|
|
325
|
+
config = AutoConfig.from_pretrained(pretrained_model_name_or_path=os.fspath(path_config))
|
|
326
|
+
|
|
327
|
+
# keep older behavior when supported by the concrete config
|
|
328
|
+
if hasattr(config, "use_timm_backbone"):
|
|
329
|
+
config.use_timm_backbone = True
|
|
330
|
+
|
|
331
|
+
# deepdoctection-specific runtime attributes
|
|
322
332
|
config.threshold = 0.1
|
|
323
333
|
config.nms_threshold = 0.05
|
|
324
334
|
return config
|
|
@@ -123,7 +123,7 @@ def predict_sequence_classes_from_lm(
|
|
|
123
123
|
|
|
124
124
|
outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
|
|
125
125
|
|
|
126
|
-
score = torch.max(F.softmax(outputs.logits)).tolist()
|
|
126
|
+
score = torch.max(F.softmax(outputs.logits, dim=1)).tolist()
|
|
127
127
|
sequence_class_predictions = outputs.logits.argmax(-1).squeeze().tolist()
|
|
128
128
|
|
|
129
129
|
return SequenceClassResult(class_id=sequence_class_predictions, score=float(score))
|
|
@@ -25,7 +25,7 @@ from lazy_imports import try_import
|
|
|
25
25
|
|
|
26
26
|
from dd_core.utils.context import save_tmp_file
|
|
27
27
|
from dd_core.utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
|
|
28
|
-
from dd_core.utils.object_types import
|
|
28
|
+
from dd_core.utils.object_types import LayoutLabel, ObjectTypes
|
|
29
29
|
from dd_core.utils.types import Requirement
|
|
30
30
|
|
|
31
31
|
from .base import DetectionResult, ModelCategories, PdfMiner
|
|
@@ -82,7 +82,7 @@ class PdfPlumberTextDetector(PdfMiner):
|
|
|
82
82
|
def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
|
|
83
83
|
self.name = "Pdfplumber"
|
|
84
84
|
self.model_id = self.get_model_id()
|
|
85
|
-
self.categories = ModelCategories(init_categories={1:
|
|
85
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
|
|
86
86
|
self.x_tolerance = x_tolerance
|
|
87
87
|
self.y_tolerance = y_tolerance
|
|
88
88
|
self._page: Optional[Page] = None
|
|
@@ -169,7 +169,7 @@ class Pdfmium2TextDetector(PdfMiner):
|
|
|
169
169
|
def __init__(self) -> None:
|
|
170
170
|
self.name = "Pdfmium"
|
|
171
171
|
self.model_id = self.get_model_id()
|
|
172
|
-
self.categories = ModelCategories(init_categories={1:
|
|
172
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.LINE})
|
|
173
173
|
self._page: Optional[Page] = None
|
|
174
174
|
|
|
175
175
|
def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
|
|
@@ -37,7 +37,7 @@ from dd_core.utils.context import save_tmp_file, timeout_manager
|
|
|
37
37
|
from dd_core.utils.error import DependencyError, TesseractError
|
|
38
38
|
from dd_core.utils.file_utils import _TESS_PATH, get_tesseract_requirement
|
|
39
39
|
from dd_core.utils.metacfg import config_to_cli_str, set_config_by_yaml
|
|
40
|
-
from dd_core.utils.object_types import
|
|
40
|
+
from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey
|
|
41
41
|
from dd_core.utils.transform import RotationTransform
|
|
42
42
|
from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
|
|
43
43
|
from dd_core.utils.viz import viz_handler
|
|
@@ -248,7 +248,7 @@ def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) ->
|
|
|
248
248
|
DetectionResult(
|
|
249
249
|
box=[ulx, uly, lrx, lry],
|
|
250
250
|
class_id=2,
|
|
251
|
-
class_name=
|
|
251
|
+
class_name=LayoutLabel.LINE,
|
|
252
252
|
text=" ".join(
|
|
253
253
|
[detect_result.text for detect_result in block_group if isinstance(detect_result.text, str)]
|
|
254
254
|
),
|
|
@@ -295,7 +295,7 @@ def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool
|
|
|
295
295
|
score=score / 100,
|
|
296
296
|
text=caption[5],
|
|
297
297
|
class_id=1,
|
|
298
|
-
class_name=
|
|
298
|
+
class_name=LayoutLabel.WORD,
|
|
299
299
|
)
|
|
300
300
|
all_results.append(word)
|
|
301
301
|
if text_lines:
|
|
@@ -381,9 +381,9 @@ class TesseractOcrDetector(ObjectDetector):
|
|
|
381
381
|
self.config = hyper_param_config
|
|
382
382
|
|
|
383
383
|
if self.config.LINES:
|
|
384
|
-
self.categories = ModelCategories(init_categories={1:
|
|
384
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
|
|
385
385
|
else:
|
|
386
|
-
self.categories = ModelCategories(init_categories={1:
|
|
386
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
|
|
387
387
|
|
|
388
388
|
def predict(self, np_img: PixelValues) -> list[DetectionResult]:
|
|
389
389
|
"""
|
|
@@ -455,7 +455,7 @@ class TesseractRotationTransformer(ImageTransformer):
|
|
|
455
455
|
|
|
456
456
|
def __init__(self) -> None:
|
|
457
457
|
self.name = fspath(_TESS_PATH) + "-rotation"
|
|
458
|
-
self.categories = ModelCategories(init_categories={1:
|
|
458
|
+
self.categories = ModelCategories(init_categories={1: PageKey.ANGLE})
|
|
459
459
|
self.model_id = self.get_model_id()
|
|
460
460
|
self.rotator = RotationTransform(360)
|
|
461
461
|
|
|
@@ -28,7 +28,7 @@ from lazy_imports import try_import
|
|
|
28
28
|
from dd_core.datapoint.convert import convert_np_array_to_b64_b
|
|
29
29
|
from dd_core.utils.file_utils import get_boto3_requirement
|
|
30
30
|
from dd_core.utils.logger import LoggingRecord, logger
|
|
31
|
-
from dd_core.utils.object_types import
|
|
31
|
+
from dd_core.utils.object_types import LayoutLabel, ObjectTypes
|
|
32
32
|
from dd_core.utils.types import JsonDict, PixelValues, Requirement
|
|
33
33
|
|
|
34
34
|
from .base import DetectionResult, ModelCategories, ObjectDetector
|
|
@@ -54,7 +54,7 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
|
|
|
54
54
|
score=block["Confidence"] / 100,
|
|
55
55
|
text=block["Text"],
|
|
56
56
|
class_id=1 if block["BlockType"] == "WORD" else 2,
|
|
57
|
-
class_name=
|
|
57
|
+
class_name=LayoutLabel.WORD if block["BlockType"] == "WORD" else LayoutLabel.LINE,
|
|
58
58
|
)
|
|
59
59
|
all_results.append(word)
|
|
60
60
|
|
|
@@ -142,9 +142,9 @@ class TextractOcrDetector(ObjectDetector):
|
|
|
142
142
|
credentials_kwargs = self._maybe_resolve_secret(**credentials_kwargs)
|
|
143
143
|
self.client = boto3.client("textract", **credentials_kwargs)
|
|
144
144
|
if self.text_lines:
|
|
145
|
-
self.categories = ModelCategories(init_categories={1:
|
|
145
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
|
|
146
146
|
else:
|
|
147
|
-
self.categories = ModelCategories(init_categories={1:
|
|
147
|
+
self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
|
|
148
148
|
|
|
149
149
|
def predict(self, np_img: PixelValues) -> list[DetectionResult]:
|
|
150
150
|
"""
|
|
@@ -28,7 +28,7 @@ from dd_core.datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation
|
|
|
28
28
|
from dd_core.datapoint.box import BoundingBox, local_to_global_coords, rescale_coords
|
|
29
29
|
from dd_core.datapoint.image import Image
|
|
30
30
|
from dd_core.mapper.maputils import MappingContextManager
|
|
31
|
-
from dd_core.utils.object_types import ObjectTypes,
|
|
31
|
+
from dd_core.utils.object_types import ObjectTypes, RelationshipKey
|
|
32
32
|
|
|
33
33
|
from ..extern.base import DetectionResult
|
|
34
34
|
|
|
@@ -121,6 +121,10 @@ class DatapointManager:
|
|
|
121
121
|
self._cache_anns = {ann.annotation_id: ann for ann in dp.get_annotation()}
|
|
122
122
|
self.datapoint_is_passed = True
|
|
123
123
|
|
|
124
|
+
def set_model_id(self, model_id: str | None) -> None:
|
|
125
|
+
"""Re-sets the model_id."""
|
|
126
|
+
self.model_id = model_id
|
|
127
|
+
|
|
124
128
|
def assert_datapoint_passed(self) -> None:
|
|
125
129
|
"""
|
|
126
130
|
Asserts that a datapoint is passed.
|
|
@@ -227,7 +231,7 @@ class DatapointManager:
|
|
|
227
231
|
self.datapoint.image_id,
|
|
228
232
|
ann_global_box.transform(image_width=self.datapoint.width, image_height=self.datapoint.height),
|
|
229
233
|
)
|
|
230
|
-
parent_ann.dump_relationship(
|
|
234
|
+
parent_ann.dump_relationship(RelationshipKey.CHILD, ann.annotation_id)
|
|
231
235
|
|
|
232
236
|
self.datapoint.dump(ann)
|
|
233
237
|
self._cache_anns[ann.annotation_id] = ann
|
|
@@ -63,7 +63,7 @@ class PipelineComponent(ABC):
|
|
|
63
63
|
Currently, predictors can only process single images. Processing higher number of batches is not planned.
|
|
64
64
|
"""
|
|
65
65
|
|
|
66
|
-
def __init__(self, name: str, model_id: Optional[str] = None) -> None:
|
|
66
|
+
def __init__(self, name: str, model_id: Optional[str] = None, service_id: Optional[str] = None) -> None:
|
|
67
67
|
"""
|
|
68
68
|
Initializes a `PipelineComponent`.
|
|
69
69
|
|
|
@@ -71,9 +71,10 @@ class PipelineComponent(ABC):
|
|
|
71
71
|
name: The name of the pipeline component. The name will be used to identify a pipeline component in a
|
|
72
72
|
pipeline. Use something that describes the task of the pipeline.
|
|
73
73
|
model_id: Optional model identifier.
|
|
74
|
+
service_id: Optional service identifier override to avoid name collisions.
|
|
74
75
|
"""
|
|
75
76
|
self.name = name
|
|
76
|
-
self.service_id = self.get_service_id()
|
|
77
|
+
self.service_id = service_id or self.get_service_id()
|
|
77
78
|
self.dp_manager = DatapointManager(self.service_id, model_id)
|
|
78
79
|
self.timer_on = False
|
|
79
80
|
self.filter_func: Callable[[DP], bool] = lambda dp: False
|