deepdoctection 1.0.7__tar.gz → 1.2.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/PKG-INFO +3 -3
  2. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/pyproject.toml +3 -3
  3. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/__init__.py +2 -2
  4. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/config.py +37 -37
  5. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/factory.py +13 -13
  6. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/eval.py +7 -7
  7. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/tedsmetric.py +4 -4
  8. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/deskew.py +2 -2
  9. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/doctrocr.py +3 -3
  10. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hfdetr.py +20 -10
  11. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hflm.py +1 -1
  12. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/pdftext.py +3 -3
  13. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/tessocr.py +6 -6
  14. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/texocr.py +4 -4
  15. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/anngen.py +6 -2
  16. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/base.py +3 -2
  17. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/common.py +5 -5
  18. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/doctectionpipe.py +31 -16
  19. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/language.py +3 -3
  20. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/lm.py +24 -24
  21. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/order.py +24 -24
  22. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/refine.py +46 -46
  23. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/segment.py +143 -130
  24. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/sub_layout.py +2 -2
  25. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/text.py +6 -7
  26. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/hf_detr_train.py +2 -2
  27. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/hf_layoutlm_train.py +21 -21
  28. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/PKG-INFO +3 -3
  29. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/requires.txt +2 -2
  30. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/README.md +0 -0
  31. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/setup.cfg +0 -0
  32. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/__init__.py +0 -0
  33. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/dd.py +0 -0
  34. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/__init__.py +0 -0
  35. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/conf_dd_one.yaml +0 -0
  36. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/conf_tesseract.yaml +0 -0
  37. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/configs/profiles.jsonl +0 -0
  38. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/__init__.py +0 -0
  39. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/accmetric.py +0 -0
  40. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/base.py +0 -0
  41. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/cocometric.py +0 -0
  42. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/registry.py +0 -0
  43. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/__init__.py +0 -0
  44. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/base.py +0 -0
  45. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/d2detect.py +0 -0
  46. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hflayoutlm.py +0 -0
  47. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/model.py +0 -0
  48. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/__init__.py +0 -0
  49. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/concurrency.py +0 -0
  50. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/layout.py +0 -0
  51. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/registry.py +0 -0
  52. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/transform.py +0 -0
  53. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/py.typed +0 -0
  54. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/__init__.py +0 -0
  55. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/train/d2_frcnn_train.py +0 -0
  56. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/SOURCES.txt +0 -0
  57. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/dependency_links.txt +0 -0
  58. {deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 1.0.7
3
+ Version: 1.2.0
4
4
  Summary: Repository for Document AI - server/inference core package
5
5
  Author: Dr. Janis Meyer
6
6
  License: Apache License 2.0
@@ -18,7 +18,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
18
  Requires-Python: >=3.10
19
19
  Description-Content-Type: text/markdown
20
20
  Requires-Dist: dd-core[full]>=1.0.1
21
- Requires-Dist: huggingface_hub>=0.26.0
21
+ Requires-Dist: huggingface_hub>=1.0
22
22
  Provides-Extra: full
23
23
  Requires-Dist: dd-datasets[full]>=1.0.1; extra == "full"
24
24
  Requires-Dist: boto3==1.34.102; extra == "full"
@@ -30,7 +30,7 @@ Requires-Dist: distance==0.1.3; extra == "full"
30
30
  Requires-Dist: lxml>=4.9.1; extra == "full"
31
31
  Requires-Dist: pycocotools>=2.0.2; extra == "full"
32
32
  Requires-Dist: timm>=0.9.16; extra == "full"
33
- Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "full"
33
+ Requires-Dist: transformers>=5.2.0; extra == "full"
34
34
  Requires-Dist: accelerate>=0.29.1; extra == "full"
35
35
  Requires-Dist: python-doctr>=1.0.0; extra == "full"
36
36
  Provides-Extra: types
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deepdoctection"
7
- version = "1.0.7"
7
+ version = "1.2.0"
8
8
  authors = [
9
9
  {name = "Dr. Janis Meyer"}
10
10
  ]
@@ -25,7 +25,7 @@ classifiers = [
25
25
 
26
26
  dependencies = [
27
27
  "dd-core[full]>=1.0.1",
28
- "huggingface_hub>=0.26.0",
28
+ "huggingface_hub>=1.0",
29
29
  ]
30
30
 
31
31
  [project.optional-dependencies]
@@ -43,7 +43,7 @@ full = [
43
43
  "pycocotools>=2.0.2",
44
44
  # DL dependencies
45
45
  "timm>=0.9.16",
46
- "transformers>=4.48.0,<5.0.0",
46
+ "transformers>=5.2.0",
47
47
  "accelerate>=0.29.1",
48
48
  "python-doctr>=1.0.0",
49
49
  ]
@@ -6,13 +6,13 @@ Init file for deepdoctection package. This file is used to import all submodules
6
6
  """
7
7
 
8
8
  import sys
9
- from typing import TYPE_CHECKING, Dict, List
9
+ from typing import TYPE_CHECKING, Dict
10
10
 
11
11
  from dd_core.utils.env_info import collect_env_info
12
12
  from dd_core.utils.file_utils import _LazyModule
13
13
  from dd_core.utils.logger import LoggingRecord, logger
14
14
 
15
- __version__ = "1.0.7"
15
+ __version__ = "1.2.0"
16
16
  _IMPORT_STRUCTURE = {
17
17
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
18
18
  "eval": [
@@ -423,7 +423,7 @@ The distance is calculated using the center points of the layout elements.
423
423
 
424
424
  from dd_core.datapoint.view import IMAGE_DEFAULTS
425
425
  from dd_core.utils.metacfg import AttrDict
426
- from dd_core.utils.object_types import CellType, LayoutType
426
+ from dd_core.utils.object_types import CellKey, CellLabel, LayoutLabel
427
427
 
428
428
  cfg = AttrDict()
429
429
 
@@ -551,33 +551,33 @@ cfg.LAYOUT.PAD.LEFT = 0
551
551
  # LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01]
552
552
  # LAYOUT_NMS_PAIRS.PRIORITY = ['table', None]
553
553
  cfg.LAYOUT_NMS_PAIRS.COMBINATIONS = [
554
- [LayoutType.TABLE, LayoutType.TITLE],
555
- [LayoutType.TABLE, LayoutType.TEXT],
556
- [LayoutType.TABLE, LayoutType.KEY_VALUE_AREA],
557
- [LayoutType.TABLE, LayoutType.LIST_ITEM],
558
- [LayoutType.TABLE, LayoutType.LIST],
559
- [LayoutType.TABLE, LayoutType.FIGURE],
560
- [LayoutType.TITLE, LayoutType.TEXT],
561
- [LayoutType.TEXT, LayoutType.KEY_VALUE_AREA],
562
- [LayoutType.TEXT, LayoutType.LIST_ITEM],
563
- [LayoutType.TEXT, LayoutType.CAPTION],
564
- [LayoutType.KEY_VALUE_AREA, LayoutType.LIST_ITEM],
565
- [LayoutType.FIGURE, LayoutType.CAPTION],
554
+ [LayoutLabel.TABLE, LayoutLabel.TITLE],
555
+ [LayoutLabel.TABLE, LayoutLabel.TEXT],
556
+ [LayoutLabel.TABLE, LayoutLabel.KEY_VALUE_AREA],
557
+ [LayoutLabel.TABLE, LayoutLabel.LIST_ITEM],
558
+ [LayoutLabel.TABLE, LayoutLabel.LIST],
559
+ [LayoutLabel.TABLE, LayoutLabel.FIGURE],
560
+ [LayoutLabel.TITLE, LayoutLabel.TEXT],
561
+ [LayoutLabel.TEXT, LayoutLabel.KEY_VALUE_AREA],
562
+ [LayoutLabel.TEXT, LayoutLabel.LIST_ITEM],
563
+ [LayoutLabel.TEXT, LayoutLabel.CAPTION],
564
+ [LayoutLabel.KEY_VALUE_AREA, LayoutLabel.LIST_ITEM],
565
+ [LayoutLabel.FIGURE, LayoutLabel.CAPTION],
566
566
  ]
567
567
  cfg.LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01, 0.01, 0.001, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.01, 0.001]
568
568
  cfg.LAYOUT_NMS_PAIRS.PRIORITY = [
569
- LayoutType.TABLE,
570
- LayoutType.TABLE,
571
- LayoutType.TABLE,
572
- LayoutType.TABLE,
573
- LayoutType.TABLE,
574
- LayoutType.TABLE,
575
- LayoutType.TEXT,
576
- LayoutType.TEXT,
569
+ LayoutLabel.TABLE,
570
+ LayoutLabel.TABLE,
571
+ LayoutLabel.TABLE,
572
+ LayoutLabel.TABLE,
573
+ LayoutLabel.TABLE,
574
+ LayoutLabel.TABLE,
575
+ LayoutLabel.TEXT,
576
+ LayoutLabel.TEXT,
577
577
  None,
578
- LayoutType.CAPTION,
579
- LayoutType.KEY_VALUE_AREA,
580
- LayoutType.FIGURE,
578
+ LayoutLabel.CAPTION,
579
+ LayoutLabel.KEY_VALUE_AREA,
580
+ LayoutLabel.FIGURE,
581
581
  ]
582
582
 
583
583
  # Relevant when LIB = PT. Use either TorchScript weights via ITEM.WEIGHTS_TS
@@ -688,43 +688,43 @@ cfg.SEGMENTATION.STRETCH_RULE = "equal"
688
688
 
689
689
  # Specifies the layout category used to identify tables.
690
690
  # Used in both Deepdoctection and Table Transformer approaches.
691
- cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
691
+ cfg.SEGMENTATION.TABLE_NAME = LayoutLabel.TABLE
692
692
 
693
693
  # Lists the layout or cell types used in the original Deepdoctection approach.
694
694
  # Used by TableSegmentationService for cell assignments.
695
- cfg.SEGMENTATION.CELL_NAMES = [CellType.COLUMN_HEADER, CellType.BODY, LayoutType.CELL]
695
+ cfg.SEGMENTATION.CELL_NAMES = [CellLabel.COLUMN_HEADER, CellLabel.BODY, LayoutLabel.CELL]
696
696
 
697
697
  # Lists all cell types used by the Table Transformer approach (PubtablesSegmentationService).
698
698
  # LayoutType.CELL is synthetically generated and not predicted by the structure recognition model.
699
699
  cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
700
- LayoutType.CELL,
700
+ LayoutLabel.CELL,
701
701
  ]
702
702
 
703
703
  # Subset of PUBTABLES_CELL_NAMES that represent spanning/header cells.
704
704
  # These need to be matched with row or column elements.
705
705
  cfg.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES = [
706
- CellType.SPANNING,
706
+ CellLabel.SPANNING,
707
707
  ]
708
708
 
709
709
  # Lists the layout categories used to identify row and column elements.
710
710
  # Used by TableSegmentationService.
711
- cfg.SEGMENTATION.ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
711
+ cfg.SEGMENTATION.ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
712
712
 
713
713
  # Equivalent to ITEM_NAMES but used in the Table Transformer approach.
714
- cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
714
+ cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
715
715
 
716
716
  # Used in TableSegmentationService to specify sub-category annotations for row and column numbers.
717
- cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
717
+ cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
718
718
 
719
719
  # Equivalent to SUB_ITEM_NAMES, but used with the Table Transformer approach.
720
- cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
720
+ cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
721
721
 
722
722
  # Used in PubtablesSegmentationService.
723
723
  # Specifies which cells should be treated as header cells that need to be linked to row/column elements.
724
724
  cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES = [
725
- CellType.COLUMN_HEADER,
726
- CellType.ROW_HEADER,
727
- CellType.PROJECTED_ROW_HEADER,
725
+ CellLabel.COLUMN_HEADER,
726
+ CellLabel.ROW_HEADER,
727
+ CellLabel.PROJECTED_ROW_HEADER,
728
728
  ]
729
729
 
730
730
  # Defines the threshold values for matching column/row header cells to their respective rows/columns
@@ -840,11 +840,11 @@ cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
840
840
 
841
841
  # Specifies the parent layout categories in the link relationship.
842
842
  # These are the elements to which related components (e.g., captions) should be linked.
843
- cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [LayoutType.FIGURE, LayoutType.TABLE]
843
+ cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [LayoutLabel.FIGURE, LayoutLabel.TABLE]
844
844
 
845
845
  # Specifies the child layout categories in the link relationship.
846
846
  # These are typically smaller or subordinate elements (e.g., captions).
847
- cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
847
+ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutLabel.CAPTION]
848
848
 
849
849
  # Weights configuration for language detection model.
850
850
  cfg.LM_LANGUAGE_DETECT_CLASS.WEIGHTS = None
@@ -28,7 +28,7 @@ from lazy_imports import try_import
28
28
  from dd_core.utils.env_info import SETTINGS
29
29
  from dd_core.utils.error import DependencyError
30
30
  from dd_core.utils.metacfg import AttrDict
31
- from dd_core.utils.object_types import CellType, LayoutType, ObjectTypes, Relationships
31
+ from dd_core.utils.object_types import CellLabel, LayoutLabel, ObjectTypes, RelationshipKey
32
32
  from dd_core.utils.transform import PadTransform
33
33
 
34
34
  from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner
@@ -427,7 +427,7 @@ class ServiceFactory:
427
427
  if mode == "ITEM":
428
428
  if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
429
429
  exclude_category_names.extend(
430
- [LayoutType.TABLE, CellType.COLUMN_HEADER, CellType.PROJECTED_ROW_HEADER, CellType.SPANNING]
430
+ [LayoutLabel.TABLE, CellLabel.COLUMN_HEADER, CellLabel.PROJECTED_ROW_HEADER, CellLabel.SPANNING]
431
431
  )
432
432
  return {"exclude_category_names": exclude_category_names}
433
433
 
@@ -452,7 +452,7 @@ class ServiceFactory:
452
452
  )
453
453
  return SubImageLayoutService(
454
454
  sub_image_detector=detector,
455
- sub_image_names=[LayoutType.TABLE, LayoutType.TABLE_ROTATED],
455
+ sub_image_names=[LayoutLabel.TABLE, LayoutLabel.TABLE_ROTATED],
456
456
  detect_result_generator=detect_result_generator,
457
457
  padder=padder,
458
458
  )
@@ -1031,14 +1031,14 @@ class ServiceFactory:
1031
1031
  FamilyCompound(
1032
1032
  parent_categories=parental_categories,
1033
1033
  child_categories=text_container,
1034
- relationship_key=Relationships.CHILD,
1034
+ relationship_key=RelationshipKey.CHILD,
1035
1035
  ),
1036
1036
  FamilyCompound(
1037
- parent_categories=[LayoutType.LIST],
1038
- child_categories=[LayoutType.LIST_ITEM],
1039
- relationship_key=Relationships.CHILD,
1037
+ parent_categories=[LayoutLabel.LIST],
1038
+ child_categories=[LayoutLabel.LIST_ITEM],
1039
+ relationship_key=RelationshipKey.CHILD,
1040
1040
  create_synthetic_parent=True,
1041
- synthetic_parent=LayoutType.LIST,
1041
+ synthetic_parent=LayoutLabel.LIST,
1042
1042
  ),
1043
1043
  ]
1044
1044
  return MatchingService(
@@ -1093,7 +1093,7 @@ class ServiceFactory:
1093
1093
  FamilyCompound(
1094
1094
  parent_categories=parental_categories,
1095
1095
  child_categories=child_categories,
1096
- relationship_key=Relationships.LAYOUT_LINK,
1096
+ relationship_key=RelationshipKey.LAYOUT_LINK,
1097
1097
  )
1098
1098
  ]
1099
1099
  return MatchingService(
@@ -1153,9 +1153,9 @@ class ServiceFactory:
1153
1153
  )
1154
1154
  family_compounds = [
1155
1155
  FamilyCompound(
1156
- parent_categories=[LayoutType.LIST],
1157
- child_categories=[LayoutType.LINE],
1158
- relationship_key=Relationships.CHILD,
1156
+ parent_categories=[LayoutLabel.LIST],
1157
+ child_categories=[LayoutLabel.LINE],
1158
+ relationship_key=RelationshipKey.CHILD,
1159
1159
  ),
1160
1160
  ]
1161
1161
  return MatchingService(
@@ -1612,7 +1612,7 @@ class ServiceFactory:
1612
1612
  token_classifier: Union[LayoutTokenModels, LmTokenModels],
1613
1613
  tokenizer_fast: Any,
1614
1614
  use_other_as_default_category: bool,
1615
- segment_positions: Union[LayoutType, Sequence[LayoutType], None],
1615
+ segment_positions: Union[LayoutLabel, Sequence[LayoutLabel], None],
1616
1616
  sliding_window_stride: int,
1617
1617
  ) -> LMTokenClassifierService:
1618
1618
  """
@@ -34,7 +34,7 @@ from dd_core.mapper import filter_cat, remove_cats
34
34
  from dd_core.mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
35
35
  from dd_core.mapper.wandbstruct import to_wandb_image
36
36
  from dd_core.utils.logger import LoggingRecord, logger
37
- from dd_core.utils.object_types import DatasetType, LayoutType, TypeOrStr, get_type
37
+ from dd_core.utils.object_types import DatasetKind, LayoutLabel, TypeOrStr, get_type
38
38
  from dd_core.utils.types import PixelValues
39
39
  from dd_core.utils.viz import interactive_imshow
40
40
 
@@ -146,14 +146,14 @@ class Evaluator:
146
146
 
147
147
  self.wandb_table_agent: Optional[WandbTableAgent]
148
148
  if run is not None:
149
- if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
149
+ if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
150
150
  self.wandb_table_agent = WandbTableAgent(
151
151
  run,
152
152
  self.dataset.dataset_info.name,
153
153
  50,
154
154
  self.dataset.dataflow.categories.get_categories(filtered=True),
155
155
  )
156
- elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
156
+ elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
157
157
  if hasattr(self.metric, "sub_cats"):
158
158
  sub_cat_key, sub_cat_val_list = list(self.metric.sub_cats.items())[0]
159
159
  sub_cat_val = sub_cat_val_list[0]
@@ -253,7 +253,7 @@ class Evaluator:
253
253
  possible_cats_in_datapoint = self.dataset.dataflow.categories.get_categories(as_dict=False, filtered=True)
254
254
 
255
255
  # clean-up procedure depends on the dataset type
256
- if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
256
+ if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
257
257
  # we keep all image annotations that will not be generated through processing
258
258
  anns_to_keep = {ann for ann in possible_cats_in_datapoint if ann not in meta_anns.image_annotations}
259
259
  sub_cats_to_remove = meta_anns.sub_categories
@@ -269,11 +269,11 @@ class Evaluator:
269
269
  remove_cats(sub_categories=sub_cats_to_remove, relationships=relationships_to_remove),
270
270
  )
271
271
 
272
- elif self.dataset.dataset_info.type == DatasetType.SEQUENCE_CLASSIFICATION:
272
+ elif self.dataset.dataset_info.type == DatasetKind.SEQUENCE_CLASSIFICATION:
273
273
  summary_sub_cats_to_remove = meta_anns.summaries
274
274
  df_pr = MapData(df_pr, remove_cats(summary_sub_categories=summary_sub_cats_to_remove))
275
275
 
276
- elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
276
+ elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
277
277
  sub_cats_to_remove = meta_anns.sub_categories
278
278
  df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
279
279
  else:
@@ -313,7 +313,7 @@ class Evaluator:
313
313
  df_pr = self._clean_up_predict_dataflow_annotations(df_pr)
314
314
 
315
315
  page_parsing_component = PageParsingService(
316
- text_container=LayoutType.WORD,
316
+ text_container=LayoutLabel.WORD,
317
317
  floating_text_block_categories=floating_text_block_categories, # type: ignore
318
318
  include_residual_text_container=bool(include_residual_text_containers),
319
319
  )
@@ -31,7 +31,7 @@ from dd_core.datapoint.image import Image
31
31
  from dd_core.datapoint.view import Page
32
32
  from dd_core.utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
33
33
  from dd_core.utils.logger import LoggingRecord, logger
34
- from dd_core.utils.object_types import LayoutType
34
+ from dd_core.utils.object_types import LayoutLabel
35
35
  from dd_core.utils.types import MetricResults
36
36
 
37
37
  from .base import MetricBase
@@ -238,9 +238,9 @@ class TedsMetric(MetricBase):
238
238
  """
239
239
 
240
240
  metric = teds_metric
241
- mapper: Callable[[Image, LayoutType, list[LayoutType]], Page] = Page.from_image
242
- text_container: LayoutType = LayoutType.WORD
243
- floating_text_block_categories = [LayoutType.TABLE]
241
+ mapper: Callable[[Image, LayoutLabel, list[LayoutLabel]], Page] = Page.from_image
242
+ text_container: LayoutLabel = LayoutLabel.WORD
243
+ floating_text_block_categories = [LayoutLabel.TABLE]
244
244
 
245
245
  structure_only = False
246
246
 
@@ -24,7 +24,7 @@ from __future__ import annotations
24
24
  from lazy_imports import try_import
25
25
 
26
26
  from dd_core.utils.file_utils import get_jdeskew_requirement
27
- from dd_core.utils.object_types import ObjectTypes, PageType
27
+ from dd_core.utils.object_types import ObjectTypes, PageKey
28
28
  from dd_core.utils.types import PixelValues, Requirement
29
29
  from dd_core.utils.viz import viz_handler
30
30
 
@@ -90,4 +90,4 @@ class Jdeskewer(ImageTransformer):
90
90
  return self.__class__(self.min_angle_rotation)
91
91
 
92
92
  def get_category_names(self) -> tuple[ObjectTypes, ...]:
93
- return (PageType.ANGLE,)
93
+ return (PageKey.ANGLE,)
@@ -35,7 +35,7 @@ from dd_core.utils.file_utils import (
35
35
  get_pytorch_requirement,
36
36
  )
37
37
  from dd_core.utils.fs import load_json
38
- from dd_core.utils.object_types import LayoutType, ObjectTypes, PageType, TypeOrStr
38
+ from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey, TypeOrStr
39
39
  from dd_core.utils.transform import RotationTransform
40
40
  from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
41
41
  from dd_core.utils.viz import viz_handler
@@ -84,7 +84,7 @@ def doctr_predict_text_lines(np_img: PixelValues, predictor: DetectionPredictor)
84
84
 
85
85
  detection_results = [
86
86
  DetectionResult(
87
- box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.WORD
87
+ box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutLabel.WORD
88
88
  )
89
89
  for box in raw_output[0]["words"]
90
90
  ]
@@ -507,4 +507,4 @@ class DocTrRotationTransformer(ImageTransformer):
507
507
  return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
508
508
 
509
509
  def get_category_names(self) -> tuple[ObjectTypes, ...]:
510
- return (PageType.ANGLE,)
510
+ return (PageKey.ANGLE,)
@@ -22,6 +22,7 @@ HF Detr and DeformableDetr models.
22
22
  from __future__ import annotations
23
23
 
24
24
  import os
25
+ import warnings
25
26
  from abc import ABC
26
27
  from pathlib import Path
27
28
  from typing import TYPE_CHECKING, Literal, Mapping, Optional, Sequence, Union
@@ -41,6 +42,7 @@ with try_import() as pt_import_guard:
41
42
 
42
43
  with try_import() as tr_import_guard:
43
44
  from transformers import (
45
+ AutoConfig,
44
46
  DeformableDetrForObjectDetection,
45
47
  DeformableDetrImageProcessorFast,
46
48
  DetrImageProcessorFast,
@@ -265,14 +267,16 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
265
267
  Raises:
266
268
  ValueError: If model architecture is not eligible.
267
269
  """
268
- if "TableTransformerForObjectDetection" in config.architectures:
269
- return TableTransformerForObjectDetection.from_pretrained(
270
- pretrained_model_name_or_path=os.fspath(path_weights), config=config
271
- )
272
- if "DeformableDetrForObjectDetection" in config.architectures:
273
- return DeformableDetrForObjectDetection.from_pretrained(
274
- pretrained_model_name_or_path=os.fspath(path_weights), config=config
275
- )
270
+ with warnings.catch_warnings():
271
+ warnings.filterwarnings("ignore", message=".*copying from a non-meta parameter.*")
272
+ if "TableTransformerForObjectDetection" in config.architectures:
273
+ return TableTransformerForObjectDetection.from_pretrained(
274
+ pretrained_model_name_or_path=os.fspath(path_weights), config=config
275
+ )
276
+ if "DeformableDetrForObjectDetection" in config.architectures:
277
+ return DeformableDetrForObjectDetection.from_pretrained(
278
+ pretrained_model_name_or_path=os.fspath(path_weights), config=config
279
+ )
276
280
  raise ValueError(
277
281
  f"Model architecture {config.architectures} not eligible. Please use either "
278
282
  "TableTransformerForObjectDetection or DeformableDetrForObjectDetection."
@@ -317,8 +321,14 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
317
321
  Returns:
318
322
  `PretrainedConfig` instance.
319
323
  """
320
- config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=os.fspath(path_config))
321
- config.use_timm_backbone = True
324
+
325
+ config = AutoConfig.from_pretrained(pretrained_model_name_or_path=os.fspath(path_config))
326
+
327
+ # keep older behavior when supported by the concrete config
328
+ if hasattr(config, "use_timm_backbone"):
329
+ config.use_timm_backbone = True
330
+
331
+ # deepdoctection-specific runtime attributes
322
332
  config.threshold = 0.1
323
333
  config.nms_threshold = 0.05
324
334
  return config
@@ -123,7 +123,7 @@ def predict_sequence_classes_from_lm(
123
123
 
124
124
  outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
125
125
 
126
- score = torch.max(F.softmax(outputs.logits)).tolist()
126
+ score = torch.max(F.softmax(outputs.logits, dim=1)).tolist()
127
127
  sequence_class_predictions = outputs.logits.argmax(-1).squeeze().tolist()
128
128
 
129
129
  return SequenceClassResult(class_id=sequence_class_predictions, score=float(score))
@@ -25,7 +25,7 @@ from lazy_imports import try_import
25
25
 
26
26
  from dd_core.utils.context import save_tmp_file
27
27
  from dd_core.utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
28
- from dd_core.utils.object_types import LayoutType, ObjectTypes
28
+ from dd_core.utils.object_types import LayoutLabel, ObjectTypes
29
29
  from dd_core.utils.types import Requirement
30
30
 
31
31
  from .base import DetectionResult, ModelCategories, PdfMiner
@@ -82,7 +82,7 @@ class PdfPlumberTextDetector(PdfMiner):
82
82
  def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
83
83
  self.name = "Pdfplumber"
84
84
  self.model_id = self.get_model_id()
85
- self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
85
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
86
86
  self.x_tolerance = x_tolerance
87
87
  self.y_tolerance = y_tolerance
88
88
  self._page: Optional[Page] = None
@@ -169,7 +169,7 @@ class Pdfmium2TextDetector(PdfMiner):
169
169
  def __init__(self) -> None:
170
170
  self.name = "Pdfmium"
171
171
  self.model_id = self.get_model_id()
172
- self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
172
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.LINE})
173
173
  self._page: Optional[Page] = None
174
174
 
175
175
  def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
@@ -37,7 +37,7 @@ from dd_core.utils.context import save_tmp_file, timeout_manager
37
37
  from dd_core.utils.error import DependencyError, TesseractError
38
38
  from dd_core.utils.file_utils import _TESS_PATH, get_tesseract_requirement
39
39
  from dd_core.utils.metacfg import config_to_cli_str, set_config_by_yaml
40
- from dd_core.utils.object_types import LayoutType, ObjectTypes, PageType
40
+ from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey
41
41
  from dd_core.utils.transform import RotationTransform
42
42
  from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
43
43
  from dd_core.utils.viz import viz_handler
@@ -248,7 +248,7 @@ def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) ->
248
248
  DetectionResult(
249
249
  box=[ulx, uly, lrx, lry],
250
250
  class_id=2,
251
- class_name=LayoutType.LINE,
251
+ class_name=LayoutLabel.LINE,
252
252
  text=" ".join(
253
253
  [detect_result.text for detect_result in block_group if isinstance(detect_result.text, str)]
254
254
  ),
@@ -295,7 +295,7 @@ def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool
295
295
  score=score / 100,
296
296
  text=caption[5],
297
297
  class_id=1,
298
- class_name=LayoutType.WORD,
298
+ class_name=LayoutLabel.WORD,
299
299
  )
300
300
  all_results.append(word)
301
301
  if text_lines:
@@ -381,9 +381,9 @@ class TesseractOcrDetector(ObjectDetector):
381
381
  self.config = hyper_param_config
382
382
 
383
383
  if self.config.LINES:
384
- self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
384
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
385
385
  else:
386
- self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
386
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
387
387
 
388
388
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
389
389
  """
@@ -455,7 +455,7 @@ class TesseractRotationTransformer(ImageTransformer):
455
455
 
456
456
  def __init__(self) -> None:
457
457
  self.name = fspath(_TESS_PATH) + "-rotation"
458
- self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
458
+ self.categories = ModelCategories(init_categories={1: PageKey.ANGLE})
459
459
  self.model_id = self.get_model_id()
460
460
  self.rotator = RotationTransform(360)
461
461
 
@@ -28,7 +28,7 @@ from lazy_imports import try_import
28
28
  from dd_core.datapoint.convert import convert_np_array_to_b64_b
29
29
  from dd_core.utils.file_utils import get_boto3_requirement
30
30
  from dd_core.utils.logger import LoggingRecord, logger
31
- from dd_core.utils.object_types import LayoutType, ObjectTypes
31
+ from dd_core.utils.object_types import LayoutLabel, ObjectTypes
32
32
  from dd_core.utils.types import JsonDict, PixelValues, Requirement
33
33
 
34
34
  from .base import DetectionResult, ModelCategories, ObjectDetector
@@ -54,7 +54,7 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
54
54
  score=block["Confidence"] / 100,
55
55
  text=block["Text"],
56
56
  class_id=1 if block["BlockType"] == "WORD" else 2,
57
- class_name=LayoutType.WORD if block["BlockType"] == "WORD" else LayoutType.LINE,
57
+ class_name=LayoutLabel.WORD if block["BlockType"] == "WORD" else LayoutLabel.LINE,
58
58
  )
59
59
  all_results.append(word)
60
60
 
@@ -142,9 +142,9 @@ class TextractOcrDetector(ObjectDetector):
142
142
  credentials_kwargs = self._maybe_resolve_secret(**credentials_kwargs)
143
143
  self.client = boto3.client("textract", **credentials_kwargs)
144
144
  if self.text_lines:
145
- self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
145
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
146
146
  else:
147
- self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
147
+ self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
148
148
 
149
149
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
150
150
  """
@@ -28,7 +28,7 @@ from dd_core.datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation
28
28
  from dd_core.datapoint.box import BoundingBox, local_to_global_coords, rescale_coords
29
29
  from dd_core.datapoint.image import Image
30
30
  from dd_core.mapper.maputils import MappingContextManager
31
- from dd_core.utils.object_types import ObjectTypes, Relationships
31
+ from dd_core.utils.object_types import ObjectTypes, RelationshipKey
32
32
 
33
33
  from ..extern.base import DetectionResult
34
34
 
@@ -121,6 +121,10 @@ class DatapointManager:
121
121
  self._cache_anns = {ann.annotation_id: ann for ann in dp.get_annotation()}
122
122
  self.datapoint_is_passed = True
123
123
 
124
+ def set_model_id(self, model_id: str | None) -> None:
125
+ """Re-sets the model_id."""
126
+ self.model_id = model_id
127
+
124
128
  def assert_datapoint_passed(self) -> None:
125
129
  """
126
130
  Asserts that a datapoint is passed.
@@ -227,7 +231,7 @@ class DatapointManager:
227
231
  self.datapoint.image_id,
228
232
  ann_global_box.transform(image_width=self.datapoint.width, image_height=self.datapoint.height),
229
233
  )
230
- parent_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
234
+ parent_ann.dump_relationship(RelationshipKey.CHILD, ann.annotation_id)
231
235
 
232
236
  self.datapoint.dump(ann)
233
237
  self._cache_anns[ann.annotation_id] = ann
@@ -63,7 +63,7 @@ class PipelineComponent(ABC):
63
63
  Currently, predictors can only process single images. Processing higher number of batches is not planned.
64
64
  """
65
65
 
66
- def __init__(self, name: str, model_id: Optional[str] = None) -> None:
66
+ def __init__(self, name: str, model_id: Optional[str] = None, service_id: Optional[str] = None) -> None:
67
67
  """
68
68
  Initializes a `PipelineComponent`.
69
69
 
@@ -71,9 +71,10 @@ class PipelineComponent(ABC):
71
71
  name: The name of the pipeline component. The name will be used to identify a pipeline component in a
72
72
  pipeline. Use something that describes the task of the pipeline.
73
73
  model_id: Optional model identifier.
74
+ service_id: Optional service identifier override to avoid name collisions.
74
75
  """
75
76
  self.name = name
76
- self.service_id = self.get_service_id()
77
+ self.service_id = service_id or self.get_service_id()
77
78
  self.dp_manager = DatapointManager(self.service_id, model_id)
78
79
  self.timer_on = False
79
80
  self.filter_func: Callable[[DP], bool] = lambda dp: False