PyPI - deepdoctection - Versions diffs - 1.0.7__tar.gz → 1.2.0__tar.gz - Mend

deepdoctection 1.0.7__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepdoctection
-Version: 1.0.7
+Version: 1.2.0
 Summary: Repository for Document AI - server/inference core package
 Author: Dr. Janis Meyer
 License: Apache License 2.0
@@ -18,7 +18,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: dd-core[full]>=1.0.1
-Requires-Dist: huggingface_hub>=0.26.0
+Requires-Dist: huggingface_hub>=1.0
 Provides-Extra: full
 Requires-Dist: dd-datasets[full]>=1.0.1; extra == "full"
 Requires-Dist: boto3==1.34.102; extra == "full"
@@ -30,7 +30,7 @@ Requires-Dist: distance==0.1.3; extra == "full"
 Requires-Dist: lxml>=4.9.1; extra == "full"
 Requires-Dist: pycocotools>=2.0.2; extra == "full"
 Requires-Dist: timm>=0.9.16; extra == "full"
-Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "full"
+Requires-Dist: transformers>=5.2.0; extra == "full"
 Requires-Dist: accelerate>=0.29.1; extra == "full"
 Requires-Dist: python-doctr>=1.0.0; extra == "full"
 Provides-Extra: types

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "deepdoctection"
-version = "1.0.7"
+version = "1.2.0"
 authors = [
     {name = "Dr. Janis Meyer"}
 ]
@@ -25,7 +25,7 @@ classifiers = [
 dependencies = [
     "dd-core[full]>=1.0.1",
-    "huggingface_hub>=0.26.0",
+    "huggingface_hub>=1.0",
 ]
 [project.optional-dependencies]
@@ -43,7 +43,7 @@ full = [
     "pycocotools>=2.0.2",
     # DL dependencies
     "timm>=0.9.16",
-    "transformers>=4.48.0,<5.0.0",
+    "transformers>=5.2.0",
     "accelerate>=0.29.1",
     "python-doctr>=1.0.0",
 ]

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/__init__.py RENAMED Viewed

@@ -6,13 +6,13 @@ Init file for deepdoctection package. This file is used to import all submodules
 """
 import sys
-from typing import TYPE_CHECKING, Dict, List
+from typing import TYPE_CHECKING, Dict
 from dd_core.utils.env_info import collect_env_info
 from dd_core.utils.file_utils import _LazyModule
 from dd_core.utils.logger import LoggingRecord, logger
-__version__ = "1.0.7"
+__version__ = "1.2.0"
 _IMPORT_STRUCTURE = {
     "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory", "update_cfg_from_defaults"],
     "eval": [

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/config.py RENAMED Viewed

@@ -423,7 +423,7 @@ The distance is calculated using the center points of the layout elements.
 from dd_core.datapoint.view import IMAGE_DEFAULTS
 from dd_core.utils.metacfg import AttrDict
-from dd_core.utils.object_types import CellType, LayoutType
+from dd_core.utils.object_types import CellKey, CellLabel, LayoutLabel
 cfg = AttrDict()
@@ -551,33 +551,33 @@ cfg.LAYOUT.PAD.LEFT = 0
 # LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01]
 # LAYOUT_NMS_PAIRS.PRIORITY = ['table', None]
 cfg.LAYOUT_NMS_PAIRS.COMBINATIONS = [
-    [LayoutType.TABLE, LayoutType.TITLE],
-    [LayoutType.TABLE, LayoutType.TEXT],
-    [LayoutType.TABLE, LayoutType.KEY_VALUE_AREA],
-    [LayoutType.TABLE, LayoutType.LIST_ITEM],
-    [LayoutType.TABLE, LayoutType.LIST],
-    [LayoutType.TABLE, LayoutType.FIGURE],
-    [LayoutType.TITLE, LayoutType.TEXT],
-    [LayoutType.TEXT, LayoutType.KEY_VALUE_AREA],
-    [LayoutType.TEXT, LayoutType.LIST_ITEM],
-    [LayoutType.TEXT, LayoutType.CAPTION],
-    [LayoutType.KEY_VALUE_AREA, LayoutType.LIST_ITEM],
-    [LayoutType.FIGURE, LayoutType.CAPTION],
+    [LayoutLabel.TABLE, LayoutLabel.TITLE],
+    [LayoutLabel.TABLE, LayoutLabel.TEXT],
+    [LayoutLabel.TABLE, LayoutLabel.KEY_VALUE_AREA],
+    [LayoutLabel.TABLE, LayoutLabel.LIST_ITEM],
+    [LayoutLabel.TABLE, LayoutLabel.LIST],
+    [LayoutLabel.TABLE, LayoutLabel.FIGURE],
+    [LayoutLabel.TITLE, LayoutLabel.TEXT],
+    [LayoutLabel.TEXT, LayoutLabel.KEY_VALUE_AREA],
+    [LayoutLabel.TEXT, LayoutLabel.LIST_ITEM],
+    [LayoutLabel.TEXT, LayoutLabel.CAPTION],
+    [LayoutLabel.KEY_VALUE_AREA, LayoutLabel.LIST_ITEM],
+    [LayoutLabel.FIGURE, LayoutLabel.CAPTION],
 ]
 cfg.LAYOUT_NMS_PAIRS.THRESHOLDS = [0.001, 0.01, 0.01, 0.001, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.01, 0.001]
 cfg.LAYOUT_NMS_PAIRS.PRIORITY = [
-    LayoutType.TABLE,
-    LayoutType.TABLE,
-    LayoutType.TABLE,
-    LayoutType.TABLE,
-    LayoutType.TABLE,
-    LayoutType.TABLE,
-    LayoutType.TEXT,
-    LayoutType.TEXT,
+    LayoutLabel.TABLE,
+    LayoutLabel.TABLE,
+    LayoutLabel.TABLE,
+    LayoutLabel.TABLE,
+    LayoutLabel.TABLE,
+    LayoutLabel.TABLE,
+    LayoutLabel.TEXT,
+    LayoutLabel.TEXT,
     None,
-    LayoutType.CAPTION,
-    LayoutType.KEY_VALUE_AREA,
-    LayoutType.FIGURE,
+    LayoutLabel.CAPTION,
+    LayoutLabel.KEY_VALUE_AREA,
+    LayoutLabel.FIGURE,
 ]
 # Relevant when LIB = PT. Use either TorchScript weights via ITEM.WEIGHTS_TS
@@ -688,43 +688,43 @@ cfg.SEGMENTATION.STRETCH_RULE = "equal"
 # Specifies the layout category used to identify tables.
 # Used in both Deepdoctection and Table Transformer approaches.
-cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
+cfg.SEGMENTATION.TABLE_NAME = LayoutLabel.TABLE
 # Lists the layout or cell types used in the original Deepdoctection approach.
 # Used by TableSegmentationService for cell assignments.
-cfg.SEGMENTATION.CELL_NAMES = [CellType.COLUMN_HEADER, CellType.BODY, LayoutType.CELL]
+cfg.SEGMENTATION.CELL_NAMES = [CellLabel.COLUMN_HEADER, CellLabel.BODY, LayoutLabel.CELL]
 # Lists all cell types used by the Table Transformer approach (PubtablesSegmentationService).
 # LayoutType.CELL is synthetically generated and not predicted by the structure recognition model.
 cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
-    LayoutType.CELL,
+    LayoutLabel.CELL,
 ]
 # Subset of PUBTABLES_CELL_NAMES that represent spanning/header cells.
 # These need to be matched with row or column elements.
 cfg.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES = [
-    CellType.SPANNING,
+    CellLabel.SPANNING,
 ]
 # Lists the layout categories used to identify row and column elements.
 # Used by TableSegmentationService.
-cfg.SEGMENTATION.ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
+cfg.SEGMENTATION.ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
 # Equivalent to ITEM_NAMES but used in the Table Transformer approach.
-cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
+cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutLabel.ROW, LayoutLabel.COLUMN]
 # Used in TableSegmentationService to specify sub-category annotations for row and column numbers.
-cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
+cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
 # Equivalent to SUB_ITEM_NAMES, but used with the Table Transformer approach.
-cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
+cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellKey.ROW_NUMBER, CellKey.COLUMN_NUMBER]
 # Used in PubtablesSegmentationService.
 # Specifies which cells should be treated as header cells that need to be linked to row/column elements.
 cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES = [
-    CellType.COLUMN_HEADER,
-    CellType.ROW_HEADER,
-    CellType.PROJECTED_ROW_HEADER,
+    CellLabel.COLUMN_HEADER,
+    CellLabel.ROW_HEADER,
+    CellLabel.PROJECTED_ROW_HEADER,
 ]
 # Defines the threshold values for matching column/row header cells to their respective rows/columns
@@ -840,11 +840,11 @@ cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
 # Specifies the parent layout categories in the link relationship.
 # These are the elements to which related components (e.g., captions) should be linked.
-cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [LayoutType.FIGURE, LayoutType.TABLE]
+cfg.LAYOUT_LINK.PARENTAL_CATEGORIES = [LayoutLabel.FIGURE, LayoutLabel.TABLE]
 # Specifies the child layout categories in the link relationship.
 # These are typically smaller or subordinate elements (e.g., captions).
-cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
+cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutLabel.CAPTION]
 # Weights configuration for language detection model.
 cfg.LM_LANGUAGE_DETECT_CLASS.WEIGHTS = None

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/analyzer/factory.py RENAMED Viewed

@@ -28,7 +28,7 @@ from lazy_imports import try_import
 from dd_core.utils.env_info import SETTINGS
 from dd_core.utils.error import DependencyError
 from dd_core.utils.metacfg import AttrDict
-from dd_core.utils.object_types import CellType, LayoutType, ObjectTypes, Relationships
+from dd_core.utils.object_types import CellLabel, LayoutLabel, ObjectTypes, RelationshipKey
 from dd_core.utils.transform import PadTransform
 from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner
@@ -427,7 +427,7 @@ class ServiceFactory:
         if mode == "ITEM":
             if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
                 exclude_category_names.extend(
-                    [LayoutType.TABLE, CellType.COLUMN_HEADER, CellType.PROJECTED_ROW_HEADER, CellType.SPANNING]
+                    [LayoutLabel.TABLE, CellLabel.COLUMN_HEADER, CellLabel.PROJECTED_ROW_HEADER, CellLabel.SPANNING]
                 )
         return {"exclude_category_names": exclude_category_names}
@@ -452,7 +452,7 @@ class ServiceFactory:
         )
         return SubImageLayoutService(
             sub_image_detector=detector,
-            sub_image_names=[LayoutType.TABLE, LayoutType.TABLE_ROTATED],
+            sub_image_names=[LayoutLabel.TABLE, LayoutLabel.TABLE_ROTATED],
             detect_result_generator=detect_result_generator,
             padder=padder,
         )
@@ -1031,14 +1031,14 @@ class ServiceFactory:
             FamilyCompound(
                 parent_categories=parental_categories,
                 child_categories=text_container,
-                relationship_key=Relationships.CHILD,
+                relationship_key=RelationshipKey.CHILD,
             ),
             FamilyCompound(
-                parent_categories=[LayoutType.LIST],
-                child_categories=[LayoutType.LIST_ITEM],
-                relationship_key=Relationships.CHILD,
+                parent_categories=[LayoutLabel.LIST],
+                child_categories=[LayoutLabel.LIST_ITEM],
+                relationship_key=RelationshipKey.CHILD,
                 create_synthetic_parent=True,
-                synthetic_parent=LayoutType.LIST,
+                synthetic_parent=LayoutLabel.LIST,
             ),
         ]
         return MatchingService(
@@ -1093,7 +1093,7 @@ class ServiceFactory:
             FamilyCompound(
                 parent_categories=parental_categories,
                 child_categories=child_categories,
-                relationship_key=Relationships.LAYOUT_LINK,
+                relationship_key=RelationshipKey.LAYOUT_LINK,
             )
         ]
         return MatchingService(
@@ -1153,9 +1153,9 @@ class ServiceFactory:
         )
         family_compounds = [
             FamilyCompound(
-                parent_categories=[LayoutType.LIST],
-                child_categories=[LayoutType.LINE],
-                relationship_key=Relationships.CHILD,
+                parent_categories=[LayoutLabel.LIST],
+                child_categories=[LayoutLabel.LINE],
+                relationship_key=RelationshipKey.CHILD,
             ),
         ]
         return MatchingService(
@@ -1612,7 +1612,7 @@ class ServiceFactory:
         token_classifier: Union[LayoutTokenModels, LmTokenModels],
         tokenizer_fast: Any,
         use_other_as_default_category: bool,
-        segment_positions: Union[LayoutType, Sequence[LayoutType], None],
+        segment_positions: Union[LayoutLabel, Sequence[LayoutLabel], None],
         sliding_window_stride: int,
     ) -> LMTokenClassifierService:
         """

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/eval.py RENAMED Viewed

@@ -34,7 +34,7 @@ from dd_core.mapper import filter_cat, remove_cats
 from dd_core.mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
 from dd_core.mapper.wandbstruct import to_wandb_image
 from dd_core.utils.logger import LoggingRecord, logger
-from dd_core.utils.object_types import DatasetType, LayoutType, TypeOrStr, get_type
+from dd_core.utils.object_types import DatasetKind, LayoutLabel, TypeOrStr, get_type
 from dd_core.utils.types import PixelValues
 from dd_core.utils.viz import interactive_imshow
@@ -146,14 +146,14 @@ class Evaluator:
         self.wandb_table_agent: Optional[WandbTableAgent]
         if run is not None:
-            if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
+            if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
                 self.wandb_table_agent = WandbTableAgent(
                     run,
                     self.dataset.dataset_info.name,
                     50,
                     self.dataset.dataflow.categories.get_categories(filtered=True),
                 )
-            elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
+            elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
                 if hasattr(self.metric, "sub_cats"):
                     sub_cat_key, sub_cat_val_list = list(self.metric.sub_cats.items())[0]
                     sub_cat_val = sub_cat_val_list[0]
@@ -253,7 +253,7 @@ class Evaluator:
         possible_cats_in_datapoint = self.dataset.dataflow.categories.get_categories(as_dict=False, filtered=True)
         # clean-up procedure depends on the dataset type
-        if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
+        if self.dataset.dataset_info.type == DatasetKind.OBJECT_DETECTION:
             # we keep all image annotations that will not be generated through processing
             anns_to_keep = {ann for ann in possible_cats_in_datapoint if ann not in meta_anns.image_annotations}
             sub_cats_to_remove = meta_anns.sub_categories
@@ -269,11 +269,11 @@ class Evaluator:
                 remove_cats(sub_categories=sub_cats_to_remove, relationships=relationships_to_remove),
             )
-        elif self.dataset.dataset_info.type == DatasetType.SEQUENCE_CLASSIFICATION:
+        elif self.dataset.dataset_info.type == DatasetKind.SEQUENCE_CLASSIFICATION:
             summary_sub_cats_to_remove = meta_anns.summaries
             df_pr = MapData(df_pr, remove_cats(summary_sub_categories=summary_sub_cats_to_remove))
-        elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
+        elif self.dataset.dataset_info.type == DatasetKind.TOKEN_CLASSIFICATION:
             sub_cats_to_remove = meta_anns.sub_categories
             df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
         else:
@@ -313,7 +313,7 @@ class Evaluator:
         df_pr = self._clean_up_predict_dataflow_annotations(df_pr)
         page_parsing_component = PageParsingService(
-            text_container=LayoutType.WORD,
+            text_container=LayoutLabel.WORD,
             floating_text_block_categories=floating_text_block_categories,  # type: ignore
             include_residual_text_container=bool(include_residual_text_containers),
         )

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/eval/tedsmetric.py RENAMED Viewed

@@ -31,7 +31,7 @@ from dd_core.datapoint.image import Image
 from dd_core.datapoint.view import Page
 from dd_core.utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
 from dd_core.utils.logger import LoggingRecord, logger
-from dd_core.utils.object_types import LayoutType
+from dd_core.utils.object_types import LayoutLabel
 from dd_core.utils.types import MetricResults
 from .base import MetricBase
@@ -238,9 +238,9 @@ class TedsMetric(MetricBase):
     """
     metric = teds_metric
-    mapper: Callable[[Image, LayoutType, list[LayoutType]], Page] = Page.from_image
-    text_container: LayoutType = LayoutType.WORD
-    floating_text_block_categories = [LayoutType.TABLE]
+    mapper: Callable[[Image, LayoutLabel, list[LayoutLabel]], Page] = Page.from_image
+    text_container: LayoutLabel = LayoutLabel.WORD
+    floating_text_block_categories = [LayoutLabel.TABLE]
     structure_only = False

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/deskew.py RENAMED Viewed

@@ -24,7 +24,7 @@ from __future__ import annotations
 from lazy_imports import try_import
 from dd_core.utils.file_utils import get_jdeskew_requirement
-from dd_core.utils.object_types import ObjectTypes, PageType
+from dd_core.utils.object_types import ObjectTypes, PageKey
 from dd_core.utils.types import PixelValues, Requirement
 from dd_core.utils.viz import viz_handler
@@ -90,4 +90,4 @@ class Jdeskewer(ImageTransformer):
         return self.__class__(self.min_angle_rotation)
     def get_category_names(self) -> tuple[ObjectTypes, ...]:
-        return (PageType.ANGLE,)
+        return (PageKey.ANGLE,)

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/doctrocr.py RENAMED Viewed

@@ -35,7 +35,7 @@ from dd_core.utils.file_utils import (
     get_pytorch_requirement,
 )
 from dd_core.utils.fs import load_json
-from dd_core.utils.object_types import LayoutType, ObjectTypes, PageType, TypeOrStr
+from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey, TypeOrStr
 from dd_core.utils.transform import RotationTransform
 from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
 from dd_core.utils.viz import viz_handler
@@ -84,7 +84,7 @@ def doctr_predict_text_lines(np_img: PixelValues, predictor: DetectionPredictor)
     detection_results = [
         DetectionResult(
-            box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.WORD
+            box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutLabel.WORD
         )
         for box in raw_output[0]["words"]
     ]
@@ -507,4 +507,4 @@ class DocTrRotationTransformer(ImageTransformer):
         return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
     def get_category_names(self) -> tuple[ObjectTypes, ...]:
-        return (PageType.ANGLE,)
+        return (PageKey.ANGLE,)

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hfdetr.py RENAMED Viewed

@@ -22,6 +22,7 @@ HF Detr and DeformableDetr models.
 from __future__ import annotations
 import os
+import warnings
 from abc import ABC
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal, Mapping, Optional, Sequence, Union
@@ -41,6 +42,7 @@ with try_import() as pt_import_guard:
 with try_import() as tr_import_guard:
     from transformers import (
+        AutoConfig,
         DeformableDetrForObjectDetection,
         DeformableDetrImageProcessorFast,
         DetrImageProcessorFast,
@@ -265,14 +267,16 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
         Raises:
             ValueError: If model architecture is not eligible.
         """
-        if "TableTransformerForObjectDetection" in config.architectures:
-            return TableTransformerForObjectDetection.from_pretrained(
-                pretrained_model_name_or_path=os.fspath(path_weights), config=config
-            )
-        if "DeformableDetrForObjectDetection" in config.architectures:
-            return DeformableDetrForObjectDetection.from_pretrained(
-                pretrained_model_name_or_path=os.fspath(path_weights), config=config
-            )
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", message=".*copying from a non-meta parameter.*")
+            if "TableTransformerForObjectDetection" in config.architectures:
+                return TableTransformerForObjectDetection.from_pretrained(
+                    pretrained_model_name_or_path=os.fspath(path_weights), config=config
+                )
+            if "DeformableDetrForObjectDetection" in config.architectures:
+                return DeformableDetrForObjectDetection.from_pretrained(
+                    pretrained_model_name_or_path=os.fspath(path_weights), config=config
+                )
         raise ValueError(
             f"Model architecture {config.architectures} not eligible. Please use either "
             "TableTransformerForObjectDetection or DeformableDetrForObjectDetection."
@@ -317,8 +321,14 @@ class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
         Returns:
             `PretrainedConfig` instance.
         """
-        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=os.fspath(path_config))
-        config.use_timm_backbone = True
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path=os.fspath(path_config))
+        # keep older behavior when supported by the concrete config
+        if hasattr(config, "use_timm_backbone"):
+            config.use_timm_backbone = True
+        # deepdoctection-specific runtime attributes
         config.threshold = 0.1
         config.nms_threshold = 0.05
         return config

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/hflm.py RENAMED Viewed

@@ -123,7 +123,7 @@ def predict_sequence_classes_from_lm(
     outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
-    score = torch.max(F.softmax(outputs.logits)).tolist()
+    score = torch.max(F.softmax(outputs.logits, dim=1)).tolist()
     sequence_class_predictions = outputs.logits.argmax(-1).squeeze().tolist()
     return SequenceClassResult(class_id=sequence_class_predictions, score=float(score))

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/pdftext.py RENAMED Viewed

@@ -25,7 +25,7 @@ from lazy_imports import try_import
 from dd_core.utils.context import save_tmp_file
 from dd_core.utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
-from dd_core.utils.object_types import LayoutType, ObjectTypes
+from dd_core.utils.object_types import LayoutLabel, ObjectTypes
 from dd_core.utils.types import Requirement
 from .base import DetectionResult, ModelCategories, PdfMiner
@@ -82,7 +82,7 @@ class PdfPlumberTextDetector(PdfMiner):
     def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
         self.name = "Pdfplumber"
         self.model_id = self.get_model_id()
-        self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
+        self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
         self.x_tolerance = x_tolerance
         self.y_tolerance = y_tolerance
         self._page: Optional[Page] = None
@@ -169,7 +169,7 @@ class Pdfmium2TextDetector(PdfMiner):
     def __init__(self) -> None:
         self.name = "Pdfmium"
         self.model_id = self.get_model_id()
-        self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
+        self.categories = ModelCategories(init_categories={1: LayoutLabel.LINE})
         self._page: Optional[Page] = None
     def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/tessocr.py RENAMED Viewed

@@ -37,7 +37,7 @@ from dd_core.utils.context import save_tmp_file, timeout_manager
 from dd_core.utils.error import DependencyError, TesseractError
 from dd_core.utils.file_utils import _TESS_PATH, get_tesseract_requirement
 from dd_core.utils.metacfg import config_to_cli_str, set_config_by_yaml
-from dd_core.utils.object_types import LayoutType, ObjectTypes, PageType
+from dd_core.utils.object_types import LayoutLabel, ObjectTypes, PageKey
 from dd_core.utils.transform import RotationTransform
 from dd_core.utils.types import PathLikeOrStr, PixelValues, Requirement
 from dd_core.utils.viz import viz_handler
@@ -248,7 +248,7 @@ def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) ->
                 DetectionResult(
                     box=[ulx, uly, lrx, lry],
                     class_id=2,
-                    class_name=LayoutType.LINE,
+                    class_name=LayoutLabel.LINE,
                     text=" ".join(
                         [detect_result.text for detect_result in block_group if isinstance(detect_result.text, str)]
                     ),
@@ -295,7 +295,7 @@ def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool
                 score=score / 100,
                 text=caption[5],
                 class_id=1,
-                class_name=LayoutType.WORD,
+                class_name=LayoutLabel.WORD,
             )
             all_results.append(word)
     if text_lines:
@@ -381,9 +381,9 @@ class TesseractOcrDetector(ObjectDetector):
         self.config = hyper_param_config
         if self.config.LINES:
-            self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
+            self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
         else:
-            self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
+            self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
@@ -455,7 +455,7 @@ class TesseractRotationTransformer(ImageTransformer):
     def __init__(self) -> None:
         self.name = fspath(_TESS_PATH) + "-rotation"
-        self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
+        self.categories = ModelCategories(init_categories={1: PageKey.ANGLE})
         self.model_id = self.get_model_id()
         self.rotator = RotationTransform(360)

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/extern/texocr.py RENAMED Viewed

@@ -28,7 +28,7 @@ from lazy_imports import try_import
 from dd_core.datapoint.convert import convert_np_array_to_b64_b
 from dd_core.utils.file_utils import get_boto3_requirement
 from dd_core.utils.logger import LoggingRecord, logger
-from dd_core.utils.object_types import LayoutType, ObjectTypes
+from dd_core.utils.object_types import LayoutLabel, ObjectTypes
 from dd_core.utils.types import JsonDict, PixelValues, Requirement
 from .base import DetectionResult, ModelCategories, ObjectDetector
@@ -54,7 +54,7 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
                     score=block["Confidence"] / 100,
                     text=block["Text"],
                     class_id=1 if block["BlockType"] == "WORD" else 2,
-                    class_name=LayoutType.WORD if block["BlockType"] == "WORD" else LayoutType.LINE,
+                    class_name=LayoutLabel.WORD if block["BlockType"] == "WORD" else LayoutLabel.LINE,
                 )
                 all_results.append(word)
@@ -142,9 +142,9 @@ class TextractOcrDetector(ObjectDetector):
         credentials_kwargs = self._maybe_resolve_secret(**credentials_kwargs)
         self.client = boto3.client("textract", **credentials_kwargs)
         if self.text_lines:
-            self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
+            self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD, 2: LayoutLabel.LINE})
         else:
-            self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
+            self.categories = ModelCategories(init_categories={1: LayoutLabel.WORD})
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/anngen.py RENAMED Viewed

@@ -28,7 +28,7 @@ from dd_core.datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation
 from dd_core.datapoint.box import BoundingBox, local_to_global_coords, rescale_coords
 from dd_core.datapoint.image import Image
 from dd_core.mapper.maputils import MappingContextManager
-from dd_core.utils.object_types import ObjectTypes, Relationships
+from dd_core.utils.object_types import ObjectTypes, RelationshipKey
 from ..extern.base import DetectionResult
@@ -121,6 +121,10 @@ class DatapointManager:
         self._cache_anns = {ann.annotation_id: ann for ann in dp.get_annotation()}
         self.datapoint_is_passed = True
+    def set_model_id(self, model_id: str | None) -> None:
+        """Re-sets the model_id."""
+        self.model_id = model_id
     def assert_datapoint_passed(self) -> None:
         """
         Asserts that a datapoint is passed.
@@ -227,7 +231,7 @@ class DatapointManager:
                     self.datapoint.image_id,
                     ann_global_box.transform(image_width=self.datapoint.width, image_height=self.datapoint.height),
                 )
-                parent_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
+                parent_ann.dump_relationship(RelationshipKey.CHILD, ann.annotation_id)
             self.datapoint.dump(ann)
             self._cache_anns[ann.annotation_id] = ann

{deepdoctection-1.0.7 → deepdoctection-1.2.0}/src/deepdoctection/pipe/base.py RENAMED Viewed

@@ -63,7 +63,7 @@ class PipelineComponent(ABC):
         Currently, predictors can only process single images. Processing higher number of batches is not planned.
     """
-    def __init__(self, name: str, model_id: Optional[str] = None) -> None:
+    def __init__(self, name: str, model_id: Optional[str] = None, service_id: Optional[str] = None) -> None:
         """
         Initializes a `PipelineComponent`.
@@ -71,9 +71,10 @@ class PipelineComponent(ABC):
             name: The name of the pipeline component. The name will be used to identify a pipeline component in a
                   pipeline. Use something that describes the task of the pipeline.
             model_id: Optional model identifier.
+            service_id: Optional service identifier override to avoid name collisions.
         """
         self.name = name
-        self.service_id = self.get_service_id()
+        self.service_id = service_id or self.get_service_id()
         self.dp_manager = DatapointManager(self.service_id, model_id)
         self.timer_on = False
         self.filter_func: Callable[[DP], bool] = lambda dp: False

deepdoctection 1.0.7__tar.gz → 1.2.0__tar.gz

deepdoctection 1.0.7__tar.gz → 1.2.0__tar.gz