deepdoctection 0.39.6__py3-none-any.whl → 0.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/_config.py +0 -1
- deepdoctection/analyzer/factory.py +34 -13
- deepdoctection/datapoint/box.py +239 -92
- deepdoctection/datapoint/convert.py +4 -0
- deepdoctection/datapoint/image.py +5 -5
- deepdoctection/datapoint/view.py +5 -5
- deepdoctection/datasets/registry.py +1 -1
- deepdoctection/mapper/match.py +28 -8
- deepdoctection/pipe/anngen.py +1 -25
- deepdoctection/pipe/common.py +91 -38
- deepdoctection/pipe/layout.py +26 -13
- deepdoctection/pipe/order.py +6 -22
- deepdoctection/pipe/segment.py +36 -43
- deepdoctection/pipe/sub_layout.py +2 -11
- deepdoctection/pipe/text.py +5 -14
- deepdoctection/train/hf_detr_train.py +41 -8
- deepdoctection/train/hf_layoutlm_train.py +2 -2
- deepdoctection/utils/types.py +1 -1
- {deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/METADATA +3 -2
- {deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/RECORD +24 -24
- {deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/WHEEL +1 -1
- {deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info/licenses}/LICENSE +0 -0
- {deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/segment.py
CHANGED
@@ -436,24 +436,24 @@ def segment_table(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=cell_names)
@@ -499,7 +499,6 @@ def create_intersection_cells(
     rows: Sequence[ImageAnnotation],
     cols: Sequence[ImageAnnotation],
     table_annotation_id: str,
-    cell_class_id: int,
     sub_item_names: Sequence[ObjectTypes],
 ) -> tuple[Sequence[DetectionResult], Sequence[SegmentationResult]]:
     """
@@ -509,7 +508,6 @@ def create_intersection_cells(
     :param rows: list of rows
     :param cols: list of columns
     :param table_annotation_id: annotation_id of underlying table ImageAnnotation
-    :param cell_class_id: The class_id to a synthetically generated DetectionResult
     :param sub_item_names: ObjectTypes for row-/column number
     :return: Pair of lists of `DetectionResult` and `SegmentationResult`.
     """
@@ -526,7 +524,6 @@ def create_intersection_cells(
         detect_result_cells.append(
             DetectionResult(
                 box=boxes_cells[idx].to_list(mode="xyxy"),
-                class_id=cell_class_id,
                 absolute_coords=boxes_cells[idx].absolute_coords,
                 class_name=LayoutType.CELL,
             )
@@ -574,13 +571,13 @@ def header_cell_to_item_detect_result(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     item_index, _, items, _ = match_anns_by_intersection(
         dp,
-        item_header_name,
-        item_name,
-        segment_rule,
-        threshold,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_header_name,
+        child_ann_category_names=item_name,
+        matching_rule=segment_rule,
+        threshold=threshold,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
     item_headers = []
     for idx, item in enumerate(items):
@@ -622,24 +619,24 @@ def segment_pubtables(
     child_ann_ids = table.get_relationship(Relationships.CHILD)
     cell_index_rows, row_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[0],
-        spanning_cell_names,
-        segment_rule,
-        threshold_rows,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[0],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_rows,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     cell_index_cols, col_index, _, _ = match_anns_by_intersection(
         dp,
-        item_names[1],
-        spanning_cell_names,
-        segment_rule,
-        threshold_cols,
-        True,
-        child_ann_ids,
-        child_ann_ids,
+        parent_ann_category_names=item_names[1],
+        child_ann_category_names=spanning_cell_names,
+        matching_rule=segment_rule,
+        threshold=threshold_cols,
+        use_weighted_intersections=True,
+        parent_ann_ids=child_ann_ids,
+        child_ann_ids=child_ann_ids,
     )
 
     spanning_cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=spanning_cell_names)
@@ -976,7 +973,6 @@ class PubtablesSegmentationService(PipelineComponent):
         tile_table_with_items: bool,
         remove_iou_threshold_rows: float,
         remove_iou_threshold_cols: float,
-        cell_class_id: int,
         table_name: TypeOrStr,
         cell_names: Sequence[TypeOrStr],
         spanning_cell_names: Sequence[TypeOrStr],
@@ -997,7 +993,6 @@ class PubtablesSegmentationService(PipelineComponent):
                            the adjacent row. Will do a similar shifting with columns.
        :param remove_iou_threshold_rows: iou threshold for removing overlapping rows
        :param remove_iou_threshold_cols: iou threshold for removing overlapping columns
-       :param cell_class_id: 'category_id' for cells to be generated from intersected rows and columns
        :param table_name: layout type table
        :param cell_names: layout type of cells
        :param spanning_cell_names: layout type of spanning cells
@@ -1022,7 +1017,6 @@ class PubtablesSegmentationService(PipelineComponent):
        self.spanning_cell_names = [get_type(cell_name) for cell_name in spanning_cell_names]
        self.remove_iou_threshold_rows = remove_iou_threshold_rows
        self.remove_iou_threshold_cols = remove_iou_threshold_cols
-       self.cell_class_id = cell_class_id
        self.cell_to_image = cell_to_image
        self.crop_cell_image = crop_cell_image
        self.item_names = [get_type(item_name) for item_name in item_names]  # row names must be before column name
@@ -1089,7 +1083,7 @@ class PubtablesSegmentationService(PipelineComponent):
        rows = dp.get_annotation(category_names=self.item_names[0], annotation_ids=item_ann_ids)
        columns = dp.get_annotation(category_names=self.item_names[1], annotation_ids=item_ann_ids)
        detect_result_cells, segment_result_cells = create_intersection_cells(
-           rows, columns, table.annotation_id, self.
+           rows, columns, table.annotation_id, self.sub_item_names
        )
        cell_rn_cn_to_ann_id = {}
        for detect_result, segment_result in zip(detect_result_cells, segment_result_cells):
@@ -1228,7 +1222,6 @@ class PubtablesSegmentationService(PipelineComponent):
            self.tile_table,
            self.remove_iou_threshold_rows,
            self.remove_iou_threshold_cols,
-           self.cell_class_id,
            self.table_name,
            self.cell_names,
            self.spanning_cell_names,
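For downstream code that calls `match_anns_by_intersection` directly, the 0.39.6 positional style shown on the removed lines above no longer lines up with 0.40.0. Below is a minimal migration sketch, not taken from the release itself: only the keyword names mirror the new call sites; the import paths, the "ioa" rule, the 0.6 threshold and the category names are assumptions for illustration.

# Hypothetical migration sketch for callers of match_anns_by_intersection.
# Keyword names mirror the 0.40.0 call sites above; everything else is assumed.
from deepdoctection.mapper.match import match_anns_by_intersection  # import path assumed
from deepdoctection.utils.settings import LayoutType, Relationships  # import path assumed

def cells_per_row(dp, table):
    # `dp` is an Image datapoint, `table` a table ImageAnnotation with CHILD relationships set
    child_ann_ids = table.get_relationship(Relationships.CHILD)

    # 0.39.6 (positional):
    # match_anns_by_intersection(dp, LayoutType.ROW, LayoutType.CELL, "ioa", 0.6, True,
    #                            child_ann_ids, child_ann_ids)

    # 0.40.0 (keyword arguments):
    cell_index_rows, row_index, _, _ = match_anns_by_intersection(
        dp,
        parent_ann_category_names=LayoutType.ROW,
        child_ann_category_names=LayoutType.CELL,
        matching_rule="ioa",          # assumed rule value
        threshold=0.6,                # assumed threshold
        use_weighted_intersections=True,
        parent_ann_ids=child_ann_ids,
        child_ann_ids=child_ann_ids,
    )
    return cell_index_rows, row_index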
deepdoctection/pipe/sub_layout.py
CHANGED

@@ -92,7 +92,6 @@ class DetectResultGenerator:
             detect_result_list.append(
                 DetectionResult(
                     box=[0.0, 0.0, float(self.width), float(self.height)],  # type: ignore
-                    class_id=self.categories_name_as_key[category_name],
                     class_name=category_name,
                     score=0.0,
                     absolute_coords=self.absolute_coords,
@@ -123,7 +122,7 @@ class DetectResultGenerator:
         """
         sane_detect_results = []
         for detect_result in detect_result_list:
-            if detect_result.box:
+            if detect_result.box is not None:
                 ulx, uly, lrx, lry = detect_result.box
                 if ulx >= 0 and lrx - ulx >= 0 and uly >= 0 and lry - uly >= 0:
                     sane_detect_results.append(detect_result)
@@ -156,14 +155,13 @@ class SubImageLayoutService(PipelineComponent):
         detect_result_generator = DetectResultGenerator(categories_items)
         d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": LayoutType.row,
                                                                         "2": LayoutType.column})
-        item_component = SubImageLayoutService(d_items, LayoutType.table,
+        item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
     """

     def __init__(
         self,
         sub_image_detector: ObjectDetector,
         sub_image_names: Union[str, Sequence[TypeOrStr]],
-        category_id_mapping: Optional[dict[int, int]] = None,
         detect_result_generator: Optional[DetectResultGenerator] = None,
         padder: Optional[PadTransform] = None,
     ):
@@ -186,7 +184,6 @@ class SubImageLayoutService(PipelineComponent):
             if isinstance(sub_image_names, str)
             else tuple((get_type(cat) for cat in sub_image_names))
         )
-        self.category_id_mapping = category_id_mapping
         self.detect_result_generator = detect_result_generator
         self.padder = padder
         self.predictor = sub_image_detector
@@ -223,11 +220,6 @@ class SubImageLayoutService(PipelineComponent):
             detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)

         for detect_result in detect_result_list:
-            if self.category_id_mapping:
-                if detect_result.class_id:
-                    detect_result.class_id = self.category_id_mapping.get(
-                        detect_result.class_id, detect_result.class_id
-                    )
             self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)

     def get_meta_annotation(self) -> MetaAnnotation:
@@ -254,7 +246,6 @@ class SubImageLayoutService(PipelineComponent):
         return self.__class__(
             predictor,
             self.sub_image_name,
-            self.category_id_mapping,
             self.detect_result_generator,
             padder_clone,
         )
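The `category_id_mapping` argument is gone from `SubImageLayoutService`, and `DetectResultGenerator` no longer attaches a `class_id`; detection results are identified by `class_name` alone. A hedged migration sketch follows: `d_items` (an ObjectDetector for rows and columns) and `detect_result_generator` are assumed to exist already, the import path is assumed, and the old mapping values are invented for illustration.

# Hypothetical sketch of the constructor change; only the dropped argument is taken from the diff.
from deepdoctection.pipe.sub_layout import SubImageLayoutService  # import path assumed

# 0.39.6
# item_component = SubImageLayoutService(d_items, "table", category_id_mapping={1: 6, 2: 7},
#                                        detect_result_generator=detect_result_generator)

# 0.40.0 - category_id_mapping is removed; the component no longer remaps class ids,
# so the detector is expected to emit the final class_name directly.
item_component = SubImageLayoutService(d_items, "table", detect_result_generator)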
deepdoctection/pipe/text.py
CHANGED
@@ -70,7 +70,6 @@ class TextExtractionService(PipelineComponent):
         text_extract_detector: Union[ObjectDetector, PdfMiner, TextRecognizer],
         extract_from_roi: Optional[Union[Sequence[TypeOrStr], TypeOrStr]] = None,
         run_time_ocr_language_selection: bool = False,
-        skip_if_text_extracted: bool = False,
     ):
         """
         :param text_extract_detector: ObjectDetector
@@ -79,8 +78,6 @@ class TextExtractionService(PipelineComponent):
                            multiple language selections. Also requires that a language detection
                            pipeline component ran before. It will select the expert language OCR
                            model based on the determined language.
-        :param skip_if_text_extracted: Set to `True` if text has already been extracted in a previous pipeline component
-                            and should not be extracted again. Use-case: A PDF with some scanned images.
         """

         if extract_from_roi is None:
@@ -104,11 +101,6 @@ class TextExtractionService(PipelineComponent):
             raise TypeError("Only TesseractOcrDetector supports multiple languages")

         self.run_time_ocr_language_selection = run_time_ocr_language_selection
-        self.skip_if_text_extracted = skip_if_text_extracted
-        if self.skip_if_text_extracted and isinstance(self.predictor, TextRecognizer):
-            raise ValueError(
-                "skip_if_text_extracted=True and TextRecognizer in TextExtractionService is not compatible"
-            )

     def serve(self, dp: Image) -> None:
         maybe_batched_text_rois = self.get_text_rois(dp)
@@ -154,11 +146,6 @@ class TextExtractionService(PipelineComponent):
                  well `get_text_rois` will return an empty list.
        :return: list of ImageAnnotation or Image
        """
-        if self.skip_if_text_extracted:
-            text_categories = self.predictor.get_category_names()
-            text_anns = dp.get_annotation(category_names=text_categories)
-            if text_anns:
-                return []

        if self.extract_from_category:
            if self.predictor.accepts_batch:
@@ -223,7 +210,11 @@ class TextExtractionService(PipelineComponent):
        predictor = self.predictor.clone()
        if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
            raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
-        return self.__class__(
+        return self.__class__(
+            text_extract_detector=predictor,
+            extract_from_roi=deepcopy(self.extract_from_category),
+            run_time_ocr_language_selection=self.run_time_ocr_language_selection,
+        )

    def clear_predictor(self) -> None:
        self.predictor.clear_model()
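`skip_if_text_extracted` has been removed from `TextExtractionService`, so whether OCR runs at all is decided when the pipeline is assembled rather than per page. A hedged sketch of a 0.40.0 construction; only the keyword names come from the diff, while the Tesseract detector, its config path, the import paths and the ROI name are placeholders.

# Hypothetical sketch; detector construction, paths and ROI name are placeholders.
from deepdoctection.extern.tessocr import TesseractOcrDetector  # import path assumed
from deepdoctection.pipe.text import TextExtractionService       # import path assumed

ocr = TesseractOcrDetector("conf_tesseract.yaml")  # config path assumed

# 0.39.6 additionally accepted skip_if_text_extracted=True for PDFs with embedded text;
# 0.40.0 drops the flag, leaving only these three constructor arguments.
text = TextExtractionService(
    text_extract_detector=ocr,
    extract_from_roi="table",                 # restrict OCR to table regions (placeholder)
    run_time_ocr_language_selection=False,
)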
deepdoctection/train/hf_detr_train.py
CHANGED

@@ -37,6 +37,8 @@ from ..extern.hfdetr import HFDetrDerivedDetector
 from ..mapper.hfstruct import DetrDataCollator, image_to_hf_detr_training
 from ..pipe.base import PipelineComponent
 from ..pipe.registry import pipeline_component_registry
+from ..utils.error import DependencyError
+from ..utils.file_utils import wandb_available
 from ..utils.logger import LoggingRecord, logger
 from ..utils.types import PathLikeOrStr
 from ..utils.utils import string_to_dict
@@ -56,6 +58,9 @@ with try_import() as hf_import_guard:
         TrainingArguments,
     )

+with try_import() as wb_import_guard:
+    import wandb
+

 class DetrDerivedTrainer(Trainer):
     """
@@ -74,16 +79,18 @@ class DetrDerivedTrainer(Trainer):
         args: TrainingArguments,
         data_collator: DetrDataCollator,
         train_dataset: DatasetAdapter,
+        eval_dataset: Optional[DatasetBase] = None,
     ):
         self.evaluator: Optional[Evaluator] = None
         self.build_eval_kwargs: Optional[dict[str, Any]] = None
-        super().__init__(model, args, data_collator, train_dataset)
+        super().__init__(model, args, data_collator, train_dataset, eval_dataset=eval_dataset)

     def setup_evaluator(
         self,
         dataset_val: DatasetBase,
         pipeline_component: PipelineComponent,
         metric: Union[Type[MetricBase], MetricBase],
+        run: Optional[wandb.sdk.wandb_run.Run] = None,
         **build_eval_kwargs: Union[str, int],
     ) -> None:
         """
@@ -93,10 +100,11 @@ class DetrDerivedTrainer(Trainer):
         :param dataset_val: dataset on which to run evaluation
         :param pipeline_component: pipeline component to plug into the evaluator
         :param metric: A metric class
+        :param run: WandB run
         :param build_eval_kwargs:
         """

-        self.evaluator = Evaluator(dataset_val, pipeline_component, metric, num_threads=1)
+        self.evaluator = Evaluator(dataset_val, pipeline_component, metric, num_threads=1, run=run)
         assert self.evaluator.pipe_component
         for comp in self.evaluator.pipe_component.pipe_components:
             comp.clear_predictor()
@@ -205,12 +213,19 @@ def train_hf_detr(
         "remove_unused_columns": False,
         "per_device_train_batch_size": 2,
         "max_steps": number_samples,
-        "
+        "eval_strategy": (
             "steps"
-            if (
+            if (
+                dataset_val is not None
+                and (metric is not None or metric_name is not None)
+                and pipeline_component_name is not None
+            )
             else "no"
         ),
         "eval_steps": 5000,
+        "use_wandb": False,
+        "wandb_project": None,
+        "wandb_repo": "deepdoctection",
     }

     for conf in config_overwrite:
@@ -224,6 +239,23 @@ def train_hf_detr(
             pass
         conf_dict[key] = val

+    use_wandb = conf_dict.pop("use_wandb")
+    wandb_project = str(conf_dict.pop("wandb_project"))
+    wandb_repo = str(conf_dict.pop("wandb_repo"))
+
+    # Initialize Wandb, if necessary
+    run = None
+    if use_wandb:
+        if not wandb_available():
+            raise DependencyError("WandB must be installed separately")
+        run = wandb.init(project=wandb_project, config=conf_dict)
+        run._label(repo=wandb_repo)  # pylint: disable=W0212
+        os.environ["WANDB_DISABLED"] = "False"
+        os.environ["WANDB_WATCH"] = "True"
+        os.environ["WANDB_PROJECT"] = wandb_project
+    else:
+        os.environ["WANDB_DISABLED"] = "True"
+
     # Will inform about dataloader warnings if max_steps exceeds length of dataset
     if conf_dict["max_steps"] > number_samples:  # type: ignore
         logger.warning(
@@ -232,7 +264,7 @@ def train_hf_detr(
             )
         )

-    arguments = TrainingArguments(**conf_dict)
+    arguments = TrainingArguments(**conf_dict)  # pylint: disable=E1123
     logger.info(LoggingRecord(f"Config: \n {arguments.to_dict()}", arguments.to_dict()))

     id2label = {int(k) - 1: v for v, k in categories_dict_name_as_key.items()}
@@ -240,6 +272,7 @@ def train_hf_detr(
         pretrained_model_name_or_path=path_config_json,
         num_labels=len(id2label),
     )
+    config.use_timm_backbone = True

     if path_weights != "":
         model = TableTransformerForObjectDetection.from_pretrained(
@@ -252,9 +285,9 @@ def train_hf_detr(
         pretrained_model_name_or_path=path_feature_extractor_config_json
     )
     data_collator = DetrDataCollator(feature_extractor)
-    trainer = DetrDerivedTrainer(model, arguments, data_collator, dataset)
+    trainer = DetrDerivedTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)

-    if arguments.
+    if arguments.eval_strategy in (IntervalStrategy.STEPS,):
         categories = dataset_val.dataflow.categories.get_categories(filtered=True)  # type: ignore
         detector = HFDetrDerivedDetector(
             path_config_json, path_weights, path_feature_extractor_config_json, categories  # type: ignore
@@ -266,6 +299,6 @@ def train_hf_detr(
     metric = metric_registry.get(metric_name)
     assert metric is not None

-    trainer.setup_evaluator(dataset_val, pipeline_component, metric)  # type: ignore
+    trainer.setup_evaluator(dataset_val, pipeline_component, metric, run, **build_val_dict)  # type: ignore

     trainer.train()
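The new `use_wandb`, `wandb_project` and `wandb_repo` keys are popped from the config dict before it reaches `TrainingArguments`, so they are passed through `config_overwrite` like any other key. Below is a hedged invocation sketch: the config-overwrite keys mirror the defaults introduced in this diff, while the export path, the exact `train_hf_detr` signature, the dataset name and all file paths are assumptions.

# Hypothetical invocation sketch; signature, paths and dataset name are assumptions.
from deepdoctection.train import train_hf_detr  # export path assumed

train_hf_detr(
    path_config_json="path/to/config.json",
    path_weights="path/to/pytorch_model.bin",
    path_feature_extractor_config_json="path/to/preprocessor_config.json",
    dataset_train="pubtables1m_struct",          # placeholder dataset name
    config_overwrite=[
        "max_steps=20000",
        "eval_steps=5000",
        "use_wandb=True",                        # requires wandb to be installed separately
        "wandb_project=detr-table-structure",    # placeholder project name
        "wandb_repo=deepdoctection",
    ],
)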
deepdoctection/train/hf_layoutlm_train.py
CHANGED

@@ -395,7 +395,7 @@ def train_hf_layoutlm(
         "remove_unused_columns": False,
         "per_device_train_batch_size": 8,
         "max_steps": number_samples,
-        "
+        "eval_strategy": (
             "steps"
             if (dataset_val is not None and metric is not None and pipeline_component_name is not None)
             else "no"
@@ -475,7 +475,7 @@ def train_hf_layoutlm(
     )
     trainer = LayoutLMTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)

-    if arguments.
+    if arguments.eval_strategy in (IntervalStrategy.STEPS,):
         assert metric is not None  # silence mypy
         if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
             categories = dataset_val.dataflow.categories.get_categories(filtered=True)  # type: ignore
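Both trainers now populate `eval_strategy` rather than the older `evaluation_strategy` key, matching the renamed field on `transformers.TrainingArguments` in recent transformers releases. A minimal sketch of the resulting arguments object; all values are illustrative only.

# Minimal sketch of the renamed TrainingArguments field; values are illustrative.
from transformers import TrainingArguments

arguments = TrainingArguments(
    output_dir="/tmp/detr_run",      # placeholder
    max_steps=20000,
    per_device_train_batch_size=2,
    eval_strategy="steps",           # was `evaluation_strategy` in older transformers
    eval_steps=5000,
    remove_unused_columns=False,
)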
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: deepdoctection
-Version: 0.
+Version: 0.40.0
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer
@@ -133,6 +133,7 @@ Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: license
+Dynamic: license-file
 Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/RECORD
CHANGED

@@ -1,9 +1,9 @@
-deepdoctection/__init__.py,sha256=
+deepdoctection/__init__.py,sha256=Onsg4vkNNIGYytDmH96KsxYt3xQLxcAbyYHCeOqThR8,12780
 deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
-deepdoctection/analyzer/_config.py,sha256=
+deepdoctection/analyzer/_config.py,sha256=kxQzDQvl2ygH84VTnumbRF7JLGM6VeJoBzv1xssm6H4,5019
 deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
-deepdoctection/analyzer/factory.py,sha256=
+deepdoctection/analyzer/factory.py,sha256=sXGL_faLkKCUBfq5YIpmzV5cWuvWChYy-zP5OtdaM4Y,33251
 deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
 deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
 deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
@@ -17,16 +17,16 @@ deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP
 deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
 deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
 deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
-deepdoctection/datapoint/box.py,sha256=
-deepdoctection/datapoint/convert.py,sha256=
-deepdoctection/datapoint/image.py,sha256=
-deepdoctection/datapoint/view.py,sha256=
+deepdoctection/datapoint/box.py,sha256=XPhC_xHqLZJjzafg1pIS_CxnVB5-0_yk-twsZZ3ncUU,30093
+deepdoctection/datapoint/convert.py,sha256=Be2FvmRXt-5prZ1vwa5fG6VjgEQ_31hiQ13hAoXoaes,7740
+deepdoctection/datapoint/image.py,sha256=_jN46UJUsOi6GC6VEUcp3L_vLL-iYRW05RKcFLWb6Dc,34048
+deepdoctection/datapoint/view.py,sha256=iZiHMc2hkk6vWn87LK0Qf-toZU_kocW3m7Wq8M4IS2E,50782
 deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
 deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
 deepdoctection/datasets/base.py,sha256=AZx-hw8Mchzb7FiOASt7zCbiybFNsM_diBzKXyC-auU,22618
 deepdoctection/datasets/dataflow_builder.py,sha256=cYU2zV3gZW2bFvMHimlO9VIl3BAUaCwML08cCIQ8Em4,4107
 deepdoctection/datasets/info.py,sha256=sC1QCOdLWFMooVmiShZ43sLUpAi3FK4d0fsLyl_9-gA,20548
-deepdoctection/datasets/registry.py,sha256=
+deepdoctection/datasets/registry.py,sha256=utiB-PnE6vc5HvjcudO0O4Urp2BC3snqswY6d8uPQAo,3388
 deepdoctection/datasets/save.py,sha256=Y9508Qqp8gIGN7pbGgVBBnkiC6NdCb9L2YR4wVvEUxM,3350
 deepdoctection/datasets/instances/__init__.py,sha256=XEc_4vT5lDn6bbZID9ujDEumWu8Ec2W-QS4pI_bfWWE,1388
 deepdoctection/datasets/instances/doclaynet.py,sha256=Az7USCqF0lMk1n1Dk59uUrBgBNAbKEjtUvZnCgdUH70,12286
@@ -94,7 +94,7 @@ deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhX
 deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
 deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
 deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
-deepdoctection/mapper/match.py,sha256=
+deepdoctection/mapper/match.py,sha256=RDTYSGtbtT8ph3L83PyHIkezJ2K82MwNerSM72uTMxM,10267
 deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
 deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
 deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
@@ -102,25 +102,25 @@ deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8O
 deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_rnww,4481
 deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
 deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
-deepdoctection/pipe/anngen.py,sha256=
+deepdoctection/pipe/anngen.py,sha256=7wvp7eghDwrgcIyu1vjRxmVy4SADPbn-k4ud8y2bgjU,15338
 deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
-deepdoctection/pipe/common.py,sha256=
+deepdoctection/pipe/common.py,sha256=S6-NKvR0sqBfqjN-mH76uVgM_aHOZvhPe_ore36UPZA,21028
 deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
 deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
 deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
-deepdoctection/pipe/layout.py,sha256=
+deepdoctection/pipe/layout.py,sha256=ThULc0b1f9KyaXYk9z0qbuJ0nhIodah9PcrEq2xKpAY,5670
 deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
-deepdoctection/pipe/order.py,sha256=
+deepdoctection/pipe/order.py,sha256=0KNiMinedjfuDVVHxJSaDL1yl4Sub-miMPcEC4gGwPA,39423
 deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
 deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
-deepdoctection/pipe/segment.py,sha256=
-deepdoctection/pipe/sub_layout.py,sha256=
-deepdoctection/pipe/text.py,sha256=
+deepdoctection/pipe/segment.py,sha256=sny59GuP7dxLGX3YjHF0wllPxSiXL1GNQEhMGKcF8ZU,59594
+deepdoctection/pipe/sub_layout.py,sha256=OLKvCYJynoFpo7bf2b3HzY0k-TJDLc0PHveWKcDbqZI,13324
+deepdoctection/pipe/text.py,sha256=tLlJtneM__WsrAvp4pQFqwNlmq2RLqKqiPXlJ2lkniU,10483
 deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
 deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
 deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
-deepdoctection/train/hf_detr_train.py,sha256=
-deepdoctection/train/hf_layoutlm_train.py,sha256=
+deepdoctection/train/hf_detr_train.py,sha256=uBkkRyxrJF5UF__KbYvIlmb-HRWQ9TY6LiJr1Rm56kI,12043
+deepdoctection/train/hf_layoutlm_train.py,sha256=8kiGp_8GEyqCkLgeMgCJOLJWSVoKWkUBHsZtDjZOcRk,22556
 deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
 deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
 deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
@@ -138,11 +138,11 @@ deepdoctection/utils/pdf_utils.py,sha256=Fi0eZ2GbnO7N61Rd8b8YRKRff4dalHAzkcn3zpG
 deepdoctection/utils/settings.py,sha256=hDD6yDX_4pQXwR5ILVwJIj6hb7NXA0-ifnC25ldcUjA,12464
 deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
 deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F2GPU,8502
-deepdoctection/utils/types.py,sha256=
+deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
 deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
 deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
-deepdoctection-0.
-deepdoctection-0.
-deepdoctection-0.
-deepdoctection-0.
-deepdoctection-0.
+deepdoctection-0.40.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
+deepdoctection-0.40.0.dist-info/METADATA,sha256=YyPBlJBcUfAQP_cW7Mhq3eNs2-924o4BMS4X6Sn0Xwo,19763
+deepdoctection-0.40.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+deepdoctection-0.40.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
+deepdoctection-0.40.0.dist-info/RECORD,,
{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info/licenses}/LICENSE
File without changes

{deepdoctection-0.39.6.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt
File without changes