PyPI - docling-ibm-models - Versions diffs - 1.1.3__tar.gz → 1.3.2__tar.gz - Mend

docling-ibm-models 1.1.3__tar.gz → 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/LICENSE RENAMED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) [year] [fullname]
+Copyright (c) 2024 International Business Machines
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.1.3
+Version: 1.3.2
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -18,17 +18,18 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Dist: Distance (>=0.1.3,<0.2.0)
 Requires-Dist: Pillow (>=10.0.0,<11.0.0)
-Requires-Dist: apted (>=1.0.3,<2.0.0)
+Requires-Dist: huggingface_hub (>=0.23,<1)
 Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: lxml (>=4.9.1,<5.0.0)
 Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
 Requires-Dist: numpy (>=1.24.4,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
-Requires-Dist: opencv-python-headless (>=4.9.0.80,<5.0.0.0)
-Requires-Dist: torch (>=2.2.2,<3.0.0)
-Requires-Dist: torchvision (>=0.17.2)
+Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
+Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
+Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
+Requires-Dist: torchvision (>=0,<1) ; sys_platform != "darwin" or platform_machine != "x86_64"
+Requires-Dist: torchvision (>=0.17.2,<0.18.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
 Requires-Dist: tqdm (>=4.64.0,<5.0.0)
 Description-Content-Type: text/markdown
@@ -110,7 +111,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
 ## Configuration file
-Example configuration can be seen inside test `tests/test_tf_predictor.py`
+Example configuration can be found inside test `tests/test_tf_predictor.py`
 These are the main sections of the configuration file:
 - `dataset`: The directory for prepared data and the parameters used during the data loading.
@@ -128,16 +129,13 @@ You can download the model weights and config files from the links:
 - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
 - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
-Place the downloaded files into `tests/test_data/model_artifacts/` directory.
 ## Inference Tests
-This contains unit tests for Docling models.
+You can run the inference tests for the models with:
-First download the model weights (see above), then run:
 ```
-./devtools/check_code.sh
+python -m pytest tests/
 ```
 This will also generate prediction and matching visualizations that can be found here:

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/README.md RENAMED Viewed

@@ -76,7 +76,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
 ## Configuration file
-Example configuration can be seen inside test `tests/test_tf_predictor.py`
+Example configuration can be found inside test `tests/test_tf_predictor.py`
 These are the main sections of the configuration file:
 - `dataset`: The directory for prepared data and the parameters used during the data loading.
@@ -94,16 +94,13 @@ You can download the model weights and config files from the links:
 - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
 - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
-Place the downloaded files into `tests/test_data/model_artifacts/` directory.
 ## Inference Tests
-This contains unit tests for Docling models.
+You can run the inference tests for the models with:
-First download the model weights (see above), then run:
 ```
-./devtools/check_code.sh
+python -m pytest tests/
 ```
 This will also generate prediction and matching visualizations that can be found here:

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/layoutmodel/layout_predictor.py RENAMED Viewed

@@ -14,29 +14,6 @@ MODEL_CHECKPOINT_FN = "model.pt"
 DEFAULT_NUM_THREADS = 4
-# Classes:
-CLASSES_MAP = {
-    0: "background",
-    1: "Caption",
-    2: "Footnote",
-    3: "Formula",
-    4: "List-item",
-    5: "Page-footer",
-    6: "Page-header",
-    7: "Picture",
-    8: "Section-header",
-    9: "Table",
-    10: "Text",
-    11: "Title",
-    12: "Document Index",
-    13: "Code",
-    14: "Checkbox-Selected",
-    15: "Checkbox-Unselected",
-    16: "Form",
-    17: "Key-Value Region",
-}
 class LayoutPredictor:
     r"""
     Document layout prediction using ONNX
@@ -69,6 +46,31 @@ class LayoutPredictor:
         ------
         FileNotFoundError when the model's ONNX file is missing
         """
+        # Initialize classes map:
+        self._classes_map = {
+            0: "background",
+            1: "Caption",
+            2: "Footnote",
+            3: "Formula",
+            4: "List-item",
+            5: "Page-footer",
+            6: "Page-header",
+            7: "Picture",
+            8: "Section-header",
+            9: "Table",
+            10: "Text",
+            11: "Title",
+            12: "Document Index",
+            13: "Code",
+            14: "Checkbox-Selected",
+            15: "Checkbox-Unselected",
+            16: "Form",
+            17: "Key-Value Region",
+        }
+        # Blacklisted classes
+        self._black_classes = set(["Form", "Key-Value Region"])
         # Set basic params
         self._threshold = 0.6  # Score threshold
         self._image_size = 640
@@ -159,13 +161,19 @@ class LayoutPredictor:
         )
         # Yield output
-        for label, box, score in zip(labels[0], boxes[0], scores[0]):
+        for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
+            # Filter out blacklisted classes
+            label = self._classes_map[label_idx]
+            if label in self._black_classes:
+                continue
+            # Check against threshold
             if score > self._threshold:
                 yield {
                     "l": box[0] / self._image_size * w,
                     "t": box[1] / self._image_size * h,
                     "r": box[2] / self._image_size * w,
                     "b": box[3] / self._image_size * h,
-                    "label": CLASSES_MAP[label],
+                    "label": label,
                     "confidence": score,
                 }

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py RENAMED Viewed

@@ -48,32 +48,6 @@ def validate_config(config):
     return True
-def parse_arguments():
-    r"""
-    Parse the input arguments
-    A ValueError exception will be thrown in case the config file is invalid
-    """
-    parser = argparse.ArgumentParser(description="Train the TableModel")
-    parser.add_argument(
-        "-c", "--config", required=True, default=None, help="configuration file (JSON)"
-    )
-    args = parser.parse_args()
-    config_filename = args.config
-    assert os.path.isfile(config_filename), "FAILURE: Config file not found."
-    return read_config(config_filename)
-def read_config(config_filename):
-    with open(config_filename, "r") as fd:
-        config = json.load(fd)
-    # Validate the config file
-    validate_config(config)
-    return config
 def safe_get_parameter(input_dict, index_path, default=None, required=False):
     r"""
     Safe get parameter from a nested dictionary.
@@ -130,71 +104,3 @@ def get_prepared_data_filename(prepared_data_part, dataset_name):
     if "<POSTFIX>" in template:
         template = template.replace("<POSTFIX>", dataset_name)
     return template
-def create_dataset_and_model(config, purpose, fixed_padding=False):
-    r"""
-    Gets a model from configuration
-    Parameters
-    ---------
-    config : Dictionary
-        The configuration of the model
-    purpose : string
-        One of "train", "eval", "predict"
-    fixed_padding : bool
-        Parameter passed to the constructor of the DataLoader
-    Returns
-    -------
-    In case a Model cannot be initialized return None, None, None. Otherwise:
-    device : selected device
-    dataset : Instance of the DataLoader
-    model : Instance of the model
-    """
-    from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
-    model_type = config["model"]["type"]
-    model = None
-    # Get env vars:
-    use_cpu_only = os.environ.get("USE_CPU_ONLY", False)
-    use_cuda_only = not use_cpu_only
-    # Use the cpu for the evaluation
-    device = "cpu"  # Default, run on CPU
-    num_gpus = torch.cuda.device_count()  # Check if GPU is available
-    if use_cuda_only:
-        device = "cuda:0" if num_gpus > 0 else "cpu"  # Run on first available GPU
-    else:
-        device = "cpu"
-    # Create the DataLoader
-    # loader = DataLoader(config, purpose, fixed_padding=fixed_padding)
-    dataset = TFDataset(config, purpose, fixed_padding=fixed_padding)
-    dataset.set_device(device)
-    dataset_val = None
-    if config["train"]["validation"] and purpose == "train":
-        dataset_val = TFDataset(config, "val", fixed_padding=fixed_padding)
-        dataset_val.set_device(device)
-    if model_type == "TableModel04_rs":
-        from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa: F401
-            TableModel04_rs,
-        )
-    # Find the model class and create an instance of it
-    for candidate in BaseModel.__subclasses__():
-        if candidate.__name__ == model_type:
-            init_data = dataset.get_init_data()
-            model = candidate(config, init_data, purpose, device)
-    if model is None:
-        logger.warn("Not found model: " + str(model_type))
-        return None, None, None
-    logger.info("Found model: " + str(model_type))
-    if purpose == s.PREDICT_PURPOSE:
-        return device, dataset, model
-    else:
-        return device, dataset, dataset_val, model

docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py ADDED Viewed

@@ -0,0 +1,97 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import numbers
+from collections.abc import Iterable, Sequence
+import cv2
+import numpy as np
+import torch
+from torchvision.transforms import functional
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+INTER_MODE = {
+    "NEAREST": cv2.INTER_NEAREST,
+    "BILINEAR": cv2.INTER_LINEAR,
+    "BICUBIC": cv2.INTER_CUBIC,
+}
+PAD_MOD = {
+    "constant": cv2.BORDER_CONSTANT,
+    "edge": cv2.BORDER_REPLICATE,
+    "reflect": cv2.BORDER_DEFAULT,
+    "symmetric": cv2.BORDER_REFLECT,
+}
+def _is_tensor_image(img):
+    return torch.is_tensor(img) and img.ndimension() == 3
+def _is_numpy_image(img):
+    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
+def normalize(tensor, mean, std):
+    """Normalize a tensor image with mean and standard deviation.
+    See ``Normalize`` for more details.
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if _is_tensor_image(tensor):
+        for t, m, s in zip(tensor, mean, std, strict=False):
+            t.sub_(m).div_(s)
+        return tensor
+    elif _is_numpy_image(tensor):
+        return (tensor.astype(np.float32) - 255.0 * np.array(mean)) / np.array(std)
+    else:
+        raise RuntimeError("Undefined type")
+def resize(img, size, interpolation="BILINEAR"):
+    """Resize the input CV Image to the given size.
+    Args:
+        img (np.ndarray): Image to be resized.
+        size (tuple or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number, maintaining
+            the aspect ratio, i.e., if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (str, optional): Desired interpolation. Default is ``BILINEAR``
+    Returns:
+        cv Image: Resized image.
+    """
+    if not _is_numpy_image(img):
+        raise TypeError("img should be CV Image. Got {}".format(type(img)))
+    if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
+        raise TypeError("Got inappropriate size arg: {}".format(size))
+    # TODO(Nikos): Try to remove the opencv dependency
+    if isinstance(size, int):
+        h, w, c = img.shape
+        if (w <= h and w == size) or (h <= w and h == size):
+            return img
+        if w < h:
+            ow = size
+            oh = int(size * h / w)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+        else:
+            oh = size
+            ow = int(size * w / h)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+    else:
+        oh, ow = size
+        return cv2.resize(
+            img, dsize=(int(ow), int(oh)), interpolation=INTER_MODE[interpolation]
+        )

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/matching_post_processor.py RENAMED Viewed

@@ -4,6 +4,7 @@
 #
 import json
 import logging
+import math
 import statistics
 import docling_ibm_models.tableformer.settings as s
@@ -403,45 +404,63 @@ class MatchingPostProcessor:
                 # Push horizontally
                 if x1_min < x2_min:
                     # Move box1 to the left and box2 to the right
-                    box1["bbox"][2] -= overlap_x
-                    box2["bbox"][0] += overlap_x
+                    box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
+                    box2["bbox"][0] += math.floor(overlap_x / 2)
                 else:
                     # Move box2 to the left and box1 to the right
-                    box2["bbox"][2] -= overlap_x
-                    box1["bbox"][0] += overlap_x
+                    box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
+                    box1["bbox"][0] += math.floor(overlap_x / 2)
             else:
                 # Push vertically
                 if y1_min < y2_min:
                     # Move box1 up and box2 down
-                    box1["bbox"][3] -= overlap_y
-                    box2["bbox"][1] += overlap_y
+                    box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
+                    box2["bbox"][1] += math.floor(overlap_y / 2)
                 else:
                     # Move box2 up and box1 down
-                    box2["bbox"][3] -= overlap_y
-                    box1["bbox"][1] += overlap_y
+                    box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
+                    box1["bbox"][1] += math.floor(overlap_y / 2)
+            # Will flip coordinates in proper order, if previous operations reversed it
+            box1["bbox"] = [
+                min(box1["bbox"][0], box1["bbox"][2]),
+                min(box1["bbox"][1], box1["bbox"][3]),
+                max(box1["bbox"][0], box1["bbox"][2]),
+                max(box1["bbox"][1], box1["bbox"][3]),
+            ]
+            box2["bbox"] = [
+                min(box2["bbox"][0], box2["bbox"][2]),
+                min(box2["bbox"][1], box2["bbox"][3]),
+                max(box2["bbox"][0], box2["bbox"][2]),
+                max(box2["bbox"][1], box2["bbox"][3]),
+            ]
             return box1, box2
         def do_boxes_overlap(box1, box2):
-            # print("{} - {}".format(box1["bbox"], box2["bbox"]))
-            # Extract coordinates from the bounding boxes
-            x1_min, y1_min, x1_max, y1_max = box1["bbox"]
-            x2_min, y2_min, x2_max, y2_max = box2["bbox"]
-            # Check if one box is to the left of the other
-            if x1_max < x2_min or x2_max < x1_min:
+            B1 = box1["bbox"]
+            B2 = box2["bbox"]
+            if (
+                (B1[0] >= B2[2])
+                or (B1[2] <= B2[0])
+                or (B1[3] <= B2[1])
+                or (B1[1] >= B2[3])
+            ):
                 return False
-            # Check if one box is above the other
-            if y1_max < y2_min or y2_max < y1_min:
-                return False
-            return True
+            else:
+                return True
         def find_overlapping_pairs_indexes(bboxes):
             overlapping_indexes = []
             # Compare each box with every other box (combinations)
             for i in range(len(bboxes)):
                 for j in range(i + 1, len(bboxes)):
-                    if do_boxes_overlap(bboxes[i], bboxes[j]):
-                        bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
+                    if i != j:
+                        if bboxes[i] != bboxes[j]:
+                            if do_boxes_overlap(bboxes[i], bboxes[j]):
+                                bboxes[i], bboxes[j] = correct_overlap(
+                                    bboxes[i], bboxes[j]
+                                )
             return overlapping_indexes, bboxes
@@ -1144,7 +1163,7 @@ class MatchingPostProcessor:
                 new_pdf_cells.append(pdf_cells[i])
         return new_pdf_cells
-    def process(self, matching_details):
+    def process(self, matching_details, correct_overlapping_cells=False):
         r"""
         Do post processing, see details in the comments below
@@ -1348,9 +1367,10 @@ class MatchingPostProcessor:
         table_cells_wo = po2
         max_cell_id = po3
-        # As the last step - correct cell bboxes in a way that they don't overlap:
-        if len(table_cells_wo) <= 300:  # For performance reasons
-            table_cells_wo = self._find_overlapping(table_cells_wo)
+        if correct_overlapping_cells:
+            # As the last step - correct cell bboxes in a way that they don't overlap:
+            if len(table_cells_wo) <= 300:  # For performance reasons
+                table_cells_wo = self._find_overlapping(table_cells_wo)
         self._log().debug("*** final_matches_wo")
         self._log().debug(final_matches_wo)

{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py RENAMED Viewed

@@ -127,13 +127,17 @@ class CellMatcher:
             Dictionary with all details about the matchings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                if isinstance(word["bbox"], list):
+                    continue
+                elif isinstance(word["bbox"], dict):
+                    word["bbox"] = [
+                        word["bbox"]["l"],
+                        word["bbox"]["t"],
+                        word["bbox"]["r"],
+                        word["bbox"]["b"],
+                    ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
@@ -145,9 +149,13 @@ class CellMatcher:
         table_cells = self._build_table_cells(
             html_seq, otsl_seq, table_bboxes_page, table_classes
         )
-        matches, matches_counter = self._intersection_over_pdf_match(
-            table_cells, pdf_cells
-        )
+        matches = {}
+        matches_counter = 0
+        if len(pdf_cells) > 0:
+            matches, matches_counter = self._intersection_over_pdf_match(
+                table_cells, pdf_cells
+            )
         self._log().debug("matches_counter: {}".format(matches_counter))
@@ -188,13 +196,14 @@ class CellMatcher:
             Dictionary with all details about the matchings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                word["bbox"] = [
+                    word["bbox"]["l"],
+                    word["bbox"]["t"],
+                    word["bbox"]["r"],
+                    word["bbox"]["b"],
+                ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]

docling-ibm-models 1.1.3__tar.gz → 1.3.2__tar.gz

docling-ibm-models 1.1.3__tar.gz → 1.3.2__tar.gz