docling-ibm-models 1.3.1__tar.gz → 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/PKG-INFO +1 -1
  2. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py +0 -94
  3. docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py +97 -0
  4. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -71
  5. docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/transforms.py +91 -0
  6. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +1 -1
  7. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/pyproject.toml +1 -1
  8. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -504
  9. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/functional.py +0 -574
  10. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -1233
  11. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/transforms.py +0 -396
  12. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/test_dataset_cache.py +0 -37
  13. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/test_prepare_image.py +0 -99
  14. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/LICENSE +0 -0
  15. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/README.md +0 -0
  16. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  17. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/__init__.py +0 -0
  18. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  19. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  20. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  21. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  22. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  23. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  24. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  25. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  26. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  27. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  28. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/otsl.py +0 -0
  29. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/settings.py +0 -0
  30. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  31. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  32. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  33. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  34. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/utils.py +0 -0
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.3.1
+Version: 1.3.2
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py
@@ -48,32 +48,6 @@ def validate_config(config):
     return True


-def parse_arguments():
-    r"""
-    Parse the input arguments
-    A ValueError exception will be thrown in case the config file is invalid
-    """
-    parser = argparse.ArgumentParser(description="Train the TableModel")
-    parser.add_argument(
-        "-c", "--config", required=True, default=None, help="configuration file (JSON)"
-    )
-    args = parser.parse_args()
-    config_filename = args.config
-
-    assert os.path.isfile(config_filename), "FAILURE: Config file not found."
-    return read_config(config_filename)
-
-
-def read_config(config_filename):
-    with open(config_filename, "r") as fd:
-        config = json.load(fd)
-
-    # Validate the config file
-    validate_config(config)
-
-    return config
-
-
 def safe_get_parameter(input_dict, index_path, default=None, required=False):
     r"""
     Safe get parameter from a nested dictionary.
@@ -130,71 +104,3 @@ def get_prepared_data_filename(prepared_data_part, dataset_name):
     if "<POSTFIX>" in template:
         template = template.replace("<POSTFIX>", dataset_name)
     return template
-
-
-def create_dataset_and_model(config, purpose, fixed_padding=False):
-    r"""
-    Gets a model from configuration
-
-    Parameters
-    ---------
-    config : Dictionary
-        The configuration of the model
-    purpose : string
-        One of "train", "eval", "predict"
-    fixed_padding : bool
-        Parameter passed to the constructor of the DataLoader
-
-    Returns
-    -------
-    In case a Model cannot be initialized return None, None, None. Otherwise:
-
-    device : selected device
-    dataset : Instance of the DataLoader
-    model : Instance of the model
-    """
-    from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
-
-    model_type = config["model"]["type"]
-    model = None
-
-    # Get env vars:
-    use_cpu_only = os.environ.get("USE_CPU_ONLY", False)
-    use_cuda_only = not use_cpu_only
-
-    # Use the cpu for the evaluation
-    device = "cpu"  # Default, run on CPU
-    num_gpus = torch.cuda.device_count()  # Check if GPU is available
-    if use_cuda_only:
-        device = "cuda:0" if num_gpus > 0 else "cpu"  # Run on first available GPU
-    else:
-        device = "cpu"
-
-    # Create the DataLoader
-    # loader = DataLoader(config, purpose, fixed_padding=fixed_padding)
-    dataset = TFDataset(config, purpose, fixed_padding=fixed_padding)
-    dataset.set_device(device)
-    dataset_val = None
-    if config["train"]["validation"] and purpose == "train":
-        dataset_val = TFDataset(config, "val", fixed_padding=fixed_padding)
-        dataset_val.set_device(device)
-    if model_type == "TableModel04_rs":
-        from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa: F401
-            TableModel04_rs,
-        )
-    # Find the model class and create an instance of it
-    for candidate in BaseModel.__subclasses__():
-        if candidate.__name__ == model_type:
-            init_data = dataset.get_init_data()
-            model = candidate(config, init_data, purpose, device)
-
-    if model is None:
-        logger.warn("Not found model: " + str(model_type))
-        return None, None, None
-
-    logger.info("Found model: " + str(model_type))
-
-    if purpose == s.PREDICT_PURPOSE:
-        return device, dataset, model
-    else:
-        return device, dataset, dataset_val, model
docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py
@@ -0,0 +1,97 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import numbers
+from collections.abc import Iterable, Sequence
+
+import cv2
+import numpy as np
+import torch
+from torchvision.transforms import functional
+
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+
+INTER_MODE = {
+    "NEAREST": cv2.INTER_NEAREST,
+    "BILINEAR": cv2.INTER_LINEAR,
+    "BICUBIC": cv2.INTER_CUBIC,
+}
+
+PAD_MOD = {
+    "constant": cv2.BORDER_CONSTANT,
+    "edge": cv2.BORDER_REPLICATE,
+    "reflect": cv2.BORDER_DEFAULT,
+    "symmetric": cv2.BORDER_REFLECT,
+}
+
+
+def _is_tensor_image(img):
+    return torch.is_tensor(img) and img.ndimension() == 3
+
+
+def _is_numpy_image(img):
+    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
+
+
+def normalize(tensor, mean, std):
+    """Normalize a tensor image with mean and standard deviation.
+    See ``Normalize`` for more details.
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if _is_tensor_image(tensor):
+        for t, m, s in zip(tensor, mean, std, strict=False):
+            t.sub_(m).div_(s)
+        return tensor
+    elif _is_numpy_image(tensor):
+        return (tensor.astype(np.float32) - 255.0 * np.array(mean)) / np.array(std)
+    else:
+        raise RuntimeError("Undefined type")
+
+
+def resize(img, size, interpolation="BILINEAR"):
+    """Resize the input CV Image to the given size.
+    Args:
+        img (np.ndarray): Image to be resized.
+        size (tuple or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio. i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (str, optional): Desired interpolation. Default is ``BILINEAR``
+    Returns:
+        cv Image: Resized image.
+    """
+    if not _is_numpy_image(img):
+        raise TypeError("img should be CV Image. Got {}".format(type(img)))
+    if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
+        raise TypeError("Got inappropriate size arg: {}".format(size))
+
+    # TODO(Nikos): Try to remove the opencv dependency
+    if isinstance(size, int):
+        h, w, c = img.shape
+        if (w <= h and w == size) or (h <= w and h == size):
+            return img
+        if w < h:
+            ow = size
+            oh = int(size * h / w)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+        else:
+            oh = size
+            ow = int(size * w / h)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+    else:
+        oh, ow = size
+        return cv2.resize(
+            img, dsize=(int(ow), int(oh)), interpolation=INTER_MODE[interpolation]
+        )
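A minimal usage sketch of the new functional helpers (not part of the package diff; the dummy image size and the mean/std values below are illustrative assumptions):

import numpy as np

from docling_ibm_models.tableformer.data_management import functional as F

# Dummy 3-channel uint8 image, 300 x 200 (H x W)
img = np.random.randint(0, 256, size=(300, 200, 3), dtype=np.uint8)

# Integer size: the smaller edge (here the width) is scaled to 448 pixels
# and the aspect ratio is preserved, so the result is 672 x 448
resized = F.resize(img, 448, interpolation="BILINEAR")
print(resized.shape)  # (672, 448, 3)

# The numpy branch of normalize() subtracts 255 * mean and divides by std
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
normalized = F.normalize(resized, mean, std)
print(normalized.shape)  # (672, 448, 3)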
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_predictor.py
@@ -12,7 +12,6 @@ import numpy as np
 import torch

 import docling_ibm_models.tableformer.common as c
-import docling_ibm_models.tableformer.data_management.functional as F
 import docling_ibm_models.tableformer.data_management.transforms as T
 import docling_ibm_models.tableformer.settings as s
 import docling_ibm_models.tableformer.utils.utils as u
@@ -21,6 +20,9 @@ from docling_ibm_models.tableformer.data_management.matching_post_processor impo
 )
 from docling_ibm_models.tableformer.data_management.tf_cell_matcher import CellMatcher
 from docling_ibm_models.tableformer.models.common.base_model import BaseModel
+from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (
+    TableModel04_rs,
+)
 from docling_ibm_models.tableformer.otsl import otsl_to_html
 from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler

@@ -187,16 +189,7 @@ class TFPredictor:
         """

         self._model_type = self._config["model"]["type"]
-        # Added import here to avoid loading turbotransformer library unnecessarily
-        if self._model_type == "TableModel04_rs":
-            from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa
-                TableModel04_rs,
-            )
-        for candidate in BaseModel.__subclasses__():
-            if candidate.__name__ == self._model_type:
-                model = candidate(
-                    self._config, self._init_data, s.PREDICT_PURPOSE, self._device
-                )
+        model = TableModel04_rs(self._config, self._init_data, self._device)

         if model is None:
             err_msg = "Not able to initiate a model for {}".format(self._model_type)
@@ -376,66 +369,6 @@ class TFPredictor:

         return new_bboxes

-    def _pad_image(self, iocr_page):
-        r"""
-        Adds padding to the image
-
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-
-        Returns
-        -------
-        new_im: PIL image
-            new, padded image
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-        _, old_iw, old_ih = iocr_page["image"].shape
-
-        margin_i = self._padding_size  # pixels
-
-        desired_iw = old_iw + (margin_i * 2)
-        desired_ih = old_ih + (margin_i * 2)
-
-        # Ratio of new image size to the original image size
-        new_image_ratio = desired_iw / old_iw
-
-        bcolor = (255, 255, 255)
-        # Create empty canvas of background color and desired size
-        padded_image = F.pad(
-            iocr_page["image"],
-            (desired_iw, desired_ih, desired_iw, desired_ih),
-            fill=bcolor,
-        )
-        return padded_image, new_image_ratio
-
-    def _pre_process_image(self, iocr_page):
-        r"""
-        Pre-process table image in memory, before doing prediction
-        Currently just removes from the image separate PDF cells that only contain "$" sign
-        This is done to remove model confusion when dealing with financial reports
-
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-
-        Returns
-        -------
-        iocr_page["image"] : PIL image
-            updated table image with "$" repainted
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-
-        new_image_ratio = 1.0
-
-        ic, iw, ih = iocr_page["image"].shape
-
-        return iocr_page["image"], new_image_ratio
-
     def _merge_tf_output(self, docling_output, pdf_cells):
         tf_output = []
         tf_cells_map = {}
@@ -519,6 +452,7 @@ class TFPredictor:
             sf = r
             dim = (width, int(h * r))
         # resize the image
+        # TODO(Nikos): Try to remove cv2 dependency
        resized = cv2.resize(image, dim, interpolation=inter)
        # return the resized image
        return resized, sf
docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/transforms.py
@@ -0,0 +1,91 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+from __future__ import division
+
+import collections
+import numbers
+import random
+
+import torch
+
+from docling_ibm_models.tableformer.data_management import functional as F
+
+
+class Normalize(object):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
+    will normalize each channel of the input ``torch.*Tensor`` i.e.
+    ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    """
+
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, tensor, target=None):
+        """
+        Args:
+            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return F.normalize(tensor, self.mean, self.std), target
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class Resize(object):
+    """Resize the input PIL Image to the given size.
+    Args:
+        size (sequence or int): Desired output size. If size is a sequence like
+            (h, w), output size will be matched to this. If size is an int,
+            smaller edge of the image will be matched to this number.
+            i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (int, optional): Desired interpolation. Default is
+            ``BILINEAR``
+    """
+
+    def __init__(self, size, interpolation="BILINEAR"):
+        self.size = size
+        self.interpolation = interpolation
+
+    def __call__(self, img, target=None):
+        """
+        Args:
+            img (np.ndarray): Image to be scaled.
+        Returns:
+            np.ndarray: Rescaled image.
+        """
+        # Resize bboxes (in pixels)
+        x_scale = 0
+        y_scale = 0
+
+        if img.shape[1] > 0:
+            x_scale = self.size[0] / img.shape[1]
+        if img.shape[0] > 0:
+            y_scale = self.size[1] / img.shape[0]
+
+        # loop over bboxes
+        if target is not None:
+            if target["boxes"] is not None:
+                target_ = target.copy()
+                target_["boxes"][:, 0] = x_scale * target_["boxes"][:, 0]
+                target_["boxes"][:, 1] = y_scale * target_["boxes"][:, 1]
+                target_["boxes"][:, 2] = x_scale * target_["boxes"][:, 2]
+                target_["boxes"][:, 3] = y_scale * target_["boxes"][:, 3]
+        return F.resize(img, self.size, self.interpolation), target
+
+    def __repr__(self):
+        interpolate_str = self.interpolation
+        return self.__class__.__name__ + "(size={0}, interpolation={1})".format(
+            self.size, interpolate_str
+        )
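A minimal sketch of how the new transforms chain together (not part of the package diff; the dummy image, the box values and the square 448 x 448 target size are assumptions made for illustration):

import numpy as np
import torch

import docling_ibm_models.tableformer.data_management.transforms as T

img = np.random.randint(0, 256, size=(300, 200, 3), dtype=np.uint8)
# One box in (x1, y1, x2, y2) pixel coordinates
target = {"boxes": torch.tensor([[10.0, 20.0, 150.0, 250.0]])}

# Resize to a square 448 x 448 output; the box coordinates are rescaled
# by the same x/y factors as the image
img_resized, target_resized = T.Resize((448, 448))(img, target)

# Normalize expects a (C, H, W) tensor, so convert the numpy image first
tensor = torch.from_numpy(img_resized).permute(2, 0, 1).float().div(255.0)
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
tensor_norm, _ = normalize(tensor, target_resized)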
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
@@ -26,7 +26,7 @@ class TableModel04_rs(BaseModel, nn.Module):
     TableNet04Model encoder, dual-decoder model with OTSL+ support
     """

-    def __init__(self, config, init_data, purpose, device):
+    def __init__(self, config, init_data, device):
        super(TableModel04_rs, self).__init__(config, init_data, device)

        self._prof = config["predict"].get("profiling", False)
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "1.3.1" # DO NOT EDIT, updated automatically
+version = "1.3.2" # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"