docling-ibm-models 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/layoutmodel/layout_predictor.py +171 -0
- docling_ibm_models/tableformer/__init__.py +0 -0
- docling_ibm_models/tableformer/common.py +200 -0
- docling_ibm_models/tableformer/data_management/__init__.py +0 -0
- docling_ibm_models/tableformer/data_management/data_transformer.py +504 -0
- docling_ibm_models/tableformer/data_management/functional.py +574 -0
- docling_ibm_models/tableformer/data_management/matching_post_processor.py +1325 -0
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +596 -0
- docling_ibm_models/tableformer/data_management/tf_dataset.py +1233 -0
- docling_ibm_models/tableformer/data_management/tf_predictor.py +1020 -0
- docling_ibm_models/tableformer/data_management/transforms.py +396 -0
- docling_ibm_models/tableformer/models/__init__.py +0 -0
- docling_ibm_models/tableformer/models/common/__init__.py +0 -0
- docling_ibm_models/tableformer/models/common/base_model.py +279 -0
- docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
- docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +163 -0
- docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +72 -0
- docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +324 -0
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +203 -0
- docling_ibm_models/tableformer/otsl.py +541 -0
- docling_ibm_models/tableformer/settings.py +90 -0
- docling_ibm_models/tableformer/test_dataset_cache.py +37 -0
- docling_ibm_models/tableformer/test_prepare_image.py +99 -0
- docling_ibm_models/tableformer/utils/__init__.py +0 -0
- docling_ibm_models/tableformer/utils/app_profiler.py +243 -0
- docling_ibm_models/tableformer/utils/torch_utils.py +216 -0
- docling_ibm_models/tableformer/utils/utils.py +376 -0
- docling_ibm_models/tableformer/utils/variance.py +175 -0
- docling_ibm_models-0.1.0.dist-info/LICENSE +21 -0
- docling_ibm_models-0.1.0.dist-info/METADATA +172 -0
- docling_ibm_models-0.1.0.dist-info/RECORD +32 -0
- docling_ibm_models-0.1.0.dist-info/WHEEL +4 -0
docling_ibm_models/tableformer/data_management/transforms.py
@@ -0,0 +1,396 @@
#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#
from __future__ import division

import collections.abc
import numbers
import random

import torch

from docling_ibm_models.tableformer.data_management import functional as F


def box_cxcywh_to_xyxy(x):
    # Convert boxes from (center_x, center_y, width, height) to (x0, y0, x1, y1)
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)


def box_xyxy_to_cxcywh(x):
    # Convert boxes from (x0, y0, x1, y1) to (center_x, center_y, width, height)
    x0, y0, x1, y1 = x.unbind(-1)
    b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
    return torch.stack(b, dim=-1)
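
For orientation, the two helpers above are exact inverses of each other. A minimal round-trip check (a sketch, assuming the wheel is installed so the module import resolves):

import torch

from docling_ibm_models.tableformer.data_management.transforms import (
    box_cxcywh_to_xyxy,
    box_xyxy_to_cxcywh,
)

boxes = torch.tensor([[50.0, 40.0, 20.0, 10.0]])  # (cx, cy, w, h)
xyxy = box_cxcywh_to_xyxy(boxes)                  # -> [[40., 35., 60., 45.]]
assert torch.allclose(box_xyxy_to_cxcywh(xyxy), boxes)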

class Lambda(object):
    """Apply a user-defined lambda as a transform.

    Attention: the multiprocessing used in the PyTorch dataloader does not
    work with lambda functions on Windows.

    Args:
        lambd (function): Lambda/function to be used for transform.
    """

    def __init__(self, lambd):
        # assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd
        # if 'Windows' in platform.system():
        #     raise RuntimeError("Can't pickle lambda function on Windows")

    def __call__(self, img):
        return self.lambd(img)

    def __repr__(self):
        return self.__class__.__name__ + "()"

class RandomTransforms(object):
    """Base class for a list of transformations with randomness.

    Args:
        transforms (list or tuple): list of transformations
    """

    def __init__(self, transforms):
        assert isinstance(transforms, (list, tuple))
        self.transforms = transforms

    def __call__(self, *args, **kwargs):
        raise NotImplementedError()

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string


class RandomChoice(RandomTransforms):
    """Apply a single transformation randomly picked from a list."""

    def __call__(self, img, target):
        t = random.choice(self.transforms)
        return t(img, target)

class RandomCrop(object):
    def __init__(self, size, margin_crop):
        self.size = list(size)
        self.margin_crop = list(margin_crop)  # margin_crop: (w, h)

    def __call__(self, img, target):
        # img has shape (h, w, ch)
        image_height, image_width = img.shape[0], img.shape[1]
        target_ = target.copy()
        if image_width > 0 and image_height > 0:
            # F.crop(img, top, left, height, width): remove margin_crop[1]
            # pixels from the top/bottom and margin_crop[0] pixels from the
            # left/right of the image
            cropped_image = F.crop(
                img,
                self.margin_crop[1],
                self.margin_crop[0],
                image_height - (self.margin_crop[1] * 2),
                image_width - (self.margin_crop[0] * 2),
            )

            # Shift the boxes (x0, y0, x1, y1) by the cropped margins
            target_["boxes"][:, 0] = target_["boxes"][:, 0] - self.margin_crop[0]
            target_["boxes"][:, 1] = target_["boxes"][:, 1] - self.margin_crop[1]
            target_["boxes"][:, 2] = target_["boxes"][:, 2] - self.margin_crop[0]
            target_["boxes"][:, 3] = target_["boxes"][:, 3] - self.margin_crop[1]
        else:
            cropped_image = img
        return cropped_image, target_

class RandomPad(object):
    def __init__(self, max_pad):
        self.max_pad = max_pad

    def __call__(self, img, target):
        # Draw an independent padding for each side (left, top, right, bottom)
        pad_x = random.randint(0, self.max_pad)
        pad_y = random.randint(0, self.max_pad)
        pad_x1 = random.randint(0, self.max_pad)
        pad_y1 = random.randint(0, self.max_pad)
        img = img.copy()
        padded_image = F.pad(img, (pad_x, pad_y, pad_x1, pad_y1), fill=(255, 255, 255))
        target_ = target.copy()
        if target["boxes"] is not None:
            # Shift the boxes by the left/top padding
            target_["boxes"][:, 0] = target_["boxes"][:, 0] + pad_x
            target_["boxes"][:, 1] = target_["boxes"][:, 1] + pad_y
            target_["boxes"][:, 2] = target_["boxes"][:, 2] + pad_x
            target_["boxes"][:, 3] = target_["boxes"][:, 3] + pad_y
        return padded_image, target_

class ColorJitter(object):
    """Randomly change the brightness, contrast and saturation of an image.

    Args:
        brightness (float): How much to jitter brightness. brightness_factor
            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
        contrast (float): How much to jitter contrast. contrast_factor
            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
        saturation (float): How much to jitter saturation. saturation_factor
            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
        hue (float): How much to jitter hue. hue_factor is chosen uniformly from
            [-hue, hue]. Should be >= 0 and <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        # Each argument is either a single float or a 2-element range.
        # collections.abc.Iterable is used here; the bare collections.Iterable
        # alias was removed in Python 3.10.
        assert isinstance(brightness, float) or (
            isinstance(brightness, collections.abc.Iterable) and len(brightness) == 2
        )
        assert isinstance(contrast, float) or (
            isinstance(contrast, collections.abc.Iterable) and len(contrast) == 2
        )
        assert isinstance(saturation, float) or (
            isinstance(saturation, collections.abc.Iterable) and len(saturation) == 2
        )
        assert isinstance(hue, float) or (
            isinstance(hue, collections.abc.Iterable) and len(hue) == 2
        )

        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        """Get a randomized transform to be applied on image.

        Arguments are the same as those of __init__.

        Returns:
            Transform which randomly adjusts brightness, contrast and
            saturation in a random order.
        """
        transforms = []

        if isinstance(brightness, numbers.Number):
            # Scalar arguments: jitter within [max(0, 1 - x), 1 + x]
            if brightness > 0:
                brightness_factor = random.uniform(
                    max(0, 1 - brightness), 1 + brightness
                )
                transforms.append(
                    Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
                )

            if contrast > 0:
                contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
                transforms.append(
                    Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
                )

            if saturation > 0:
                saturation_factor = random.uniform(
                    max(0, 1 - saturation), 1 + saturation
                )
                transforms.append(
                    Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
                )

            if hue > 0:
                hue_factor = random.uniform(-hue, hue)
                transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))

        else:
            # Range arguments: jitter within the given (min, max) interval
            if brightness[0] > 0 and brightness[1] > 0:
                brightness_factor = random.uniform(brightness[0], brightness[1])
                transforms.append(
                    Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
                )

            if contrast[0] > 0 and contrast[1] > 0:
                contrast_factor = random.uniform(contrast[0], contrast[1])
                transforms.append(
                    Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
                )

            if saturation[0] > 0 and saturation[1] > 0:
                saturation_factor = random.uniform(saturation[0], saturation[1])
                transforms.append(
                    Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
                )

            if hue[0] > 0 and hue[1] > 0:
                hue_factor = random.uniform(hue[0], hue[1])
                transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))

        # Apply the selected adjustments in a random order
        random.shuffle(transforms)
        transform = ComposeSingle(transforms)

        return transform

    def __call__(self, img, target):
        """
        Args:
            img (np.ndarray): Input image.
        Returns:
            np.ndarray: Color jittered image.
        """
        transform = self.get_params(
            self.brightness, self.contrast, self.saturation, self.hue
        )
        return transform(img), target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        format_string += "brightness={0}".format(self.brightness)
        format_string += ", contrast={0}".format(self.contrast)
        format_string += ", saturation={0}".format(self.saturation)
        format_string += ", hue={0})".format(self.hue)
        return format_string
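
As a usage sketch for the class above; it assumes the package's functional backend (F.adjust_brightness and friends) accepts uint8 NumPy images, which is how the transform is invoked in this file:

import numpy as np

from docling_ibm_models.tableformer.data_management.transforms import ColorJitter

jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
jittered, target = jitter(img, {"boxes": None})  # the target passes through untouched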

class Normalize(object):
    """Normalize a tensor image with mean and standard deviation.

    Given mean: ``(M1,...,Mn)`` and std: ``(S1,...,Sn)`` for ``n`` channels,
    this transform will normalize each channel of the input ``torch.*Tensor``,
    i.e. ``input[channel] = (input[channel] - mean[channel]) / std[channel]``.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor, target=None):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: Normalized Tensor image.
        """
        return F.normalize(tensor, self.mean, self.std), target

    def __repr__(self):
        return self.__class__.__name__ + "(mean={0}, std={1})".format(
            self.mean, self.std
        )


class NoTransformation(object):
    """Do nothing: return the image and target unchanged."""

    def __call__(self, img, target):
        return img, target


class Compose(object):
    """Composes several transforms together. Each transform takes and
    returns an (img, target) pair.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, target):
        for t in self.transforms:
            img, target = t(img, target)
        return img, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string


class ComposeSingle(object):
    """Composes several transforms together. Unlike ``Compose``, each
    transform takes and returns only an image.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.ComposeSingle([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
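
The two composers differ only in calling convention: Compose chains (img, target) transforms, while ComposeSingle chains image-only transforms such as the Lambda wrappers built by ColorJitter.get_params(). A minimal sketch:

from docling_ibm_models.tableformer.data_management.transforms import (
    Compose,
    ComposeSingle,
    Lambda,
    NoTransformation,
)

pair_pipeline = Compose([NoTransformation()])          # each step is t(img, target)
img_pipeline = ComposeSingle([Lambda(lambda im: im)])  # each step is t(img)
out_img, out_target = pair_pipeline("img", {"boxes": None})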

class Resize(object):
    """Resize the input image to the given size.

    Args:
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number.
            I.e., if height > width, the image will be rescaled to
            (size * height / width, size).
        interpolation (int, optional): Desired interpolation. Default is
            ``BILINEAR``.
    """

    def __init__(self, size, interpolation="BILINEAR"):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img, target=None):
        """
        Args:
            img (np.ndarray): Image to be scaled.
        Returns:
            np.ndarray: Rescaled image.
        """
        # Scale factors for the bboxes (in pixels)
        x_scale = 0
        y_scale = 0

        if img.shape[1] > 0:
            x_scale = self.size[0] / img.shape[1]
        if img.shape[0] > 0:
            y_scale = self.size[1] / img.shape[0]

        # Rescale the bboxes together with the image. The shallow dict copy
        # shares the "boxes" tensor with the original target, which is scaled
        # in place.
        if target is not None:
            if target["boxes"] is not None:
                target_ = target.copy()
                target_["boxes"][:, 0] = x_scale * target_["boxes"][:, 0]
                target_["boxes"][:, 1] = y_scale * target_["boxes"][:, 1]
                target_["boxes"][:, 2] = x_scale * target_["boxes"][:, 2]
                target_["boxes"][:, 3] = y_scale * target_["boxes"][:, 3]
        return F.resize(img, self.size, self.interpolation), target

    def __repr__(self):
        interpolate_str = self.interpolation
        return self.__class__.__name__ + "(size={0}, interpolation={1})".format(
            self.size, interpolate_str
        )
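
End-to-end, Resize scales the boxes together with the image. A sketch with hypothetical shapes, assuming F.resize accepts NumPy images (note that the box-scaling code above consumes size as (width, height)):

import numpy as np
import torch

from docling_ibm_models.tableformer.data_management.transforms import Resize

img = np.full((100, 200, 3), 255, dtype=np.uint8)  # h=100, w=200
target = {"boxes": torch.tensor([[20.0, 10.0, 120.0, 60.0]])}
img_out, target_out = Resize([448, 448])(img, target)
print(target_out["boxes"])  # x coords scaled by 448/200, y coords by 448/100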
docling_ibm_models/tableformer/models/__init__.py (file without changes)
docling_ibm_models/tableformer/models/common/__init__.py (file without changes)
docling_ibm_models/tableformer/models/common/base_model.py
@@ -0,0 +1,279 @@
#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#
import glob
import logging
import os
import time
from abc import ABC, abstractmethod
from pathlib import Path

import torch

import docling_ibm_models.tableformer.settings as s

LOG_LEVEL = logging.INFO
# LOG_LEVEL = logging.DEBUG


class BaseModel(ABC):
    r"""
    BaseModel provides some common functionality for all models:

    - Saves checkpoint files for each epoch
    - Loads the model from the best available checkpoint
    - Saves the repository branch and commit
    """

    def __init__(self, config, init_data, device):
        r"""
        Inputs:
            config: The configuration file
            init_data: Dictionary with initialization data. This dictionary can
                       be used to pass any kind of initialization data for the
                       models
            device: The device used to move the tensors of the model
        """
        super(BaseModel, self).__init__()

        # Set config and device
        self._config = config
        self._init_data = init_data

        self._device = device

        self._save_dir = config["model"]["save_dir"]
        self._load_checkpoint = None
        if "load_checkpoint" in config["model"]:
            self._load_checkpoint = config["model"]["load_checkpoint"]

        self._branch_name = "dev/next"
        self._commit_sha = "1"

        # Keep a dictionary with the starting times per epoch.
        # NOTICE: Epochs start from 0
        self._epoch_start_ts = {0: time.time()}

    def _log(self):
        # Setup a custom logger
        return s.get_custom_logger(self.__class__.__name__, LOG_LEVEL)

    @abstractmethod
    def predict(self, img, max_steps, beam_size, return_attention):
        pass

    def count_parameters(self):
        r"""Counts the number of trainable parameters of this model

        Output:
            num_parameters: number of trainable parameters
        """
        num_parameters = sum(p.numel() for p in self.parameters() if p.requires_grad)

        return num_parameters

    def get_code_version(self):
        r"""Gets the source control version of this model code

        Returns
        -------
        branch_name : str
            The name of the Git branch of this model code
        commit_sha : str
            The unique identifier of the Git commit of this model code
        """
        return self._branch_name, self._commit_sha

    def get_save_directory(self):
        r"""
        Return the save directory
        """
        return self._save_dir

    def is_saved(self):
        r"""
        Return True if both conditions are met:

        1. There is a checkpoint file for the model.
        2. The checkpoint file corresponds to the last training epoch set in
           the configuration file.
        """
        # Get the saved model
        saved_model, _ = self._load_best_checkpoint()

        if saved_model is None:
            return False

        epochs = self._config["train"]["epochs"]
        self._log().debug(
            "Best epoch in saved model: {}; Number of epochs in config: {}".format(
                saved_model["epoch"], epochs
            )
        )
        # Epochs start from 0, so the last epoch is epochs - 1
        if epochs == saved_model["epoch"] + 1:
            return True

        return False

    def save(self, epoch=None, optimizers=None, losses=None, model_parameters=None):
        r"""
        Save the model data to the disk as a checkpoint file.

        Parameters
        ----------
        epoch: Training epoch
        optimizers: Dictionary with the optimizers. The key specifies what the
                    optimizer is used for. The 'state_dict' of each optimizer
                    will be saved in the checkpoint file.
        losses: Dictionary with the losses. The key specifies what the loss is
                used for. Each value is a list.
        model_parameters: Dictionary with model-specific parameters that need
                          to be saved in the checkpoint file.

        Returns
        -------
        True if success, False otherwise
        """
        # Get the checkpoint filename
        c_filename = self._build_checkpoint_filename(epoch)
        self._log().debug("Trying to save checkpoint file: {}".format(c_filename))

        # Prepare a dictionary with all data we want to save
        optimizers_state_dict = None
        if optimizers is not None:
            optimizers_state_dict = {k: v.state_dict() for k, v in optimizers.items()}

        model_data = {
            "model_state_dict": self.state_dict(),
            "epoch": epoch,
            "optimizers": optimizers_state_dict,
            "losses": losses,
            "model_parameters": model_parameters,
        }

        # Add the processing time per epoch
        now = time.time()
        self._epoch_start_ts[epoch + 1] = now
        if epoch in self._epoch_start_ts:
            dt = now - self._epoch_start_ts[epoch]
            model_data["epoch_start_ts"] = self._epoch_start_ts[epoch]
            model_data["epoch_dt"] = dt

        # Create the save directory
        Path(self._save_dir).mkdir(parents=True, exist_ok=True)

        # Save the model
        torch.save(model_data, c_filename)

        # Return True if the file is present, otherwise False
        if not os.path.isfile(c_filename):
            self._log().error("Cannot find the file to save: " + c_filename)
            return False

        # Store the code branch name and commit
        version_file = os.path.join(self._save_dir, "_version")
        with open(version_file, "w") as text_file:
            print("Model is using code [commit:branch]", file=text_file)
            print("{}:{}".format(self._commit_sha, self._branch_name), file=text_file)

        return True
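
To see what save() actually wrote, the checkpoint can be opened directly. The path below is hypothetical (it matches the subclass sketch at the end of this file); the keys are exactly the model_data dictionary above:

import torch

ckpt = torch.load("/tmp/ckpts/tiny_demo_toy_000.check", map_location="cpu")
print(sorted(ckpt.keys()))
# ['epoch', 'epoch_dt', 'epoch_start_ts', 'losses', 'model_parameters',
#  'model_state_dict', 'optimizers']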

    def load(self, optimizers=None):
        r"""
        Load the model data from the disk.
        The method will iterate over all *.check files and try to load the one
        from the highest epoch.

        Input:
            optimizers: Dictionary with optimizers. If it is not None, the keys
                        will be used to associate the corresponding state_dicts
                        from the checkpoint file and update the internal states
                        of the provided optimizers.

        Output:
            - success: True/False
            - epoch: Loaded epoch, or -1 if there are no checkpoint files
            - optimizers: Dictionary with loaded optimizers, or the unchanged
                          input if there is no checkpoint file
            - losses: Dictionary with loaded losses, or an empty dictionary if
                      there is no checkpoint file
            - model_parameters: Dictionary with the model parameters, or an
                                empty dictionary if there are no checkpoint
                                files
        """
        # Get the saved model
        saved_model, _ = self._load_best_checkpoint()

        # Restore the model
        if saved_model is None:
            self._log().debug("No saved model checkpoint found")
            return False, -1, optimizers, {}, {}

        self._log().debug("Loading model from checkpoint file")
        self.load_state_dict(saved_model["model_state_dict"])

        epoch = 0
        if "epoch" in saved_model:
            epoch = saved_model["epoch"]
        losses = {}
        if "losses" in saved_model:
            losses = saved_model["losses"]
        # Fall back to an empty dict if the checkpoint lacks model_parameters
        model_parameters = saved_model.get("model_parameters", {})

        if optimizers is not None:
            for key, optimizer_state_dict in saved_model["optimizers"].items():
                optimizers[key].load_state_dict(optimizer_state_dict)

        # Reset the start_ts of the next epoch
        self._epoch_start_ts[epoch + 1] = time.time()

        return True, epoch, optimizers, losses, model_parameters

    def _load_best_checkpoint(self):
        r"""
        If a "load_checkpoint" file has been provided, load this one.
        Otherwise use the "save_dir" and load the checkpoint with the most
        advanced epoch.

        Returns
        -------
        saved_model : dictionary
            Checkpoint file contents generated by torch.load, or None
        checkpoint_file : string
            Filename of the loaded checkpoint, or None
        """
        checkpoint_files = []
        # If a "load_checkpoint" file is provided, try to load it
        if self._load_checkpoint is not None:
            if not os.path.exists(self._load_checkpoint):
                self._log().error(
                    "Cannot load the checkpoint: {}".format(self._load_checkpoint)
                )
                return None, None
            checkpoint_files.append(self._load_checkpoint)
        else:
            # Iterate over all .check files from the directory in reverse
            # alphabetical order. Since the epoch is zero-padded in the
            # filename, this gets the biggest epoch first.
            checkpoint_files = glob.glob(os.path.join(self._save_dir, "*.check"))
            checkpoint_files.sort(reverse=True)

        for checkpoint_file in checkpoint_files:
            try:
                # Try to load the file
                self._log().info(
                    "Loading model checkpoint file: {}".format(checkpoint_file)
                )
                saved_model = torch.load(checkpoint_file, map_location=self._device)
                return saved_model, checkpoint_file
            except RuntimeError:
                self._log().error("Cannot load file: {}".format(checkpoint_file))

        return None, None

    def _build_checkpoint_filename(self, epoch):
        r"""
        Construct the full path for the filename of this checkpoint
        """
        dataset_name = self._config["dataset"]["name"]
        model_type = self._config["model"]["type"]
        model_name = self._config["model"]["name"]
        # The epoch is zero-padded so that alphabetical order matches epoch
        # order (see _load_best_checkpoint)
        filename = "{}_{}_{}_{:03}.check".format(
            model_type, model_name, dataset_name, epoch
        )
        c_filename = os.path.join(self._save_dir, filename)

        return c_filename
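
Putting the pieces together: a hypothetical minimal subclass. BaseModel expects to be mixed with nn.Module (it calls self.state_dict() and self.parameters()), and the config keys below mirror the ones the class reads (model.save_dir/type/name, dataset.name, train.epochs); none of these values come from the package itself.

import torch
import torch.nn as nn

from docling_ibm_models.tableformer.models.common.base_model import BaseModel


class TinyModel(BaseModel, nn.Module):
    def __init__(self, config, init_data, device):
        # BaseModel.__init__ chains into nn.Module.__init__ through the MRO,
        # so state_dict()/parameters() are usable afterwards
        super().__init__(config, init_data, device)
        self.head = nn.Linear(8, 2)

    def predict(self, img, max_steps, beam_size, return_attention):
        return self.head(img)


config = {
    "model": {"save_dir": "/tmp/ckpts", "type": "tiny", "name": "demo"},
    "dataset": {"name": "toy"},
    "train": {"epochs": 1},
}
model = TinyModel(config, {}, torch.device("cpu"))
print(model.count_parameters())  # 18 trainable parameters
model.save(epoch=0)              # writes /tmp/ckpts/tiny_demo_toy_000.check
ok, epoch, _, losses, params = model.load()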
docling_ibm_models/tableformer/models/table04_rs/__init__.py (file without changes)