PyPI - docling-ibm-models - Versions diffs - 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl - Mend

docling-ibm-models 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

docling_ibm_models/tableformer/data_management/tf_predictor.py CHANGED Viewed

@@ -12,7 +12,6 @@ import numpy as np
 import torch
 import docling_ibm_models.tableformer.common as c
-import docling_ibm_models.tableformer.data_management.functional as F
 import docling_ibm_models.tableformer.data_management.transforms as T
 import docling_ibm_models.tableformer.settings as s
 import docling_ibm_models.tableformer.utils.utils as u
@@ -21,6 +20,9 @@ from docling_ibm_models.tableformer.data_management.matching_post_processor impo
 )
 from docling_ibm_models.tableformer.data_management.tf_cell_matcher import CellMatcher
 from docling_ibm_models.tableformer.models.common.base_model import BaseModel
+from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (
+    TableModel04_rs,
+)
 from docling_ibm_models.tableformer.otsl import otsl_to_html
 from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler
@@ -187,16 +189,7 @@ class TFPredictor:
         """
         self._model_type = self._config["model"]["type"]
-        # Added import here to avoid loading turbotransformer library unnecessarily
-        if self._model_type == "TableModel04_rs":
-            from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa
-                TableModel04_rs,
-            )
-        for candidate in BaseModel.__subclasses__():
-            if candidate.__name__ == self._model_type:
-                model = candidate(
-                    self._config, self._init_data, s.PREDICT_PURPOSE, self._device
-                )
+        model = TableModel04_rs(self._config, self._init_data, self._device)
         if model is None:
             err_msg = "Not able to initiate a model for {}".format(self._model_type)
@@ -376,66 +369,6 @@ class TFPredictor:
         return new_bboxes
-    def _pad_image(self, iocr_page):
-        r"""
-        Adds padding to the image
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-        Returns
-        -------
-        new_im: PIL image
-            new, padded image
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-        _, old_iw, old_ih = iocr_page["image"].shape
-        margin_i = self._padding_size  # pixels
-        desired_iw = old_iw + (margin_i * 2)
-        desired_ih = old_ih + (margin_i * 2)
-        # Ratio of new image size to the original image size
-        new_image_ratio = desired_iw / old_iw
-        bcolor = (255, 255, 255)
-        # Create empty canvas of background color and desired size
-        padded_image = F.pad(
-            iocr_page["image"],
-            (desired_iw, desired_ih, desired_iw, desired_ih),
-            fill=bcolor,
-        )
-        return padded_image, new_image_ratio
-    def _pre_process_image(self, iocr_page):
-        r"""
-        Pre-process table image in memory, before doing prediction
-        Currently just removes from the image separate PDF cells that only contain "$" sign
-        This is done to remove model confusion when dealing with financial reports
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-        Returns
-        -------
-        iocr_page["image"] : PIL image
-            updated table image with "$" repainted
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-        new_image_ratio = 1.0
-        ic, iw, ih = iocr_page["image"].shape
-        return iocr_page["image"], new_image_ratio
     def _merge_tf_output(self, docling_output, pdf_cells):
         tf_output = []
         tf_cells_map = {}
@@ -519,6 +452,7 @@ class TFPredictor:
             sf = r
             dim = (width, int(h * r))
         # resize the image
+        # TODO(Nikos): Try to remove cv2 dependency
         resized = cv2.resize(image, dim, interpolation=inter)
         # return the resized image
         return resized, sf

docling_ibm_models/tableformer/data_management/transforms.py CHANGED Viewed

@@ -13,248 +13,6 @@ import torch
 from docling_ibm_models.tableformer.data_management import functional as F
-def box_cxcywh_to_xyxy(x):
-    x_c, y_c, w, h = x.unbind(-1)
-    b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
-    return torch.stack(b, dim=-1)
-def box_xyxy_to_cxcywh(x):
-    x0, y0, x1, y1 = x.unbind(-1)
-    b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
-    return torch.stack(b, dim=-1)
-class Lambda(object):
-    """Apply a user-defined lambda as a transform.
-    Attention: The multiprocessing used in dataloader of pytorch
-    is not friendly with lambda function in Windows
-    Args:
-        lambd (function): Lambda/function to be used for transform.
-    """
-    def __init__(self, lambd):
-        # assert isinstance(lambd, types.LambdaType)
-        self.lambd = lambd
-        # if 'Windows' in platform.system():
-        #     raise RuntimeError("Can't pickle lambda funciton in windows system")
-    def __call__(self, img):
-        return self.lambd(img)
-    def __repr__(self):
-        return self.__class__.__name__ + "()"
-class RandomTransforms(object):
-    """Base class for a list of transformations with randomness
-    Args:
-        transforms (list or tuple): list of transformations
-    """
-    def __init__(self, transforms):
-        assert isinstance(transforms, (list, tuple))
-        self.transforms = transforms
-    def __call__(self, *args, **kwargs):
-        raise NotImplementedError()
-    def __repr__(self):
-        format_string = self.__class__.__name__ + "("
-        for t in self.transforms:
-            format_string += "\n"
-            format_string += "    {0}".format(t)
-        format_string += "\n)"
-        return format_string
-class RandomChoice(RandomTransforms):
-    """Apply single transformation randomly picked from a list"""
-    def __call__(self, img, target):
-        t = random.choice(self.transforms)
-        return t(img, target)
-class RandomCrop(object):
-    def __init__(self, size, margin_crop):
-        self.size = list(size)
-        self.margin_crop = list(margin_crop)
-        # margin_crop: w, h
-    def __call__(self, img, target):
-        # img (w,h,ch)
-        image_height, image_width = img.shape[0], img.shape[1]
-        """
-        img (np.ndarray): Image to be cropped.
-        x: Upper pixel coordinate.
-        y: Left pixel coordinate.
-        h: Height of the cropped image.
-        w: Width of the cropped image.
-        """
-        if image_width > 0 and image_height > 0:
-            cropped_image = F.crop(
-                img,
-                self.margin_crop[1],
-                self.margin_crop[0],
-                image_height - (self.margin_crop[1] * 2),
-                image_width - (self.margin_crop[0] * 2),
-            )
-            target_ = target.copy()
-            target_["boxes"][:, 0] = target_["boxes"][:, 0] - self.margin_crop[0]
-            target_["boxes"][:, 1] = target_["boxes"][:, 1] - self.margin_crop[1]
-            target_["boxes"][:, 2] = target_["boxes"][:, 2] - self.margin_crop[0]
-            target_["boxes"][:, 3] = target_["boxes"][:, 3] - self.margin_crop[1]
-        else:
-            cropped_image = img
-        return cropped_image, target_
-class RandomPad(object):
-    def __init__(self, max_pad):
-        self.max_pad = max_pad
-    def __call__(self, img, target):
-        pad_x = random.randint(0, self.max_pad)
-        pad_y = random.randint(0, self.max_pad)
-        pad_x1 = random.randint(0, self.max_pad)
-        pad_y1 = random.randint(0, self.max_pad)
-        img = img.copy()
-        padded_image = F.pad(img, (pad_x, pad_y, pad_x1, pad_y1), fill=(255, 255, 255))
-        target_ = target.copy()
-        if target["boxes"] is not None:
-            target_["boxes"][:, 0] = target_["boxes"][:, 0] + pad_x
-            target_["boxes"][:, 1] = target_["boxes"][:, 1] + pad_y
-            target_["boxes"][:, 2] = target_["boxes"][:, 2] + pad_x
-            target_["boxes"][:, 3] = target_["boxes"][:, 3] + pad_y
-        return padded_image, target_
-class ColorJitter(object):
-    """Randomly change the brightness, contrast and saturation of an image.
-    Args:
-        brightness (float): How much to jitter brightness. brightness_factor
-            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
-        contrast (float): How much to jitter contrast. contrast_factor
-            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
-        saturation (float): How much to jitter saturation. saturation_factor
-            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
-        hue(float): How much to jitter hue. hue_factor is chosen uniformly from
-            [-hue, hue]. Should be >=0 and <= 0.5.
-    """
-    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
-        assert isinstance(brightness, float) or (
-            isinstance(brightness, collections.Iterable) and len(brightness) == 2
-        )
-        assert isinstance(contrast, float) or (
-            isinstance(contrast, collections.Iterable) and len(contrast) == 2
-        )
-        assert isinstance(saturation, float) or (
-            isinstance(saturation, collections.Iterable) and len(saturation) == 2
-        )
-        assert isinstance(hue, float) or (
-            isinstance(hue, collections.Iterable) and len(hue) == 2
-        )
-        self.brightness = brightness
-        self.contrast = contrast
-        self.saturation = saturation
-        self.hue = hue
-    @staticmethod
-    def get_params(brightness, contrast, saturation, hue):
-        """Get a randomized transform to be applied on image.
-        Arguments are same as that of __init__.
-        Returns:
-            Transform which randomly adjusts brightness, contrast and
-            saturation in a random order.
-        """
-        transforms = []
-        if isinstance(brightness, numbers.Number):
-            if brightness > 0:
-                brightness_factor = random.uniform(
-                    max(0, 1 - brightness), 1 + brightness
-                )
-                transforms.append(
-                    Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
-                )
-            if contrast > 0:
-                contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
-                transforms.append(
-                    Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
-                )
-            if saturation > 0:
-                saturation_factor = random.uniform(
-                    max(0, 1 - saturation), 1 + saturation
-                )
-                transforms.append(
-                    Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
-                )
-            if hue > 0:
-                hue_factor = random.uniform(-hue, hue)
-                transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
-        else:
-            if brightness[0] > 0 and brightness[1] > 0:
-                brightness_factor = random.uniform(brightness[0], brightness[1])
-                transforms.append(
-                    Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
-                )
-            if contrast[0] > 0 and contrast[1] > 0:
-                contrast_factor = random.uniform(contrast[0], contrast[1])
-                transforms.append(
-                    Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
-                )
-            if saturation[0] > 0 and saturation[1] > 0:
-                saturation_factor = random.uniform(saturation[0], saturation[1])
-                transforms.append(
-                    Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
-                )
-            if hue[0] > 0 and hue[1] > 0:
-                hue_factor = random.uniform(hue[0], hue[1])
-                transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
-        random.shuffle(transforms)
-        transform = ComposeSingle(transforms)
-        return transform
-    def __call__(self, img, target):
-        """
-        Args:
-            img (np.ndarray): Input image.
-        Returns:
-            np.ndarray: Color jittered image.
-        """
-        transform = self.get_params(
-            self.brightness, self.contrast, self.saturation, self.hue
-        )
-        return transform(img), target
-    def __repr__(self):
-        format_string = self.__class__.__name__ + "("
-        format_string += "brightness={0}".format(self.brightness)
-        format_string += ", contrast={0}".format(self.contrast)
-        format_string += ", saturation={0}".format(self.saturation)
-        format_string += ", hue={0})".format(self.hue)
-        return format_string
 class Normalize(object):
     """Normalize a tensor image with mean and standard deviation.
     Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
@@ -284,69 +42,6 @@ class Normalize(object):
         )
-class NoTransformation(object):
-    """Do Nothing"""
-    def __call__(self, img, target):
-        return img, target
-class Compose(object):
-    """Composes several transforms together.
-    Args:
-        transforms (list of ``Transform`` objects): list of transforms to compose.
-    Example:
-        >>> transforms.Compose([
-        >>>     transforms.CenterCrop(10),
-        >>>     transforms.ToTensor(),
-        >>> ])
-    """
-    def __init__(self, transforms):
-        self.transforms = transforms
-    def __call__(self, img, target):
-        for t in self.transforms:
-            img, target = t(img, target)
-        return img, target
-    def __repr__(self):
-        format_string = self.__class__.__name__ + "("
-        for t in self.transforms:
-            format_string += "\n"
-            format_string += "    {0}".format(t)
-        format_string += "\n)"
-        return format_string
-class ComposeSingle(object):
-    """Composes several transforms together.
-    Args:
-        transforms (list of ``Transform`` objects): list of transforms to compose.
-    Example:
-        >>> transforms.Compose([
-        >>>     transforms.CenterCrop(10),
-        >>>     transforms.ToTensor(),
-        >>> ])
-    """
-    def __init__(self, transforms):
-        self.transforms = transforms
-    def __call__(self, img):
-        for t in self.transforms:
-            img = t(img)
-        return img
-    def __repr__(self):
-        format_string = self.__class__.__name__ + "("
-        for t in self.transforms:
-            format_string += "\n"
-            format_string += "    {0}".format(t)
-        format_string += "\n)"
-        return format_string
 class Resize(object):
     """Resize the input PIL Image to the given size.
     Args:

docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py CHANGED Viewed

@@ -26,7 +26,7 @@ class TableModel04_rs(BaseModel, nn.Module):
     TableNet04Model encoder, dual-decoder model with OTSL+ support
     """
-    def __init__(self, config, init_data, purpose, device):
+    def __init__(self, config, init_data, device):
         super(TableModel04_rs, self).__init__(config, init_data, device)
         self._prof = config["predict"].get("profiling", False)

{docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.3.1
+Version: 1.3.2
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former

{docling_ibm_models-1.3.1.dist-info → docling_ibm_models-1.3.2.dist-info}/RECORD RENAMED Viewed

@@ -1,32 +1,28 @@
 docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
 docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO9DhOcbvPS5k,6023
+docling_ibm_models/tableformer/common.py,sha256=VLBQ_9JWl4EsmBMSftyooIXId8FN4iTVqTIho4eNZrg,3041
 docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
-docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
+docling_ibm_models/tableformer/data_management/functional.py,sha256=kJntHEXFz2SP7obEcHyjAqZNZC9qh-U75MwUJALLADI,3143
 docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
 docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=GaBW5px3xX9JaHVASZArKiQ-qfrzX0oj-E_6P3-OvuU,21238
-docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
-docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=32rox4--vqFddCG6oJ1_RQpIoc8nmq4ADvPpgphVR60,40959
-docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
+docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=J_AjIGnpT0SkSV12comBlDa8Ga86WnsyJvKkIok4ohs,38834
+docling_ibm_models/tableformer/data_management/transforms.py,sha256=NNaz_7GI7FCVmu_rJuenqH5VfzRSljJHUHpNQQ8Mq3Q,2983
 docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
 docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
 docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
-docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=FtmWZNOKjQFLG5GtBCvvU23rWrIsDu3gqfcfl68soPg,12275
+docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
 docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=nhnYFlXT5KyJMdB4qMo5r8GimWXVy0lcqcmoHPEl-KE,6416
 docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
 docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
-docling_ibm_models/tableformer/test_dataset_cache.py,sha256=zvVJvUnYz4GxAQfPUmLTHUbqj0Yhi2vwgOBnsRgt1rI,818
-docling_ibm_models/tableformer/test_prepare_image.py,sha256=oPmU93-yWIkCeUYulGQ1p676Vq-zcjw2EX24WA5lspA,3155
 docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
 docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
 docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
 docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
-docling_ibm_models-1.3.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling_ibm_models-1.3.1.dist-info/METADATA,sha256=W7euvW9ItpwNBZbYXelzFY_O1sseHNe0HO4sJdO3Hbo,7088
-docling_ibm_models-1.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling_ibm_models-1.3.1.dist-info/RECORD,,
+docling_ibm_models-1.3.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling_ibm_models-1.3.2.dist-info/METADATA,sha256=JoQmgI44L9riX-SDDwu0w9rWu9l4hsuKoaGok5cnoHE,7088
+docling_ibm_models-1.3.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling_ibm_models-1.3.2.dist-info/RECORD,,

docling-ibm-models 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl

docling-ibm-models 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl