PyPI - docling-ibm-models - Versions diffs - 0.1.0__py3-none-any.whl - Mend

docling-ibm-models 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

docling_ibm_models/tableformer/test_prepare_image.py ADDED Viewed

@@ -0,0 +1,99 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import glob
+import os
+import numpy as np
+from PIL import Image
+import docling_ibm_models.tableformer.common as c
+from docling_ibm_models.tableformer.data_management.data_transformer import (
+    DataTransformer,
+)
+def dump_np(img_np: np.array, fn, n=6):
+    # Expect to receive a numpy array for an image with the shape [channels, rows, columns]
+    s = img_np.shape
+    if s[0] not in [1, 2, 3, 4] or len(s) != 3:
+        print("Image of invalid shape: {}".format(s))
+        return
+    channels = s[0]
+    rows = s[1]
+    cols = s[2]
+    w = n + 6
+    with open(fn, "w") as fd:
+        for r in range(rows):
+            for col in range(cols):
+                for ch in range(channels):
+                    x = img_np[ch][r][col]
+                    if isinstance(x, np.float32):
+                        f_str = "0:>{}.{}f".format(w, n)
+                    elif isinstance(x, np.uint8):
+                        f_str = "0:>{}".format(w)
+                    else:
+                        return False
+                    x_str = ("{" + f_str + "}").format(x)
+                    fd.write(x_str)
+                    if ch < channels - 1:
+                        fd.write(" ")
+                fd.write("\n")
+    return True
+def dump_channels(save_dir, fn_prefix, img_np: np.array):
+    # Dump the np array into 3 files per channel
+    img_np_ch0 = img_np[0, :, :]
+    img_np_ch1 = img_np[1, :, :]
+    img_np_ch2 = img_np[2, :, :]
+    txt_ch0_fn = os.path.join(save_dir, fn_prefix + "_ch0.txt")
+    txt_ch1_fn = os.path.join(save_dir, fn_prefix + "_ch1.txt")
+    txt_ch2_fn = os.path.join(save_dir, fn_prefix + "_ch2.txt")
+    np.savetxt(txt_ch0_fn, img_np_ch0)
+    np.savetxt(txt_ch1_fn, img_np_ch1)
+    np.savetxt(txt_ch2_fn, img_np_ch2)
+    print(f"{txt_ch0_fn}")
+    print(f"{txt_ch1_fn}")
+    print(f"{txt_ch2_fn}")
+def prepare_image(config):
+    transformer = DataTransformer(config)
+    predict_dir = config["predict"]["predict_dir"]
+    use_normalization = config["dataset"]["image_normalization"]["state"]
+    pattern = os.path.join(predict_dir, "*.png")
+    for img_fn in glob.glob(pattern):
+        print(f"img_fn: {img_fn}")
+        with Image.open(img_fn) as img:
+            # Dump the initial image in txt files
+            img_np = np.array(img)
+            # Reshape the image in order to print it
+            img_np_m = np.moveaxis(img_np, 2, 0)
+            print(
+                "orig. img_np.shape: {}, reshaped image: {}".format(
+                    img_np.shape, img_np_m.shape
+                )
+            )
+            original_fn = img_fn + "_python.txt"
+            dump_np(img_np_m, original_fn)
+            r_img_ten = transformer.rescale_in_memory(img, use_normalization)
+            print("npimgc: {} - {}".format(r_img_ten.type(), r_img_ten.size()))
+            # Dump the processed image tensor in txt files
+            r_img_np = r_img_ten.numpy()
+            prepared_fn = img_fn + "_python_prepared.txt"
+            dump_np(r_img_np, prepared_fn)
+# Script entry point: get the configuration from c.parse_arguments() and dump the images
+if __name__ == "__main__":
+    config = c.parse_arguments()
+    prepare_image(config)

docling_ibm_models/tableformer/utils/__init__.py ADDED Viewed

File without changes

docling_ibm_models/tableformer/utils/app_profiler.py ADDED Viewed

@@ -0,0 +1,243 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import time
+from collections import deque
+from statistics import mean, median
+class SingletonClass(type):
+    r"""
+    Generic singleton metaclass
+    """
+    def __init__(self, name, bases, dic):
+        self._instance = None
+        super().__init__(name, bases, dic)
+    def __call__(cls, *args, **kwargs):
+        # Create a singleton if needed
+        if cls._instance is None:
+            singleton = cls.__new__(cls)
+            singleton.__init__(*args, **kwargs)
+            cls._instance = singleton
+        return cls._instance
+class Profiler:
+    r"""
+    Application specific profiler
+    Decompose the application into "sections". Each section is a label.
+    The total time a section consumes is split into "intervals"
+    Use the `begin`, `end` methods to mark the begining and end of an interval for
+    a certain section
+    """
+    def __init__(self):
+        self._section_dts = {}  # section name -> sum(section intervals)
+        self._section_calls = {}  # section name -> number of invocations
+        self._section_kB = {}  # section name -> max kB of used heap
+        # section name -> beginning of the last interval
+        self._last_begin = {}
+    def begin(self, section_name, enable=True):
+        r"""
+        Mark the beginning of an interval
+        Parameters
+        ----------
+        section_name : string
+            Name of the section
+        enable : bool
+            The actual interval entry takes place only if enable is true
+        Return
+        ------
+            True if the interval has actuall begun
+        """
+        if not enable:
+            return False
+        self._last_begin[section_name] = time.time()
+        return True
+    def end(self, section_name, enable=True):
+        r"""
+        Mark the end of an interval for a certain section
+        Parameters
+        ----------
+        section_name : string
+            Name of the section
+        enable : bool
+            The actual interval entry takes place only if enable is true
+        Return
+        ------
+            True if the section name is valid and an interval for this section has already begun
+            False otherwise
+        """
+        if not enable:
+            return False
+        if section_name not in self._last_begin:
+            return False
+        dt = time.time() - self._last_begin[section_name]
+        if section_name not in self._section_dts:
+            self._section_dts[section_name] = dt
+            self._section_calls[section_name] = 1
+        else:
+            self._section_dts[section_name] += dt
+            self._section_calls[section_name] += 1
+        return True
+    def get_data(self, section_names=None):
+        r"""
+        Return a dict with profiling data for the specified sections.
+        Parameter
+        ---------
+        section_names : list of string
+            List with the section names to get their accumulative dt
+            If it is None, all sections are returned
+        Return
+        ------
+        dict of dicts
+            Outer key: section name
+            Inner keys: "dt": Accumulative time for that section, "cells": Number of calls
+        """
+        # Filter the section names to apply
+        filtered_names = list(
+            filter(lambda x: x in section_names, self._section_dts.keys())
+            if section_names is not None
+            else self._section_dts.keys()
+        )
+        data = {}
+        for section_name in filtered_names:
+            data[section_name] = {
+                "dt": self._section_dts[section_name],
+                "calls": self._section_calls[section_name],
+                "kB": self._section_kB[section_name],
+            }
+        return data
+class AppProfiler(Profiler, metaclass=SingletonClass):
+    r"""
+    AppProfiler is a singleton of the Profiler for application wide usage
+    """
+    def __init__(self):
+        super(AppProfiler, self).__init__()
+class AggProfiler(metaclass=SingletonClass):
+    r"""
+    Generic wrapper of Profiler that enables aggregation of profiling statistics around Cycles
+    - When a new cycle begins a new Profiler is created to keep the profiling data per section
+    - Keep the last n cycles in a sliding window manner
+    - At every time we can get profiling data about the last cycle and statistics over the last n
+      cycles
+    """
+    def __init__(self, window_size=20):
+        self._window_size = window_size
+        # deque with up to the last "window_size" Profilers. The newest at index 0
+        self._cycles = deque()
+    def start_agg(self, enable=True):
+        r"""
+        Returns
+        -------
+        0: not enabled
+        1: a new scope has started
+        """
+        if not enable:
+            return 0
+        # Add a new profiler
+        self._cycles.appendleft(Profiler())
+        # In case the deque has grown too much, remove the oldest Profiler
+        if len(self._cycles) > self._window_size:
+            self._cycles.pop()
+        return 1
+    def begin(self, section_name, enable=True):
+        if not enable:
+            return False
+        if len(self._cycles) == 0:
+            print("AggProfiler begin | Start Aggregator not initialized.")
+            return False
+        profiler = self._cycles[0]
+        return profiler.begin(section_name)
+    def end(self, section_name, enable=True):
+        if not enable:
+            return False
+        if len(self._cycles) == 0:
+            print("AggProfiler end | Start Aggregator not initialized.")
+            return False
+        profiler = self._cycles[0]
+        return profiler.end(section_name)
+    def get_data(self):
+        r"""
+        Get profiling data for:
+        - The last cycle
+        - Aggragated statistics (avg, median) per section and per metric across all cycles
+        - The dt numbers for the mean/median is the average time for each section ACROSS the cycle
+        - There is NO need to compute average by yourself.
+        Returns
+        -------
+        dict with the structure:
+        - window: int with the size of the time sliding window
+        - last: dict with the metrics for the last cycle (as provided by the Profiler)
+        - mean: dict with the mean metrics per section across the cycle
+            - section_name
+                - metric_name: mean of the metric values
+        - median: dict with the median metrics per section across the cycle
+            - section_name
+                - metric_name: median of the metric values
+        """
+        last_data = self._cycles[0].get_data()
+        data = {
+            "window": len(self._cycles),
+            "last": last_data,
+            "mean": {},
+            "median": {},
+        }
+        # Section -> metric -> [values]
+        section_metric_values = {}
+        # Collect the metrics
+        for i, p in enumerate(self._cycles):
+            p_data = p.get_data()
+            for section_name, m_dict in p_data.items():
+                for m_name, m_val in m_dict.items():
+                    if section_name not in section_metric_values:
+                        section_metric_values[section_name] = {}
+                    s_metrics = section_metric_values[section_name]
+                    if m_name not in s_metrics:
+                        s_metrics[m_name] = []
+                    s_metrics[m_name].append(m_val)
+        # Aggregate the metrics
+        for section_name, m_dict in section_metric_values.items():
+            for m_name, m_values in m_dict.items():
+                if section_name not in data["mean"]:
+                    data["mean"][section_name] = {}
+                if section_name not in data["median"]:
+                    data["median"][section_name] = {}
+                mean_v = mean(m_values)
+                median_v = median(m_values)
+                data["mean"][section_name][m_name] = mean_v
+                data["median"][section_name][m_name] = median_v
+        return data

docling_ibm_models/tableformer/utils/torch_utils.py ADDED Viewed

@@ -0,0 +1,216 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import torch
+def model_info(model, verbose=False):
+    # Plots a line-by-line description of a PyTorch model
+    n_p = sum(x.numel() for x in model.parameters())  # number parameters
+    n_g = sum(
+        x.numel() for x in model.parameters() if x.requires_grad
+    )  # number gradients
+    if verbose:
+        print(
+            "%5s %40s %9s %12s %20s %10s %10s"
+            % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma")
+        )
+        for i, (name, p) in enumerate(model.named_parameters()):
+            name = name.replace("module_list.", "")
+            print(
+                "%5g %40s %9s %12g %20s %10.3g %10.3g"
+                % (
+                    i,
+                    name,
+                    p.requires_grad,
+                    p.numel(),
+                    list(p.shape),
+                    p.mean(),
+                    p.std(),
+                )
+            )
+    try:  # FLOPS
+        from thop import profile
+        macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
+        fs = ", %.1f GFLOPS" % (macs / 1e9 * 2)
+    except Exception:
+        fs = ""
+    print(
+        "Model Summary: %g layers, %g parameters, %g gradients%s"
+        % (len(list(model.parameters())), n_p, n_g, fs)
+    )
+# def init_seeds(seed=0):
+#     torch.manual_seed(seed)
+#
+#     # Reduce randomness (may be slower on Tesla GPUs)
+#     # https://pytorch.org/docs/stable/notes/randomness.html
+#     if seed == 0:
+#         cudnn.deterministic = False
+#         cudnn.benchmark = True
+#
+#
+# def select_device(device='', apex=False, batch_size=None):
+#     # device = 'cpu' or '0' or '0,1,2,3'
+#     cpu_request = device.lower() == 'cpu'
+#     if device and not cpu_request:  # if device requested other than 'cpu'
+#         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
+#         # check availability
+#         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device
+#
+#     cuda = False if cpu_request else torch.cuda.is_available()
+#     if cuda:
+#         c = 1024 ** 2  # bytes to MB
+#         ng = torch.cuda.device_count()
+#         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
+#             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % \
+#                 (batch_size, ng)
+#         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
+#         # apex for mixed precision https://github.com/NVIDIA/apex
+#         s = 'Using CUDA ' + ('Apex ' if apex else '')
+#         for i in range(0, ng):
+#             if i == 1:
+#                 s = ' ' * len(s)
+#             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
+#                   (s, i, x[i].name, x[i].total_memory / c))
+#     else:
+#         print('Using CPU')
+#
+#     print('')  # skip a line
+#     return torch.device('cuda:0' if cuda else 'cpu')
+#
+#
+# def time_synchronized():
+#     torch.cuda.synchronize() if torch.cuda.is_available() else None
+#     return time.time()
+#
+#
+# def initialize_weights(model):
+#     for m in model.modules():
+#         t = type(m)
+#         if t is nn.Conv2d:
+#             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+#         elif t is nn.BatchNorm2d:
+#             m.eps = 1e-4
+#             m.momentum = 0.03
+#         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+#             m.inplace = True
+#
+#
+# def find_modules(model, mclass=nn.Conv2d):
+#     # finds layer indices matching module class 'mclass'
+#     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
+#
+#
+# def fuse_conv_and_bn(conv, bn):
+#     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+#     with torch.no_grad():
+#         # init
+#         fusedconv = torch.nn.Conv2d(conv.in_channels,
+#                                     conv.out_channels,
+#                                     kernel_size=conv.kernel_size,
+#                                     stride=conv.stride,
+#                                     padding=conv.padding,
+#                                     bias=True)
+#
+#         # prepare filters
+#         w_conv = conv.weight.clone().view(conv.out_channels, -1)
+#         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
+#         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
+#
+#         # prepare spatial bias
+#         if conv.bias is not None:
+#             b_conv = conv.bias
+#         else:
+#             b_conv = torch.zeros(conv.weight.size(0))
+#         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
+#         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
+#
+#         return fusedconv
+#
+#
+# def load_classifier(name='resnet101', n=2):
+#     # Loads a pretrained model reshaped to n-class output
+#     import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch#torchvision
+#     model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
+#
+#     # Display model properties
+#     for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean',
+#               'model.std']:
+#         print(x + ' =', eval(x))
+#
+#     # Reshape output to n classes
+#     filters = model.last_linear.weight.shape[1]
+#     model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
+#     model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
+#     model.last_linear.out_features = n
+#     return model
+#
+#
+# def scale_img(img, ratio=1.0, same_shape=True):  # img(16,3,256,416), r=ratio
+#     # scales img(bs,3,y,x) by ratio
+#     h, w = img.shape[2:]
+#     s = (int(h * ratio), int(w * ratio))  # new size
+#     img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
+#     if not same_shape:  # pad/crop img
+#         gs = 64  # (pixels) grid size
+#         h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
+#     return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
+#
+#
+# class ModelEMA:
+#     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
+#     Keep a moving average of everything in the model state_dict (parameters and buffers).
+#     This is intended to allow functionality like
+#     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
+#     A smoothed version of the weights is necessary for some training schemes to perform well.
+#     E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
+#     RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA
+#     smoothing of weights to match results. Pay attention to the decay constant you are using
+#     relative to your update count per epoch.
+#     To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
+#     disable validation of the EMA weights. Validation will have to be done manually in a separate
+#     process, or after the training stops converging.
+#     This class is sensitive where it is initialized in the sequence of model init,
+#     GPU assignment and distributed training wrappers.
+#     I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and
+#     single-GPU.
+#     """
+#
+#     def __init__(self, model, decay=0.9999, device=''):
+#         # make a copy of the model for accumulating moving average of weights
+#         self.ema = deepcopy(model)
+#         self.ema.eval()
+#         self.updates = 0  # number of EMA updates
+#         # decay exponential ramp (to help early epochs)
+#         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
+#         self.device = device  # perform ema on different device from model if set
+#         if device:
+#             self.ema.to(device=device)
+#         for p in self.ema.parameters():
+#             p.requires_grad_(False)
+#
+#     def update(self, model):
+#         self.updates += 1
+#         d = self.decay(self.updates)
+#         with torch.no_grad():
+#             if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
+#                 msd, esd = model.module.state_dict(), self.ema.module.state_dict()
+#             else:
+#                 msd, esd = model.state_dict(), self.ema.state_dict()
+#
+#             for k, v in esd.items():
+#                 if v.dtype.is_floating_point:
+#                     v *= d
+#                     v += (1. - d) * msd[k].detach()
+#
+#     def update_attr(self, model):
+#         # Assign attributes (which may change during training)
+#         for k in model.__dict__.keys():
+#             if not k.startswith('_'):
+#                 setattr(self.ema, k, getattr(model, k))