docling-ibm-models 1.1.7__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/layoutmodel/layout_predictor.py +33 -25
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +9 -6
- docling_ibm_models/tableformer/data_management/tf_predictor.py +92 -59
- docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +4 -0
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -4
- docling_ibm_models/tableformer/utils/app_profiler.py +12 -1
- docling_ibm_models/tableformer/utils/mem_monitor.py +175 -0
- {docling_ibm_models-1.1.7.dist-info → docling_ibm_models-1.2.1.dist-info}/LICENSE +1 -1
- {docling_ibm_models-1.1.7.dist-info → docling_ibm_models-1.2.1.dist-info}/METADATA +1 -1
- {docling_ibm_models-1.1.7.dist-info → docling_ibm_models-1.2.1.dist-info}/RECORD +11 -11
- docling_ibm_models/tableformer/utils/variance.py +0 -175
- {docling_ibm_models-1.1.7.dist-info → docling_ibm_models-1.2.1.dist-info}/WHEEL +0 -0
docling_ibm_models/layoutmodel/layout_predictor.py

@@ -14,29 +14,6 @@ MODEL_CHECKPOINT_FN = "model.pt"
 DEFAULT_NUM_THREADS = 4
 
 
-# Classes:
-CLASSES_MAP = {
-    0: "background",
-    1: "Caption",
-    2: "Footnote",
-    3: "Formula",
-    4: "List-item",
-    5: "Page-footer",
-    6: "Page-header",
-    7: "Picture",
-    8: "Section-header",
-    9: "Table",
-    10: "Text",
-    11: "Title",
-    12: "Document Index",
-    13: "Code",
-    14: "Checkbox-Selected",
-    15: "Checkbox-Unselected",
-    16: "Form",
-    17: "Key-Value Region",
-}
-
-
 class LayoutPredictor:
     r"""
     Document layout prediction using ONNX
@@ -69,6 +46,31 @@ class LayoutPredictor:
         ------
         FileNotFoundError when the model's ONNX file is missing
         """
+        # Initialize classes map:
+        self._classes_map = {
+            0: "background",
+            1: "Caption",
+            2: "Footnote",
+            3: "Formula",
+            4: "List-item",
+            5: "Page-footer",
+            6: "Page-header",
+            7: "Picture",
+            8: "Section-header",
+            9: "Table",
+            10: "Text",
+            11: "Title",
+            12: "Document Index",
+            13: "Code",
+            14: "Checkbox-Selected",
+            15: "Checkbox-Unselected",
+            16: "Form",
+            17: "Key-Value Region",
+        }
+
+        # Blacklisted classes
+        self._black_classes = set(["Form", "Key-Value Region"])
+
         # Set basic params
         self._threshold = 0.6  # Score threshold
         self._image_size = 640
@@ -159,13 +161,19 @@ class LayoutPredictor:
         )
 
         # Yield output
-        for
+        for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
+            # Filter out blacklisted classes
+            label = self._classes_map[label_idx]
+            if label in self._black_classes:
+                continue
+
+            # Check against threshold
             if score > self._threshold:
                 yield {
                     "l": box[0] / self._image_size * w,
                     "t": box[1] / self._image_size * h,
                     "r": box[2] / self._image_size * w,
                     "b": box[3] / self._image_size * h,
-                    "label":
+                    "label": label,
                     "confidence": score,
                 }
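Taken together, the two hunks above move the class map onto the predictor instance and add a class blacklist that is checked before the confidence threshold. A minimal sketch of the resulting filtering logic, with made-up arrays standing in for the ONNX outputs:

```python
# Hypothetical model outputs for one image (shapes modeled on the diff).
labels = [[9, 16, 10]]  # class indexes: Table, Form, Text
boxes = [[(10, 10, 320, 200), (0, 0, 640, 640), (5, 5, 100, 40)]]
scores = [[0.95, 0.99, 0.30]]

classes_map = {9: "Table", 10: "Text", 16: "Form"}  # excerpt of the full map
black_classes = {"Form", "Key-Value Region"}
threshold = 0.6

for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
    label = classes_map[label_idx]
    if label in black_classes:  # "Form" is dropped even at score 0.99
        continue
    if score > threshold:       # "Text" at 0.30 is dropped here
        print(label, box, score)  # only "Table" survives
```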
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py

@@ -129,12 +129,15 @@ class CellMatcher:
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
         if len(pdf_cells) > 0:
             for word in pdf_cells:
-                word["bbox"]
-[2 lines truncated in the source diff]
-                word["bbox"][
-[2 lines truncated in the source diff]
+                if isinstance(word["bbox"], list):
+                    continue
+                elif isinstance(word["bbox"], dict):
+                    word["bbox"] = [
+                        word["bbox"]["l"],
+                        word["bbox"]["t"],
+                        word["bbox"]["r"],
+                        word["bbox"]["b"],
+                    ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
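The change makes CellMatcher tolerant of both token formats: bboxes that already arrive as `[l, t, r, b]` lists are left alone, while dict-style bboxes are converted in place. A standalone sketch of the same normalization:

```python
def normalize_bbox(word: dict) -> None:
    """Coerce word["bbox"] to [l, t, r, b] list form, in place."""
    bbox = word["bbox"]
    if isinstance(bbox, dict):
        word["bbox"] = [bbox["l"], bbox["t"], bbox["r"], bbox["b"]]

tokens = [
    {"text": "foo", "bbox": [10, 20, 50, 30]},                      # already a list
    {"text": "bar", "bbox": {"l": 60, "t": 20, "r": 90, "b": 30}},  # dict form
]
for word in tokens:
    normalize_bbox(word)
print([w["bbox"] for w in tokens])  # [[10, 20, 50, 30], [60, 20, 90, 30]]
```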
docling_ibm_models/tableformer/data_management/tf_predictor.py

@@ -524,7 +524,12 @@ class TFPredictor:
         return resized, sf
 
     def multi_table_predict(
-        self,
+        self,
+        iocr_page,
+        table_bboxes,
+        do_matching=True,
+        correct_overlapping_cells=False,
+        sort_row_col_indexes=True,
     ):
         multi_tf_output = []
         page_image = iocr_page["image"]
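The expanded signature spells out the inputs one argument per line and adds keyword toggles, including the new `sort_row_col_indexes` switch used in the next hunk. A hypothetical call (`predictor`, `page`, and `bboxes` are stand-ins, not names from the package):

```python
# Hypothetical usage sketch: keep TableFormer's raw col/row IDs
# instead of compacting them into gap-free indexes.
outputs = predictor.multi_table_predict(
    iocr_page=page,       # dict carrying "image", "tokens", ... (assumed)
    table_bboxes=bboxes,  # list of table bounding boxes on the page
    do_matching=True,
    correct_overlapping_cells=False,
    sort_row_col_indexes=False,  # new in 1.2.x; defaults to True
)
```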
@@ -563,56 +568,70 @@ class TFPredictor:
             # PROCESS PREDICTED RESULTS, TO TURN PREDICTED COL/ROW IDs into Indexes
             # Indexes should be in increasing order, without gaps
 
-[50 lines truncated in the source diff]
+            if sort_row_col_indexes:
+                # Fix col/row indexes
+                # Arranges all col/row indexes sequentially without gaps using input IDs
+
+                indexing_start_cols = (
+                    []
+                )  # Index of original start col IDs (not indexes)
+                indexing_end_cols = []  # Index of original end col IDs (not indexes)
+                indexing_start_rows = (
+                    []
+                )  # Index of original start row IDs (not indexes)
+                indexing_end_rows = []  # Index of original end row IDs (not indexes)
+
+                # First, collect all possible predicted IDs, to be used as indexes
+                # ID's returned by Tableformer are sequential, but might contain gaps
+                for tf_response_cell in tf_responses:
+                    start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
+                    end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
+                    start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
+                    end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
+
+                    # Collect all possible col/row IDs:
+                    if start_col_offset_idx not in indexing_start_cols:
+                        indexing_start_cols.append(start_col_offset_idx)
+                    if end_col_offset_idx not in indexing_end_cols:
+                        indexing_end_cols.append(end_col_offset_idx)
+                    if start_row_offset_idx not in indexing_start_rows:
+                        indexing_start_rows.append(start_row_offset_idx)
+                    if end_row_offset_idx not in indexing_end_rows:
+                        indexing_end_rows.append(end_row_offset_idx)
+
+                indexing_start_cols.sort()
+                indexing_end_cols.sort()
+                indexing_start_rows.sort()
+                indexing_end_rows.sort()
+
+                # After this - put actual indexes of IDs back into predicted structure...
+                for tf_response_cell in tf_responses:
+                    tf_response_cell["start_col_offset_idx"] = (
+                        indexing_start_cols.index(
+                            tf_response_cell["start_col_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_col_offset_idx"] = (
+                        tf_response_cell["start_col_offset_idx"]
+                        + tf_response_cell["col_span"]
+                    )
+                    tf_response_cell["start_row_offset_idx"] = (
+                        indexing_start_rows.index(
+                            tf_response_cell["start_row_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_row_offset_idx"] = (
+                        tf_response_cell["start_row_offset_idx"]
+                        + tf_response_cell["row_span"]
+                    )
+                # Counting matched cols/rows from actual indexes (and not ids)
+                predict_details["num_cols"] = len(indexing_end_cols)
+                predict_details["num_rows"] = len(indexing_end_rows)
+            else:
+                otsl_seq = predict_details["prediction"]["rs_seq"]
+                predict_details["num_cols"] = otsl_seq.index("nl")
+                predict_details["num_rows"] = otsl_seq.count("nl")
 
             # Put results into multi_tf_output
             multi_tf_output.append(
                 {"tf_responses": tf_responses, "predict_details": predict_details}
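The core idea of the hunk: TableFormer's predicted col/row IDs are ordered but may skip values, so each ID is replaced by its rank within the sorted set of observed IDs, which closes the gaps. A minimal sketch with hypothetical data:

```python
# Minimal sketch of the gap-free re-indexing idea, with hypothetical cells.
cells = [
    {"start_col_offset_idx": 0, "col_span": 1},
    {"start_col_offset_idx": 1, "col_span": 1},
    {"start_col_offset_idx": 3, "col_span": 2},  # ID 2 never appears: a gap
]
start_ids = sorted({c["start_col_offset_idx"] for c in cells})  # [0, 1, 3]
for c in cells:
    c["start_col_offset_idx"] = start_ids.index(c["start_col_offset_idx"])
    c["end_col_offset_idx"] = c["start_col_offset_idx"] + c["col_span"]
print([c["start_col_offset_idx"] for c in cells])  # [0, 1, 2] - no gaps
```

With `sort_row_col_indexes=False`, the table shape is instead read off the OTSL sequence: `num_cols` is the offset of the first `"nl"` (new-line) token and `num_rows` is the total count of `"nl"` tokens.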
@@ -667,13 +686,20 @@ class TFPredictor:
         )
 
         if outputs_coord is not None:
-[2 lines truncated in the source diff]
+            if len(outputs_coord) == 0:
+                prediction["bboxes"] = []
+            else:
+                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                prediction["bboxes"] = bbox_pred.tolist()
        else:
             prediction["bboxes"] = []
+
         if outputs_class is not None:
-[2 lines truncated in the source diff]
+            if len(outputs_class) == 0:
+                prediction["classes"] = []
+            else:
+                result_class = torch.argmax(outputs_class, dim=1)
+                prediction["classes"] = result_class.tolist()
         else:
             prediction["classes"] = []
         if self._remove_padding:
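For context, `u.box_cxcywh_to_xyxy` is the DETR-style conversion from center format to corner format; the same empty-input guard is applied again in the next hunk. A sketch of the standard formula (a reimplementation under that assumption, not copied from the package):

```python
import torch

def box_cxcywh_to_xyxy_sketch(boxes: torch.Tensor) -> torch.Tensor:
    """Sketch of the (cx, cy, w, h) -> (x0, y0, x1, y1) conversion that
    u.box_cxcywh_to_xyxy is assumed to perform."""
    cx, cy, w, h = boxes.unbind(-1)
    return torch.stack(
        [cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], dim=-1
    )

# The explicit len(...) == 0 guard above also covers 0-element
# placeholders such as torch.empty(0) coming out of the model.
print(box_cxcywh_to_xyxy_sketch(torch.tensor([[0.5, 0.5, 0.2, 0.4]])))
# tensor([[0.4000, 0.3000, 0.6000, 0.7000]])
```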
@@ -788,13 +814,20 @@ class TFPredictor:
         )
 
         if outputs_coord is not None:
-[2 lines truncated in the source diff]
+            if len(outputs_coord) == 0:
+                prediction["bboxes"] = []
+            else:
+                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                prediction["bboxes"] = bbox_pred.tolist()
         else:
             prediction["bboxes"] = []
+
         if outputs_class is not None:
-[2 lines truncated in the source diff]
+            if len(outputs_class) == 0:
+                prediction["classes"] = []
+            else:
+                result_class = torch.argmax(outputs_class, dim=1)
+                prediction["classes"] = result_class.tolist()
         else:
             prediction["classes"] = []
         if self._remove_padding:
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py

@@ -308,8 +308,12 @@ class TableModel04_rs(BaseModel, nn.Module):
 
         if len(outputs_coord1) > 0:
             outputs_coord1 = torch.stack(outputs_coord1)
+        else:
+            outputs_coord1 = torch.empty(0)
         if len(outputs_class1) > 0:
             outputs_class1 = torch.stack(outputs_class1)
+        else:
+            outputs_class1 = torch.empty(0)
 
         outputs_class = outputs_class1
         outputs_coord = outputs_coord1
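Why the `else` branches matter: `torch.stack` rejects an empty list, so a table for which no per-cell predictions were collected would previously crash. A small sketch of the guard:

```python
import torch

preds = []  # e.g. no bbox predictions were collected for this table

# torch.stack raises RuntimeError ("stack expects a non-empty TensorList")
# on an empty list, so fall back to an empty tensor that the downstream
# len(...) == 0 checks (see the TFPredictor hunks above) can handle.
outputs_coord = torch.stack(preds) if len(preds) > 0 else torch.empty(0)
print(outputs_coord.shape)  # torch.Size([0])
```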
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py

@@ -149,11 +149,11 @@ class Tag_Transformer(nn.Module):
         self._positional_encoding = PositionalEncoding(embed_dim)
         self._td_encode = td_encode
 
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
+        )
         self._encoder = nn.TransformerEncoder(
-[1 line truncated in the source diff]
-            d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
-            ),
-            num_layers=encoder_layers,
+            encoder_layer, num_layers=encoder_layers, enable_nested_tensor=False
         )
 
         self._decoder = TMTransformerDecoder(
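The old code built the `TransformerEncoderLayer` inline inside the `nn.TransformerEncoder(...)` call; the new code constructs the layer first and passes it in with `num_layers`, additionally opting out of the nested-tensor fast path. A self-contained sketch of the corrected construction (the sizes below are made up; `enable_nested_tensor` requires a reasonably recent PyTorch):

```python
import torch
import torch.nn as nn

embed_dim, n_heads, dim_ff, encoder_layers = 512, 8, 2048, 4  # hypothetical sizes

encoder_layer = nn.TransformerEncoderLayer(
    d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
)
encoder = nn.TransformerEncoder(
    encoder_layer, num_layers=encoder_layers, enable_nested_tensor=False
)

x = torch.randn(10, 2, embed_dim)  # (seq_len, batch, embed_dim); batch_first defaults to False
print(encoder(x).shape)  # torch.Size([10, 2, 512])
```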
docling_ibm_models/tableformer/utils/app_profiler.py

@@ -6,6 +6,8 @@ import time
 from collections import deque
 from statistics import mean, median
 
+from docling_ibm_models.tableformer.utils.mem_monitor import MemMonitor
+
 
 class SingletonClass(type):
     r"""
@@ -37,11 +39,13 @@ class Profiler:
     def __init__(self):
         self._section_dts = {}  # section name -> sum(section intervals)
         self._section_calls = {}  # section name -> number of invocations
-        self._section_kB = {}  # section name -> max kB of used heap
+        self._section_kB = {}  # section name -> max kB of used heap (resident set size)
 
         # section name -> beginning of the last interval
         self._last_begin = {}
 
+        self._mem_monitor = MemMonitor()
+
     def begin(self, section_name, enable=True):
         r"""
         Mark the beginning of an interval
@@ -83,13 +87,20 @@ class Profiler:
         if section_name not in self._last_begin:
             return False
 
+        # Get memory
+        kB = self._mem_monitor.get_memory()
+        if isinstance(kB, dict):
+            kB = kB["resident"]
+
         dt = time.time() - self._last_begin[section_name]
         if section_name not in self._section_dts:
             self._section_dts[section_name] = dt
             self._section_calls[section_name] = 1
+            self._section_kB[section_name] = kB
         else:
             self._section_dts[section_name] += dt
             self._section_calls[section_name] += 1
+            self._section_kB[section_name] = max(kB, self._section_kB[section_name])
 
         return True
 
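Each profiled section now records its peak resident set size alongside timing. A usage sketch; `begin()` is shown in the diff, while the closing call is assumed to be the method patched in the `@@ -83,13` hunk (its name is not visible here, so `end(section_name)` is an assumption):

```python
import time

profiler = Profiler()  # hypothetical direct instantiation

profiler.begin("table_predict")
time.sleep(0.05)  # stand-in for real work
profiler.end("table_predict")  # assumed closer; records dt, call count, max resident kB

# On Linux, MemMonitor.get_memory() returns a dict and the "resident" kB value
# becomes a per-section running maximum; on other platforms it returns -1,
# which still flows through max() without crashing.
```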
docling_ibm_models/tableformer/utils/mem_monitor.py (new file)

@@ -0,0 +1,175 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import os
+import platform
+import re
+
+
+class MemMonitor:
+    r"""
+    Memory monitor for Linux
+
+    It supports 2 approaches for extracting memory information:
+    - linux-native: It parses the `/proc` pseudo-files. It is available only for Linux
+    - psutil: Use the `psutil` library
+
+    ## Linux-Native approach
+
+    The linux-native approach implements 2 methods to extract the memory fields:
+
+    1. The `get_memory()` method:
+
+    - It is very fast
+    - It parses the `/proc/<pid>/statm` pseudo-file
+    - It contains the following fields:
+        size      (1) total program size
+                  (same as VmSize in /proc/[pid]/status)
+        resident  (2) resident set size
+                  (same as VmRSS in /proc/[pid]/status)
+        shared    (3) number of resident shared pages (i.e., backed by a file)
+                  (same as RssFile+RssShmem in /proc/[pid]/status)
+        text      (4) text (code)
+        lib       (5) library (unused since Linux 2.6; always 0)
+        data      (6) data + stack
+        dt        (7) dirty pages (unused since Linux 2.6; always 0)
+
+
+    2. The `get_memory_full()` method:
+
+    - It is slower to parse but contains more detailed information
+    - It uses regex to parse the `/proc/<pid>/status` pseudo-file
+    - It contains the following fields:
+        VmPeak: Peak virtual memory size.
+        VmSize: Virtual memory size.
+        VmLck: Locked memory size (see mlock(2)).
+        VmPin: Pinned memory size (since Linux 3.2). These are pages that can't be moved
+               because something needs to directly access physical memory.
+        VmHWM: Peak resident set size ("high water mark").
+        VmRSS: Resident set size. Note that the value here is the sum of RssAnon, RssFile,
+               and RssShmem.
+        RssAnon: Size of resident anonymous memory. (since Linux 4.5).
+        RssFile: Size of resident file mappings. (since Linux 4.5).
+        RssShmem: Size of resident shared memory (includes System V shared memory, mappings
+                  from tmpfs(5), and shared anonymous mappings). (since Linux 4.5).
+        VmData, VmStk, VmExe: Size of data, stack, and text segments.
+        VmLib: Shared library code size.
+        VmPTE: Page table entries size (since Linux 2.6.10).
+        VmPMD: Size of second-level page tables (added in Linux 4.0; removed in Linux 4.15).
+        VmSwap: Swapped-out virtual memory size by anonymous private pages; shmem swap usage
+                is not included (since Linux 2.6.34).
+
+
+    ## The psutil library
+
+    - Apparently the psutil library parses the `/proc/<pid>/statm`
+    - The memory_info() function returns the fields: rss, vms, shared, text, lib, data, dirty
+
+
+    ## Field mappings
+
+    These are the fields returned by psutil memory_info() and their mapping in the /proc files:
+    (I put ? when I am not 100% sure about the mapping)
+
+    | psutil | /proc/$$/status    | /proc/$$/statm |
+    |--------|--------------------|----------------|
+    | rss    | VmRSS              | resident       |
+    | vms    | VmSize             | size           |
+    | shared | RssFile + RssShmem | shared         |
+    | text   | VmExe ?            | text           |
+    | lib    | RssShmem ?         | lib            |
+    | data   | VmData + VmStk     | data           |
+    | dirty  | VmSwap ?           | dt             |
+
+    """
+
+    def __init__(self, enable=True):
+        self._enable = enable
+        self._pid = os.getpid()
+
+        # Create regex for each memory field of the /proc/status pseudo-file
+        self._status_fields = [
+            "VmPeak",
+            "VmSize",
+            "VmLck",
+            "VmPin",
+            "VmHWM",
+            "VmRSS",
+            "RssAnon",
+            "RssFile",
+            "RssShmem",
+            "VmData",
+            "VmStk",
+            "VmExe",
+            "VmLib",
+            "VmPTE",
+            "VmPMD",
+            "VmSwap",
+        ]
+        self._status_regex = {}
+        for mem_field in self._status_fields:
+            regex_str = r"({}:)(\s+)(\d*)(.*)".format(mem_field)
+            self._status_regex[mem_field] = re.compile(regex_str)
+
+    def get_memory_full(self) -> dict:
+        r"""
+        - Parse /proc/<pid>/status to get all memory info.
+        - The method returns a dict with the fields self._status_fields
+        - This method is SLOW. Unless you need the full memory info, better to use `get_memory`
+
+        The returned values are in kB
+        """
+        if not self._enable:
+            return -2
+        if platform.system() != "Linux":
+            return -1
+        pid_fn = "/proc/{}/status".format(self._pid)
+
+        # Dict to collect all memory fields
+        memory = {}
+        with open(pid_fn, "r") as fn:
+            for ll in fn:
+                for mem_field in self._status_fields:
+                    regex = self._status_regex[mem_field]
+                    m = regex.match(ll)
+                    if m is not None:
+                        memory[mem_field] = int(m.group(3))
+                if len(memory) == len(self._status_fields):
+                    break
+
+        return memory
+
+    def get_memory(self) -> dict:
+        r"""
+        - Parse /proc/<pid>/statm to get the most important memory fields
+        - This is a fast implementation.
+        - The method returns a dict with the fields:
+          "size", "resident", "shared", "text", "lib", "data", "dt"
+        - Check the documentation at the top for a mapping across the various fields
+
+        The returned values are in kB
+        """
+        if not self._enable:
+            return -2
+        if platform.system() != "Linux":
+            return -1
+        pid_fn = "/proc/{}/statm".format(self._pid)
+
+        # Dict to collect all memory fields
+        memory = {}
+        with open(pid_fn, "r") as fn:
+            ll = fn.read()
+            # The values are in pages
+            # Each page is 4096 bytes (4kB)
+            data = [int(x) << 2 for x in ll.split(" ")]
+            memory = {
+                "size": data[0],
+                "resident": data[1],
+                "shared": data[2],
+                "text": data[3],
+                "lib": data[4],
+                "data": data[5],
+                "dt": data[6],
+            }
+        return memory
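A short usage sketch of the new monitor, based only on the methods shown above (Linux-only; both methods return -1 on other platforms and -2 when monitoring is disabled):

```python
from docling_ibm_models.tableformer.utils.mem_monitor import MemMonitor

mm = MemMonitor()

mem = mm.get_memory()  # fast path: /proc/<pid>/statm
if isinstance(mem, dict):  # -1 on non-Linux, -2 when disabled
    print("resident set size (kB):", mem["resident"])

full = mm.get_memory_full()  # slow path: /proc/<pid>/status
if isinstance(full, dict):
    print("peak RSS, VmHWM (kB):", full.get("VmHWM"))
```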
docling_ibm_models-1.2.1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.1.7
+Version: 1.2.1
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
docling_ibm_models-1.2.1.dist-info/RECORD

@@ -1,13 +1,13 @@
-docling_ibm_models/layoutmodel/layout_predictor.py,sha256=
+docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
 docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO9DhOcbvPS5k,6023
 docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
 docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
 docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
-docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=
+docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=GaBW5px3xX9JaHVASZArKiQ-qfrzX0oj-E_6P3-OvuU,21238
 docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
-docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=
+docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=32rox4--vqFddCG6oJ1_RQpIoc8nmq4ADvPpgphVR60,40959
 docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
 docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,18 +15,18 @@ docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoK
 docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
 docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
-docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=
-docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=
+docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=FtmWZNOKjQFLG5GtBCvvU23rWrIsDu3gqfcfl68soPg,12275
+docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=nhnYFlXT5KyJMdB4qMo5r8GimWXVy0lcqcmoHPEl-KE,6416
 docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
 docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
 docling_ibm_models/tableformer/test_dataset_cache.py,sha256=zvVJvUnYz4GxAQfPUmLTHUbqj0Yhi2vwgOBnsRgt1rI,818
 docling_ibm_models/tableformer/test_prepare_image.py,sha256=oPmU93-yWIkCeUYulGQ1p676Vq-zcjw2EX24WA5lspA,3155
 docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling_ibm_models/tableformer/utils/app_profiler.py,sha256=
+docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
+docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
 docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
 docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
-docling_ibm_models/
-docling_ibm_models-1.1.
-docling_ibm_models-1.1.
-docling_ibm_models-1.1.
-docling_ibm_models-1.1.7.dist-info/RECORD,,
+docling_ibm_models-1.2.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling_ibm_models-1.2.1.dist-info/METADATA,sha256=xYeasIJ2_l_UYBLsElHklPP9-VTn2ppFRVFIaKRDpj4,7172
+docling_ibm_models-1.2.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling_ibm_models-1.2.1.dist-info/RECORD,,
docling_ibm_models/tableformer/utils/variance.py (deleted)

@@ -1,175 +0,0 @@
-#
-# Copyright IBM Corp. 2024 - 2024
-# SPDX-License-Identifier: MIT
-#
-import logging
-
-import numpy as np
-
-import docling_ibm_models.tableformer.settings as s
-
-LOG_LEVEL = logging.INFO
-
-
-class MyWelford:
-    r"""
-    Running computation of the sample mean and sample variance using Welford's algorithm
-    """
-
-    def __init__(self):
-        self._i = 0  # Running index
-        self._m = 0  # Running mean
-        self._s = 0  # (n - 1) * variance
-
-    def reset(self):
-        r"""
-        Reset the object
-        """
-        self._i = 0
-        self._m = 0
-        self._s = 0
-
-    def add(self, xi):
-        r"""
-        Invoke add each time a new sample arrives
-
-        Inputs:
-            xi: The next sample of data
-        """
-        self._i += 1
-        old_m = self._m
-        self._m = self._m + (xi - self._m) / self._i
-        self._s = self._s + (xi - self._m) * (xi - old_m)
-
-    def results(self):
-        r"""
-        Get the computed mean, variance and standard deviation up to now
-
-        Outputs:
-            m: Sample mean
-            v: Sample variance
-            std: Sample standard deviation
-        """
-        if self._i <= 1:
-            return None, None, None
-
-        # v = self._s / (self._i - 1)  # Sample variance
-        v = self._s / (self._i)  # Population variance
-        std = np.sqrt(v)
-        return self._m, v, std
-
-
-class MyWelfordImg(MyWelford):
-    r"""
-    Welford algorithm to calculate running mean and sample variance for images
-    """
-
-    def __init__(self):
-        super(MyWelfordImg, self).__init__()
-
-    def add(self, img):
-        r"""
-        Input:
-            img: An image numpy array (channel, width, height). The only requirement is to have the
-                 channels as the first dimension and have 3 dimensions in total
-        """
-        channels = img.shape[0]
-        flat_dim = img.shape[1] * img.shape[2]
-        img_r = img.reshape(channels, flat_dim)
-
-        for i in range(flat_dim):
-            super(MyWelfordImg, self).add(img_r[:, i])
-
-
-class ChanVarianceImg:
-    r"""
-    Chan's algorithm to compute a running variance with support of sub-samples
-    In this implementation each sub-sample is an image
-
-    Math from the original paper:
-    https://github.ibm.com/nli/variance_formulae
-    """
-
-    def __init__(self):
-        r""" """
-        self._first = True
-        # Size of the calculated dataset
-        self._n = 0
-        # Sum of the samples for the 3 image channels
-        self._t = 0
-        # Sum of the square differences of the deviations of the samples from the mean
-        self._s = 0
-
-    def add(self, img):
-        r"""
-        Add the provided image to the computation of the dataset statistics
-
-        Input:
-            img: An image numpy array (channel, width, height). The only requirement is to have the
-                 channels as the first dimension and have 3 dimensions in total
-        """
-        ch = img.shape[0]
-        n = img.shape[1] * img.shape[2]
-        img = img.reshape(ch, n)
-        img_t = img.sum(axis=1)
-        img_t_v = img_t.reshape(ch, 1)
-        diff = (img - (img_t_v / n)) ** 2
-        img_s = diff.sum(axis=1)
-
-        if not self._first:
-            c = (self._n / (n * (self._n + n))) * (
-                ((n / self._n) * self._t - img_t) ** 2
-            )
-            self._s += img_s + c
-            self._t += img_t
-        else:
-            self._s = img_s
-            self._t = img_t
-            self._first = False
-        self._n += n
-
-    def results(self):
-        r"""
-        Get the computed statistics
-
-        Output:
-            mean: Mean for the complete dataset
-            var: Population variance for the complete dataset
-            std: Population standard deviation for the complete dataset
-        """
-        mean = list(self._t / self._n)
-        var = list(self._s / self._n)  # Population variance
-        std = list(np.sqrt(var))
-
-        return mean, var, std
-
-    def reset(self):
-        r"""
-        Reset the object to start over again
-        """
-        self._n = 0
-        self._t = 0
-        self._s = 0
-        self._first = True
-
-
-if __name__ == "__main__":
-    logger = s.get_custom_logger("variance", LOG_LEVEL)
-
-    n = 50000
-    channels = 3
-    width = 448
-    height = 448
-
-    my = ChanVarianceImg()
-    # Generate random images
-    for i in range(n):
-        logger.info(i)
-        img = 255 * np.random.rand(channels, width, height)
-        my.add(img)
-
-    # Calculate the statistics
-    m, v, std = my.results()
-    assert m.shape == (3,), "Wrong mean dimension"
-    assert v.shape == (3,), "Wrong variance dimension"
-    assert std.shape == (3,), "Wrong std dimension"
{docling_ibm_models-1.1.7.dist-info → docling_ibm_models-1.2.1.dist-info}/WHEEL

File without changes