PyPI - docling-ibm-models - Versions diffs - 1.1.5__tar.gz → 1.1.7__tar.gz - Mend

docling-ibm-models 1.1.5.tar.gz → 1.1.7.tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.1.5
+Version: 1.1.7
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/docling_ibm_models/tableformer/data_management/matching_post_processor.py RENAMED Viewed

@@ -4,6 +4,7 @@
 #
 import json
 import logging
+import math
 import statistics
 import docling_ibm_models.tableformer.settings as s
@@ -403,45 +404,63 @@ class MatchingPostProcessor:
                 # Push horizontally
                 if x1_min < x2_min:
                     # Move box1 to the left and box2 to the right
-                    box1["bbox"][2] -= overlap_x
-                    box2["bbox"][0] += overlap_x
+                    box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
+                    box2["bbox"][0] += math.floor(overlap_x / 2)
                 else:
                     # Move box2 to the left and box1 to the right
-                    box2["bbox"][2] -= overlap_x
-                    box1["bbox"][0] += overlap_x
+                    box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
+                    box1["bbox"][0] += math.floor(overlap_x / 2)
             else:
                 # Push vertically
                 if y1_min < y2_min:
                     # Move box1 up and box2 down
-                    box1["bbox"][3] -= overlap_y
-                    box2["bbox"][1] += overlap_y
+                    box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
+                    box2["bbox"][1] += math.floor(overlap_y / 2)
                 else:
                     # Move box2 up and box1 down
-                    box2["bbox"][3] -= overlap_y
-                    box1["bbox"][1] += overlap_y
+                    box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
+                    box1["bbox"][1] += math.floor(overlap_y / 2)
+            # Will flip coordinates in proper order, if previous operations reversed it
+            box1["bbox"] = [
+                min(box1["bbox"][0], box1["bbox"][2]),
+                min(box1["bbox"][1], box1["bbox"][3]),
+                max(box1["bbox"][0], box1["bbox"][2]),
+                max(box1["bbox"][1], box1["bbox"][3]),
+            ]
+            box2["bbox"] = [
+                min(box2["bbox"][0], box2["bbox"][2]),
+                min(box2["bbox"][1], box2["bbox"][3]),
+                max(box2["bbox"][0], box2["bbox"][2]),
+                max(box2["bbox"][1], box2["bbox"][3]),
+            ]
             return box1, box2
         def do_boxes_overlap(box1, box2):
-            # print("{} - {}".format(box1["bbox"], box2["bbox"]))
-            # Extract coordinates from the bounding boxes
-            x1_min, y1_min, x1_max, y1_max = box1["bbox"]
-            x2_min, y2_min, x2_max, y2_max = box2["bbox"]
-            # Check if one box is to the left of the other
-            if x1_max < x2_min or x2_max < x1_min:
+            B1 = box1["bbox"]
+            B2 = box2["bbox"]
+            if (
+                (B1[0] >= B2[2])
+                or (B1[2] <= B2[0])
+                or (B1[3] <= B2[1])
+                or (B1[1] >= B2[3])
+            ):
                 return False
-            # Check if one box is above the other
-            if y1_max < y2_min or y2_max < y1_min:
-                return False
-            return True
+            else:
+                return True
         def find_overlapping_pairs_indexes(bboxes):
             overlapping_indexes = []
             # Compare each box with every other box (combinations)
             for i in range(len(bboxes)):
                 for j in range(i + 1, len(bboxes)):
-                    if do_boxes_overlap(bboxes[i], bboxes[j]):
-                        bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
+                    if i != j:
+                        if bboxes[i] != bboxes[j]:
+                            if do_boxes_overlap(bboxes[i], bboxes[j]):
+                                bboxes[i], bboxes[j] = correct_overlap(
+                                    bboxes[i], bboxes[j]
+                                )
             return overlapping_indexes, bboxes
@@ -1144,7 +1163,7 @@ class MatchingPostProcessor:
                 new_pdf_cells.append(pdf_cells[i])
         return new_pdf_cells
-    def process(self, matching_details):
+    def process(self, matching_details, correct_overlapping_cells=False):
         r"""
         Do post processing, see details in the comments below
@@ -1348,9 +1367,10 @@ class MatchingPostProcessor:
         table_cells_wo = po2
         max_cell_id = po3
-        # As the last step - correct cell bboxes in a way that they don't overlap:
-        if len(table_cells_wo) <= 300:  # For performance reasons
-            table_cells_wo = self._find_overlapping(table_cells_wo)
+        if correct_overlapping_cells:
+            # As the last step - correct cell bboxes in a way that they don't overlap:
+            if len(table_cells_wo) <= 300:  # For performance reasons
+                table_cells_wo = self._find_overlapping(table_cells_wo)
         self._log().debug("*** final_matches_wo")
         self._log().debug(final_matches_wo)

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py RENAMED Viewed

@@ -127,13 +127,14 @@ class CellMatcher:
             Dictionary with all details about the mathings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                word["bbox"] = [
+                    word["bbox"]["l"],
+                    word["bbox"]["t"],
+                    word["bbox"]["r"],
+                    word["bbox"]["b"],
+                ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
@@ -145,9 +146,13 @@ class CellMatcher:
         table_cells = self._build_table_cells(
             html_seq, otsl_seq, table_bboxes_page, table_classes
         )
-        matches, matches_counter = self._intersection_over_pdf_match(
-            table_cells, pdf_cells
-        )
+        matches = {}
+        matches_counter = 0
+        if len(pdf_cells) > 0:
+            matches, matches_counter = self._intersection_over_pdf_match(
+                table_cells, pdf_cells
+            )
         self._log().debug("matches_counter: {}".format(matches_counter))
@@ -188,13 +193,14 @@ class CellMatcher:
             Dictionary with all details about the mathings between the table and pdf cells
         """
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
-        for word in pdf_cells:
-            word["bbox"] = [
-                word["bbox"]["l"],
-                word["bbox"]["t"],
-                word["bbox"]["r"],
-                word["bbox"]["b"],
-            ]
+        if len(pdf_cells) > 0:
+            for word in pdf_cells:
+                word["bbox"] = [
+                    word["bbox"]["l"],
+                    word["bbox"]["t"],
+                    word["bbox"]["r"],
+                    word["bbox"]["b"],
+                ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/docling_ibm_models/tableformer/data_management/tf_predictor.py RENAMED Viewed

@@ -523,8 +523,9 @@ class TFPredictor:
         # return the resized image
         return resized, sf
-    def multi_table_predict(self, iocr_page, table_bboxes, do_matching=True):
-        # def multi_table_predict(self, iocr_page, page_image, table_bboxes):
+    def multi_table_predict(
+        self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
+    ):
         multi_tf_output = []
         page_image = iocr_page["image"]
@@ -546,7 +547,12 @@ class TFPredictor:
             # Predict
             if do_matching:
                 tf_responses, predict_details = self.predict(
-                    iocr_page, table_bbox, table_image, scale_factor, None
+                    iocr_page,
+                    table_bbox,
+                    table_image,
+                    scale_factor,
+                    None,
+                    correct_overlapping_cells,
                 )
             else:
                 tf_responses, predict_details = self.predict_dummy(
@@ -696,7 +702,12 @@ class TFPredictor:
             prediction["bboxes"] = corrected_bboxes
         # Match the cells
-        matching_details = {"table_cells": [], "matches": {}}
+        matching_details = {
+            "table_cells": [],
+            "matches": {},
+            "pdf_cells": [],
+            "prediction_bboxes_page": [],
+        }
         # Table bbox upscaling will scale predicted bboxes too within cell matcher
         scaled_table_bbox = [
@@ -728,7 +739,13 @@ class TFPredictor:
         return tf_output, matching_details
     def predict(
-        self, iocr_page, table_bbox, table_image, scale_factor, eval_res_preds=None
+        self,
+        iocr_page,
+        table_bbox,
+        table_image,
+        scale_factor,
+        eval_res_preds=None,
+        correct_overlapping_cells=False,
     ):
         r"""
         Predict the table out of an image in memory
@@ -739,6 +756,8 @@ class TFPredictor:
             Docling provided table data
         eval_res_preds : dict
             Ready predictions provided by the evaluation results
+        correct_overlapping_cells : boolean
+            Enables or disables last post-processing step, that fixes cell bboxes to remove overlap
         Returns
         -------
@@ -803,7 +822,12 @@ class TFPredictor:
             prediction["bboxes"] = corrected_bboxes
         # Match the cells
-        matching_details = {"table_cells": [], "matches": {}}
+        matching_details = {
+            "table_cells": [],
+            "matches": {},
+            "pdf_cells": [],
+            "prediction_bboxes_page": [],
+        }
         # Table bbox upscaling will scale predicted bboxes too within cell matcher
         scaled_table_bbox = [
@@ -819,10 +843,15 @@ class TFPredictor:
             )
         # Post-processing
         if len(prediction["bboxes"]) > 0:
-            if self.enable_post_process:
-                AggProfiler().begin("post_process", self._prof)
-                matching_details = self._post_processor.process(matching_details)
-                AggProfiler().end("post_process", self._prof)
+            if (
+                len(iocr_page["tokens"]) > 0
+            ):  # There are at least some pdf cells to match with
+                if self.enable_post_process:
+                    AggProfiler().begin("post_process", self._prof)
+                    matching_details = self._post_processor.process(
+                        matching_details, correct_overlapping_cells
+                    )
+                    AggProfiler().end("post_process", self._prof)
         # Generate the expected Docling responses
         AggProfiler().begin("generate_docling_response", self._prof)

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py RENAMED Viewed

@@ -157,7 +157,12 @@ class BBoxDecoder(nn.Module):
             predictions_classes.append(self._class_embed(h))
         if len(predictions_bboxes) > 0:
             predictions_bboxes = torch.stack([x[0] for x in predictions_bboxes])
+        else:
+            predictions_bboxes = torch.empty(0)
         if len(predictions_classes) > 0:
             predictions_classes = torch.stack([x[0] for x in predictions_classes])
+        else:
+            predictions_classes = torch.empty(0)
         return predictions_classes, predictions_bboxes

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/docling_ibm_models/tableformer/otsl.py RENAMED Viewed

@@ -123,6 +123,9 @@ def otsl_check_right(rs_split, x, y):
 def otsl_to_html(rs_list, logdebug):
+    if len(rs_list) == 0:
+        return []
     if rs_list[0] not in ["fcel", "ched", "rhed", "srow", "ecel"]:
         # Most likely already HTML...
         return rs_list

{docling_ibm_models-1.1.5 → docling_ibm_models-1.1.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "1.1.5"  # DO NOT EDIT, updated automatically
+version = "1.1.7"  # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"