PyPI - docling-ibm-models - Versions diffs - 1.3.2__tar.gz → 1.4.0__tar.gz - Mend

docling-ibm-models 1.3.2 (tar.gz) → 1.4.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{docling_ibm_models-1.3.2 → docling_ibm_models-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.3.2
+Version: 1.4.0
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -24,7 +24,6 @@ Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: lxml (>=4.9.1,<5.0.0)
 Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
 Requires-Dist: numpy (>=1.24.4,<2.0.0)
-Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
 Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
 Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
 Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"

{docling_ibm_models-1.3.2 → docling_ibm_models-1.4.0}/docling_ibm_models/layoutmodel/layout_predictor.py RENAMED Viewed

@@ -7,7 +7,8 @@ from collections.abc import Iterable
 from typing import Union
 import numpy as np
-import onnxruntime as ort
+import torch
+import torchvision.transforms as T
 from PIL import Image
 MODEL_CHECKPOINT_FN = "model.pt"
@@ -16,14 +17,14 @@ DEFAULT_NUM_THREADS = 4
 class LayoutPredictor:
     r"""
-    Document layout prediction using ONNX
+    Document layout prediction using torch
     """
     def __init__(
         self, artifact_path: str, num_threads: int = None, use_cpu_only: bool = False
     ):
         r"""
-        Provide the artifact path that contains the LayoutModel ONNX file
+        Provide the artifact path that contains the LayoutModel file
         The number of threads is decided, in the following order, by:
         1. The init method parameter `num_threads`, if it is set.
@@ -38,13 +39,13 @@ class LayoutPredictor:
         Parameters
         ----------
-        artifact_path: Path for the model ONNX file.
+        artifact_path: Path for the model torch file.
         num_threads: (Optional) Number of threads to run the inference.
         use_cpu_only: (Optional) If True, it forces CPU as the execution provider.
         Raises
         ------
-        FileNotFoundError when the model's ONNX file is missing
+        FileNotFoundError when the model's torch file is missing
         """
         # Initialize classes map:
         self._classes_map = {
@@ -75,46 +76,27 @@ class LayoutPredictor:
         self._threshold = 0.6  # Score threshold
         self._image_size = 640
         self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
+        self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
+        # Model file
+        self._torch_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
+        if not os.path.isfile(self._torch_fn):
+            raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn))
         # Get env vars
-        self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
         if num_threads is None:
             num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS))
         self._num_threads = num_threads
-        # Decide the execution providers
-        if (
-            not self._use_cpu_only
-            and "CUDAExecutionProvider" in ort.get_available_providers()
-        ):
-            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-        else:
-            providers = ["CPUExecutionProvider"]
-        self._providers = providers
-        # Model ONNX file
-        self._onnx_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
-        if not os.path.isfile(self._onnx_fn):
-            raise FileNotFoundError("Missing ONNX file: {}".format(self._onnx_fn))
-        # ONNX options
-        self._options = ort.SessionOptions()
-        self._options.intra_op_num_threads = self._num_threads
-        self.sess = ort.InferenceSession(
-            self._onnx_fn,
-            sess_options=self._options,
-            providers=self._providers,
-        )
+        self.model = torch.jit.load(self._torch_fn)
     def info(self) -> dict:
         r"""
         Get information about the configuration of LayoutPredictor
         """
         info = {
-            "onnx_file": self._onnx_fn,
-            "intra_op_num_threads": self._num_threads,
+            "torch_file": self._torch_fn,
             "use_cpu_only": self._use_cpu_only,
-            "providers": self._providers,
             "image_size": self._image_size,
             "threshold": self._threshold,
         }
@@ -147,33 +129,35 @@ class LayoutPredictor:
             raise TypeError("Not supported input image format")
         w, h = page_img.size
-        page_img = page_img.resize((self._image_size, self._image_size))
-        page_data = np.array(page_img, dtype=np.uint8) / np.float32(255.0)
-        page_data = np.expand_dims(np.transpose(page_data, axes=[2, 0, 1]), axis=0)
+        orig_size = torch.tensor([w, h])[None]
-        # Predict
-        labels, boxes, scores = self.sess.run(
-            output_names=None,
-            input_feed={
-                "images": page_data,
-                "orig_target_sizes": self._size,
-            },
+        transforms = T.Compose(
+            [
+                T.Resize((640, 640)),
+                T.ToTensor(),
+            ]
         )
+        img = transforms(page_img)[None]
+        # Predict
+        with torch.no_grad():
+            labels, boxes, scores = self.model(img, orig_size)
         # Yield output
         for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
             # Filter out blacklisted classes
-            label = self._classes_map[label_idx]
+            label_idx = int(label_idx.item())
+            score = float(score.item())
+            label = self._classes_map[label_idx + 1]
             if label in self._black_classes:
                 continue
             # Check against threshold
             if score > self._threshold:
                 yield {
-                    "l": box[0] / self._image_size * w,
-                    "t": box[1] / self._image_size * h,
-                    "r": box[2] / self._image_size * w,
-                    "b": box[3] / self._image_size * h,
+                    "l": box[0],
+                    "t": box[1],
+                    "r": box[2],
+                    "b": box[3],
                     "label": label,
                     "confidence": score,
                 }

{docling_ibm_models-1.3.2 → docling_ibm_models-1.4.0}/docling_ibm_models/tableformer/common.py RENAMED Viewed

@@ -48,6 +48,16 @@ def validate_config(config):
     return True
+def read_config(config_filename):
+    with open(config_filename, "r") as fd:
+        config = json.load(fd)
+    # Validate the config file
+    validate_config(config)
+    return config
 def safe_get_parameter(input_dict, index_path, default=None, required=False):
     r"""
     Safe get parameter from a nested dictionary.

{docling_ibm_models-1.3.2 → docling_ibm_models-1.4.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "1.3.2"  # DO NOT EDIT, updated automatically
+version = "1.4.0"  # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -30,7 +30,6 @@ torchvision = [
   {markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^0"},
   {markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~0.17.2"}
 ]
-onnxruntime = "^1.16.2"
 numpy = "^1.24.4"
 lxml = "^4.9.1"
 jsonlines = "^3.1.0"
@@ -55,6 +54,9 @@ types-requests = "^2.31.0.2"
 flake8-pyproject = "^1.2.3"
 pylint = "^2.17.5"
+[tool.poetry.group.dev.dependencies]
+ipykernel = "^6.29.5"
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"