PyPI - docling-ibm-models - Versions diffs - 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

docling-ibm-models 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

docling_ibm_models/layoutmodel/layout_predictor.py CHANGED Viewed

@@ -7,7 +7,8 @@ from collections.abc import Iterable
 from typing import Union
 import numpy as np
-import onnxruntime as ort
+import torch
+import torchvision.transforms as T
 from PIL import Image
 MODEL_CHECKPOINT_FN = "model.pt"
@@ -16,14 +17,14 @@ DEFAULT_NUM_THREADS = 4
 class LayoutPredictor:
     r"""
-    Document layout prediction using ONNX
+    Document layout prediction using torch
     """
     def __init__(
         self, artifact_path: str, num_threads: int = None, use_cpu_only: bool = False
     ):
         r"""
-        Provide the artifact path that contains the LayoutModel ONNX file
+        Provide the artifact path that contains the LayoutModel file
         The number of threads is decided, in the following order, by:
         1. The init method parameter `num_threads`, if it is set.
@@ -38,13 +39,13 @@ class LayoutPredictor:
         Parameters
         ----------
-        artifact_path: Path for the model ONNX file.
+        artifact_path: Path for the model torch file.
         num_threads: (Optional) Number of threads to run the inference.
         use_cpu_only: (Optional) If True, it forces CPU as the execution provider.
         Raises
         ------
-        FileNotFoundError when the model's ONNX file is missing
+        FileNotFoundError when the model's torch file is missing
         """
         # Initialize classes map:
         self._classes_map = {
@@ -75,46 +76,27 @@ class LayoutPredictor:
         self._threshold = 0.6  # Score threshold
         self._image_size = 640
         self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
+        self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
+        # Model file
+        self._torch_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
+        if not os.path.isfile(self._torch_fn):
+            raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn))
         # Get env vars
-        self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
         if num_threads is None:
             num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS))
         self._num_threads = num_threads
-        # Decide the execution providers
-        if (
-            not self._use_cpu_only
-            and "CUDAExecutionProvider" in ort.get_available_providers()
-        ):
-            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-        else:
-            providers = ["CPUExecutionProvider"]
-        self._providers = providers
-        # Model ONNX file
-        self._onnx_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
-        if not os.path.isfile(self._onnx_fn):
-            raise FileNotFoundError("Missing ONNX file: {}".format(self._onnx_fn))
-        # ONNX options
-        self._options = ort.SessionOptions()
-        self._options.intra_op_num_threads = self._num_threads
-        self.sess = ort.InferenceSession(
-            self._onnx_fn,
-            sess_options=self._options,
-            providers=self._providers,
-        )
+        self.model = torch.jit.load(self._torch_fn)
     def info(self) -> dict:
         r"""
         Get information about the configuration of LayoutPredictor
         """
         info = {
-            "onnx_file": self._onnx_fn,
-            "intra_op_num_threads": self._num_threads,
+            "torch_file": self._torch_fn,
             "use_cpu_only": self._use_cpu_only,
-            "providers": self._providers,
             "image_size": self._image_size,
             "threshold": self._threshold,
         }
@@ -147,33 +129,35 @@ class LayoutPredictor:
             raise TypeError("Not supported input image format")
         w, h = page_img.size
-        page_img = page_img.resize((self._image_size, self._image_size))
-        page_data = np.array(page_img, dtype=np.uint8) / np.float32(255.0)
-        page_data = np.expand_dims(np.transpose(page_data, axes=[2, 0, 1]), axis=0)
+        orig_size = torch.tensor([w, h])[None]
-        # Predict
-        labels, boxes, scores = self.sess.run(
-            output_names=None,
-            input_feed={
-                "images": page_data,
-                "orig_target_sizes": self._size,
-            },
+        transforms = T.Compose(
+            [
+                T.Resize((640, 640)),
+                T.ToTensor(),
+            ]
         )
+        img = transforms(page_img)[None]
+        # Predict
+        with torch.no_grad():
+            labels, boxes, scores = self.model(img, orig_size)
         # Yield output
         for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
             # Filter out blacklisted classes
-            label = self._classes_map[label_idx]
+            label_idx = int(label_idx.item())
+            score = float(score.item())
+            label = self._classes_map[label_idx + 1]
             if label in self._black_classes:
                 continue
             # Check against threshold
             if score > self._threshold:
                 yield {
-                    "l": box[0] / self._image_size * w,
-                    "t": box[1] / self._image_size * h,
-                    "r": box[2] / self._image_size * w,
-                    "b": box[3] / self._image_size * h,
+                    "l": box[0],
+                    "t": box[1],
+                    "r": box[2],
+                    "b": box[3],
                     "label": label,
                     "confidence": score,
                 }

{docling_ibm_models-1.3.3.dist-info → docling_ibm_models-1.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.3.3
+Version: 1.4.0
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -24,7 +24,6 @@ Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: lxml (>=4.9.1,<5.0.0)
 Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
 Requires-Dist: numpy (>=1.24.4,<2.0.0)
-Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
 Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
 Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
 Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"

{docling_ibm_models-1.3.3.dist-info → docling_ibm_models-1.4.0.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
+docling_ibm_models/layoutmodel/layout_predictor.py,sha256=IDIT8UVzj-U3spWe4nWNxKZqcHkY58teiX_quRu1e0M,5253
 docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
 docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,7 +22,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4
 docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
 docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
 docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
-docling_ibm_models-1.3.3.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling_ibm_models-1.3.3.dist-info/METADATA,sha256=jnXUHlZVDGc0ozdZaL_79WyQe9eW4pgT9gH9ya04CO8,7088
-docling_ibm_models-1.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling_ibm_models-1.3.3.dist-info/RECORD,,
+docling_ibm_models-1.4.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling_ibm_models-1.4.0.dist-info/METADATA,sha256=mlcfKB6GKPyvWSxfC8nztOnhjxO3BquVbbQPLdV0LQw,7043
+docling_ibm_models-1.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling_ibm_models-1.4.0.dist-info/RECORD,,

{docling_ibm_models-1.3.3.dist-info → docling_ibm_models-1.4.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling_ibm_models-1.3.3.dist-info → docling_ibm_models-1.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

docling-ibm-models 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

docling-ibm-models 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl