PyPI - docling-ibm-models - Versions diffs - 2.0.7__tar.gz → 3.0.0__tar.gz - Mend

docling-ibm-models 2.0.7 (tar.gz) → 3.0.0 (tar.gz)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29)

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 2.0.7
+Version: 3.0.0
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -18,15 +18,18 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Dist: Pillow (>=10.0.0,<11.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
 Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: numpy (>=1.24.4,<3.0.0)
 Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
+Requires-Dist: safetensors[torch] (>=0.4.3,<1)
 Requires-Dist: torch (>=2.2.2,<3.0.0)
 Requires-Dist: torchvision (>=0,<1)
 Requires-Dist: tqdm (>=4.64.0,<5.0.0)
+Requires-Dist: transformers (>=4.42.0,<5.0.0)
 Description-Content-Type: text/markdown
 [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)

docling_ibm_models-3.0.0/docling_ibm_models/layoutmodel/layout_predictor.py ADDED Viewed

@@ -0,0 +1,175 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import logging
+import os
+from collections.abc import Iterable
+from typing import Union
+import numpy as np
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+_log = logging.getLogger(__name__)
+class LayoutPredictor:
+    """
+    Document layout prediction using safe tensors
+    """
+    def __init__(
+        self,
+        artifact_path: str,
+        device: str = "cpu",
+        num_threads: int = 4,
+    ):
+        """
+        Provide the artifact path that contains the LayoutModel file
+        Parameters
+        ----------
+        artifact_path: Path for the model torch file.
+        device: (Optional) device to run the inference.
+        num_threads: (Optional) Number of threads to run the inference if device = 'cpu'
+        Raises
+        ------
+        FileNotFoundError when the model's torch file is missing
+        """
+        # Initialize classes map:
+        self._classes_map = {
+            0: "background",
+            1: "Caption",
+            2: "Footnote",
+            3: "Formula",
+            4: "List-item",
+            5: "Page-footer",
+            6: "Page-header",
+            7: "Picture",
+            8: "Section-header",
+            9: "Table",
+            10: "Text",
+            11: "Title",
+            12: "Document Index",
+            13: "Code",
+            14: "Checkbox-Selected",
+            15: "Checkbox-Unselected",
+            16: "Form",
+            17: "Key-Value Region",
+        }
+        # Blacklisted classes
+        self._black_classes = set()  # ["Form", "Key-Value Region"])
+        # Set basic params
+        self._threshold = 0.3  # Score threshold
+        self._image_size = 640
+        self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
+        # Set number of threads for CPU
+        self._device = torch.device(device)
+        self._num_threads = num_threads
+        if device == "cpu":
+            torch.set_num_threads(self._num_threads)
+        # Model file and configurations
+        self._st_fn = os.path.join(artifact_path, "model.safetensors")
+        if not os.path.isfile(self._st_fn):
+            raise FileNotFoundError("Missing safe tensors file: {}".format(self._st_fn))
+        # Load model and move to device
+        processor_config = os.path.join(artifact_path, "preprocessor_config.json")
+        model_config = os.path.join(artifact_path, "config.json")
+        self._image_processor = RTDetrImageProcessor.from_json_file(processor_config)
+        self._model = RTDetrForObjectDetection.from_pretrained(
+            artifact_path, config=model_config
+        ).to(self._device)
+        self._model.eval()
+        _log.debug("LayoutPredictor settings: {}".format(self.info()))
+    def info(self) -> dict:
+        """
+        Get information about the configuration of LayoutPredictor
+        """
+        info = {
+            "safe_tensors_file": self._st_fn,
+            "device": self._device.type,
+            "num_threads": self._num_threads,
+            "image_size": self._image_size,
+            "threshold": self._threshold,
+        }
+        return info
+    @torch.inference_mode()
+    def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]:
+        """
+        Predict bounding boxes for a given image.
+        The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as:
+        [left, top, right, bottom]
+        Parameter
+        ---------
+        origin_img: Image to be predicted as a PIL Image object or numpy array.
+        Yield
+        -----
+        Bounding box as a dict with the keys: "label", "confidence", "l", "t", "r", "b"
+        Raises
+        ------
+        TypeError when the input image is not supported
+        """
+        # Convert image format
+        if isinstance(orig_img, Image.Image):
+            page_img = orig_img.convert("RGB")
+        elif isinstance(orig_img, np.ndarray):
+            page_img = Image.fromarray(orig_img).convert("RGB")
+        else:
+            raise TypeError("Not supported input image format")
+        resize = {"height": self._image_size, "width": self._image_size}
+        inputs = self._image_processor(
+            images=page_img,
+            return_tensors="pt",
+            size=resize,
+        ).to(self._device)
+        outputs = self._model(**inputs)
+        results = self._image_processor.post_process_object_detection(
+            outputs,
+            target_sizes=torch.tensor([page_img.size[::-1]]),
+            threshold=self._threshold,
+        )
+        w, h = page_img.size
+        result = results[0]
+        for score, label_id, box in zip(
+            result["scores"], result["labels"], result["boxes"]
+        ):
+            score = float(score.item())
+            label_id = int(label_id.item()) + 1  # Advance the label_id
+            label_str = self._classes_map[label_id]
+            # Filter out blacklisted classes
+            if label_str in self._black_classes:
+                continue
+            bbox_float = [float(b.item()) for b in box]
+            l = min(w, max(0, bbox_float[0]))
+            t = min(h, max(0, bbox_float[1]))
+            r = min(w, max(0, bbox_float[2]))
+            b = min(h, max(0, bbox_float[3]))
+            yield {
+                "l": l,
+                "t": t,
+                "r": r,
+                "b": b,
+                "label": label_str,
+                "confidence": score,
+            }

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py RENAMED Viewed

@@ -96,10 +96,10 @@ class MatchingPostProcessor:
                         if cell["cell_class"] <= 1:
                             allow_class = False
                     else:
-                        print("***")
-                        print("no cell_class in...")
-                        print(cell)
-                        print("***")
+                        self._log().debug("***")
+                        self._log().debug("no cell_class in...")
+                        self._log().debug(cell)
+                        self._log().debug("***")
                     if allow_class:
                         match_list = matches[pdf_cell_id]
                         for match in match_list:

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py RENAMED Viewed

@@ -264,7 +264,7 @@ class CellMatcher:
         r, o = otsl.html_to_otsl(table_html_structure, None, False, False, True, False)
         if not r:
             ermsg = "ERR#: COULD NOT CONVERT TO RS THIS TABLE TO COMPUTE SPANS"
-            print(ermsg)
+            self._log().debug(ermsg)
         else:
             otsl_spans = o["otsl_spans"]

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py RENAMED Viewed

@@ -2,14 +2,17 @@
 # Copyright IBM Corp. 2024 - 2024
 # SPDX-License-Identifier: MIT
 #
+import glob
 import json
 import logging
 import os
 from itertools import groupby
+from pathlib import Path
 import cv2
 import numpy as np
 import torch
+from safetensors.torch import load_model
 import docling_ibm_models.tableformer.common as c
 import docling_ibm_models.tableformer.data_management.transforms as T
@@ -30,6 +33,8 @@ from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler
 # LOG_LEVEL = logging.DEBUG
 LOG_LEVEL = logging.WARN
+logger = s.get_custom_logger(__name__, LOG_LEVEL)
 class bcolors:
     HEADER = "\033[95m"
@@ -53,17 +58,17 @@ def otsl_sqr_chk(rs_list, logdebug):
         totcelnum = rs_list.count("fcel") + rs_list.count("ecel")
         if logdebug:
-            print("Total number of cells = {}".format(totcelnum))
+            logger.debug("Total number of cells = {}".format(totcelnum))
         for ind, ln in enumerate(rs_list_split):
             ln.append("nl")
             if logdebug:
-                print("{}".format(ln))
+                logger.debug("{}".format(ln))
             if len(ln) != init_tag_len:
                 isSquare = False
         if isSquare:
             if logdebug:
-                print(
+                logger.debug(
                     "{}*OK* Table is square! *OK*{}".format(
                         bcolors.OKGREEN, bcolors.ENDC
                     )
@@ -71,8 +76,8 @@ def otsl_sqr_chk(rs_list, logdebug):
         else:
             if logdebug:
                 err_name = "{}***** ERR ******{}"
-                print(err_name.format(bcolors.FAIL, bcolors.ENDC))
-                print(
+                logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
+                logger.debug(
                     "{}*ERR* Table is not square! *ERR*{}".format(
                         bcolors.FAIL, bcolors.ENDC
                     )
@@ -80,45 +85,27 @@ def otsl_sqr_chk(rs_list, logdebug):
     return isSquare
-def decide_device(config: dict) -> str:
-    r"""
-    Decide the inference device based on the "predict.device_mode" parameter
-    """
-    device_mode = config["predict"].get("device_mode", "cpu")
-    num_gpus = torch.cuda.device_count()
-    if device_mode == "auto":
-        device = "cuda:0" if num_gpus > 0 else "cpu"
-    elif device_mode in ["gpu", "cuda"]:
-        device = "cuda:0"
-    else:
-        device = "cpu"
-    return device
 class TFPredictor:
     r"""
     Table predictions for the in-memory Docling API
     """
-    def __init__(self, config, num_threads: int = None):
+    def __init__(self, config, device: str = "cpu", num_threads: int = 4):
         r"""
-        The number of threads is decided, in the following order, by:
-        1. The init method parameter `num_threads`, if it is set.
-        2. The envvar "OMP_NUM_THREADS", if it is set.
-        3. The default value 4.
         Parameters
         ----------
-        config : dict
-            Parameters configuration
+        config : dict Parameters configuration
+        device: (Optional) torch device to run the inference.
+        num_threads: (Optional) Number of threads to run the inference if device = 'cpu'
         Raises
         ------
         ValueError
         When the model cannot be found
         """
-        self._device = decide_device(config)
-        self._log().info("Running on device: {}".format(self._device))
+        # self._device = torch.device(device)
+        self._device = device
+        self._log().info("Running on device: {}".format(device))
         self._config = config
         self.enable_post_process = True
@@ -131,11 +118,10 @@ class TFPredictor:
         self._init_word_map()
-        # Set the number of torch threads
-        if num_threads is None:
-            num_threads = int(os.environ.get("OMP_NUM_THREADS", 4))
-        self._num_threads = num_threads
-        torch.set_num_threads(num_threads)
+        # Set the number of threads
+        if device == "cpu":
+            self._num_threads = num_threads
+            torch.set_num_threads(self._num_threads)
         # Load the model
         self._model = self._load_model()
@@ -200,10 +186,21 @@ class TFPredictor:
         if self._model_type == "TableModel02":
             self._remove_padding = True
-        # Load model from checkpoint
-        success, _, _, _, _ = model.load()
-        if not success:
-            err_msg = "Cannot load the model"
+        # Load model from safetensors
+        save_dir = self._config["model"]["save_dir"]
+        models_fn = glob.glob(f"{save_dir}/tableformer_*.safetensors")
+        if not models_fn:
+            err_msg = "Not able to find a model file for {}".format(self._model_type)
+            self._log().error(err_msg)
+            raise ValueError(err_msg)
+        model_fn = models_fn[
+            0
+        ]  # Take the first tableformer safetensors file inside the save_dir
+        missing, unexpected = load_model(model, model_fn, device=self._device)
+        if missing or unexpected:
+            err_msg = "Not able to load the model weights for {}".format(
+                self._model_type
+            )
             self._log().error(err_msg)
             raise ValueError(err_msg)

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/docling_ibm_models/tableformer/otsl.py RENAMED Viewed

@@ -49,15 +49,15 @@ def otsl_sqr_chk(rs_list, name, logdebug):
                 isSquare = False
         if isSquare:
             if logdebug:
-                print(
+                logger.debug(
                     "{}*OK* Table is square! *OK*{}".format(
                         bcolors.OKGREEN, bcolors.ENDC
                     )
                 )
         else:
             err_name = "{}*ERR* " + name + " *ERR*{}"
-            print(err_name.format(bcolors.FAIL, bcolors.ENDC))
-            print(
+            logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
+            logger.debug(
                 "{}*ERR* Table is not square! *ERR*{}".format(
                     bcolors.FAIL, bcolors.ENDC
                 )
@@ -89,9 +89,9 @@ def otsl_tags_cells_sync_chk(rs_list, cells, name, logdebug):
             countCellTags += 1
     if countCellTags != len(cells):
         err_name = "{}*!ERR* " + name + " *ERR!*{}"
-        print(err_name.format(bcolors.FAIL, bcolors.ENDC))
+        logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
         err_msg = "{}*!ERR* Tags are not in sync with cells! *ERR!*{}"
-        print(err_msg.format(bcolors.FAIL, bcolors.ENDC))
+        logger.debug(err_msg.format(bcolors.FAIL, bcolors.ENDC))
         isGood = False
     return isGood
@@ -131,11 +131,13 @@ def otsl_to_html(rs_list, logdebug):
         return rs_list
     html_table = []
     if logdebug:
-        print("{}*Reconstructing HTML...*{}".format(bcolors.WARNING, bcolors.ENDC))
+        logger.debug(
+            "{}*Reconstructing HTML...*{}".format(bcolors.WARNING, bcolors.ENDC)
+        )
     if not otsl_sqr_chk(rs_list, "---", logdebug):
         # PAD TABLE TO SQUARE
-        print("{}*Padding to square...*{}".format(bcolors.WARNING, bcolors.ENDC))
+        logger.debug("{}*Padding to square...*{}".format(bcolors.WARNING, bcolors.ENDC))
         rs_list = otsl_pad_to_sqr(rs_list, "lcel")
     # 2D structure, line by line:
@@ -144,7 +146,7 @@ def otsl_to_html(rs_list, logdebug):
     ]
     if logdebug:
-        print("")
+        logger.debug("")
     # Sequentially store indexes of 2D spans that were registered to avoid re-registering them
     registry_2d_span = []
@@ -182,9 +184,9 @@ def otsl_to_html(rs_list, logdebug):
                         span = True
                 # Check if it has vertical span:
                 if rs_row_ind + 1 < len(rs_list_split):
-                    # print(">>>")
-                    # print(rs_list_split[rs_row_ind + 1])
-                    # print(">>> rs_cell_ind = {}".format(rs_cell_ind))
+                    # logger.debug(">>>")
+                    # logger.debug(rs_list_split[rs_row_ind + 1])
+                    # logger.debug(">>> rs_cell_ind = {}".format(rs_cell_ind))
                     if rs_list_split[rs_row_ind + 1][rs_cell_ind] == "ucel":
                         ddist = otsl_check_down(rs_list_split, rs_cell_ind, rs_row_ind)
                         span = True
@@ -198,12 +200,12 @@ def otsl_to_html(rs_list, logdebug):
                         span = True
                         # Check if this 2D span was already registered,
                         # If not - register, if yes - cancel span
-                        # print("rs_cell_ind: {}, xrdist:{}".format(rs_cell_ind, xrdist))
-                        # print("rs_row_ind: {}, xddist:{}".format(rs_cell_ind, xrdist))
+                        # logger.debug("rs_cell_ind: {}, xrdist:{}".format(rs_cell_ind, xrdist))
+                        # logger.debug("rs_row_ind: {}, xddist:{}".format(rs_cell_ind, xrdist))
                         for x in range(rs_cell_ind, xrdist + rs_cell_ind):
                             for y in range(rs_row_ind, xddist + rs_row_ind):
                                 reg2dind = str(x) + "_" + str(y)
-                                # print(reg2dind)
+                                # logger.debug(reg2dind)
                                 if reg2dind in registry_2d_span:
                                     # Cell of the span is already in, cancel current span
                                     span = False
@@ -232,9 +234,13 @@ def otsl_to_html(rs_list, logdebug):
         html_table.extend(html_list)
     if logdebug:
-        print("*********************** registry_2d_span ***************************")
-        print(registry_2d_span)
-        print("********************************************************************")
+        logger.debug(
+            "*********************** registry_2d_span ***************************"
+        )
+        logger.debug(registry_2d_span)
+        logger.debug(
+            "********************************************************************"
+        )
     return html_table
@@ -316,20 +322,24 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
     current_line_expands = []
     if logdebug:
-        print("")
-        print("*** {}: {} ***".format(table["split"], table["filename"]))
+        logger.debug("")
+        logger.debug("*** {}: {} ***".format(table["split"], table["filename"]))
     colnum = 0
     if extra_debug:
-        print("========================== Input HTML ============================")
-        print(table_html_structure["tokens"])
-        print("==================================================================")
+        logger.debug(
+            "========================== Input HTML ============================"
+        )
+        logger.debug(table_html_structure["tokens"])
+        logger.debug(
+            "=================================================================="
+        )
     if logdebug:
-        print("********")
-        print("* OTSL *")
-        print("********")
+        logger.debug("********")
+        logger.debug("* OTSL *")
+        logger.debug("********")
     for i in range(len(table_html_structure["tokens"])):
         html_tag = table_html_structure["tokens"][i]
@@ -377,7 +387,7 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
                         extra_columns = pre_line_len - cur_line_len - 1
                         if extra_columns > 0:
                             if extra_debug:
-                                print(
+                                logger.debug(
                                     "Extra columns needed in row: {}".format(
                                         extra_columns
                                     )
@@ -534,11 +544,11 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
                 writer.write(out_line)
     if logdebug:
-        print("{}Reconstructed HTML:{}".format(bcolors.OKGREEN, bcolors.ENDC))
-        print(rHTML)
+        logger.debug("{}Reconstructed HTML:{}".format(bcolors.OKGREEN, bcolors.ENDC))
+        logger.debug(rHTML)
         # original HTML
         oHTML = out_line["html"]["html_structure"]
-        print("{}Original HTML:{}".format(bcolors.OKBLUE, bcolors.ENDC))
-        print(oHTML)
+        logger.debug("{}Original HTML:{}".format(bcolors.OKBLUE, bcolors.ENDC))
+        logger.debug(oHTML)
     return True, out_line

{docling_ibm_models-2.0.7 → docling_ibm_models-3.0.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "2.0.7"  # DO NOT EDIT, updated automatically
+version = "3.0.0"  # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -24,12 +24,14 @@ packages = [
 python = "^3.9"
 torch = "^2.2.2"
 torchvision = "^0"
+transformers = "^4.42.0"
 numpy = ">=1.24.4,<3.0.0"
 jsonlines = "^3.1.0"
 Pillow = "^10.0.0"
 tqdm = "^4.64.0"
 opencv-python-headless = "^4.6.0.66"
 huggingface_hub = ">=0.23,<1"
+safetensors = {version=">=0.4.3,<1", extras=["torch"]}
 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -96,3 +98,16 @@ branch = "main"
 parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
 parser_angular_minor_types = "feat"
 parser_angular_patch_types = "fix,perf"
+# [tool.mypy]
+# pretty = true
+# no_implicit_optional = true
+# python_version = "3.10"
+#
+# [[tool.mypy.overrides]]
+# module = [
+#     "torchvision.*",
+#     "transformers.*"
+# ]
+# ignore_missing_imports = true

docling_ibm_models-2.0.7/docling_ibm_models/layoutmodel/layout_predictor.py DELETED Viewed

@@ -1,167 +0,0 @@
-#
-# Copyright IBM Corp. 2024 - 2024
-# SPDX-License-Identifier: MIT
-#
-import os
-from collections.abc import Iterable
-from typing import Union
-import numpy as np
-import torch
-import torchvision.transforms as T
-from PIL import Image
-MODEL_CHECKPOINT_FN = "model.pt"
-DEFAULT_NUM_THREADS = 4
-class LayoutPredictor:
-    r"""
-    Document layout prediction using torch
-    """
-    def __init__(
-        self, artifact_path: str, num_threads: int = None, use_cpu_only: bool = False
-    ):
-        r"""
-        Provide the artifact path that contains the LayoutModel file
-        The number of threads is decided, in the following order, by:
-        1. The init method parameter `num_threads`, if it is set.
-        2. The envvar "OMP_NUM_THREADS", if it is set.
-        3. The default value DEFAULT_NUM_THREADS.
-        The execution provided is decided, in the following order:
-        1. If the init method parameter `cpu_only` is True or the envvar "USE_CPU_ONLY" is set,
-           it uses the "CPUExecutionProvider".
-        3. Otherwise if the "CUDAExecutionProvider" is present, use:
-            ["CUDAExecutionProvider", "CPUExecutionProvider"]:
-        Parameters
-        ----------
-        artifact_path: Path for the model torch file.
-        num_threads: (Optional) Number of threads to run the inference.
-        use_cpu_only: (Optional) If True, it forces CPU as the execution provider.
-        Raises
-        ------
-        FileNotFoundError when the model's torch file is missing
-        """
-        # Initialize classes map:
-        self._classes_map = {
-            0: "background",
-            1: "Caption",
-            2: "Footnote",
-            3: "Formula",
-            4: "List-item",
-            5: "Page-footer",
-            6: "Page-header",
-            7: "Picture",
-            8: "Section-header",
-            9: "Table",
-            10: "Text",
-            11: "Title",
-            12: "Document Index",
-            13: "Code",
-            14: "Checkbox-Selected",
-            15: "Checkbox-Unselected",
-            16: "Form",
-            17: "Key-Value Region",
-        }
-        # Blacklisted classes
-        self._black_classes = set(["Form", "Key-Value Region"])
-        # Set basic params
-        self._threshold = 0.6  # Score threshold
-        self._image_size = 640
-        self._size = np.asarray([[self._image_size, self._image_size]], dtype=np.int64)
-        self._use_cpu_only = use_cpu_only or ("USE_CPU_ONLY" in os.environ)
-        # Model file
-        self._torch_fn = os.path.join(artifact_path, MODEL_CHECKPOINT_FN)
-        if not os.path.isfile(self._torch_fn):
-            raise FileNotFoundError("Missing torch file: {}".format(self._torch_fn))
-        # Get env vars
-        if num_threads is None:
-            num_threads = int(os.environ.get("OMP_NUM_THREADS", DEFAULT_NUM_THREADS))
-        self._num_threads = num_threads
-        self.model = torch.jit.load(self._torch_fn)
-    def info(self) -> dict:
-        r"""
-        Get information about the configuration of LayoutPredictor
-        """
-        info = {
-            "torch_file": self._torch_fn,
-            "use_cpu_only": self._use_cpu_only,
-            "image_size": self._image_size,
-            "threshold": self._threshold,
-        }
-        return info
-    def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]:
-        r"""
-        Predict bounding boxes for a given image.
-        The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as:
-        [left, top, right, bottom]
-        Parameter
-        ---------
-        origin_img: Image to be predicted as a PIL Image object or numpy array.
-        Yield
-        -----
-        Bounding box as a dict with the keys: "label", "confidence", "l", "t", "r", "b"
-        Raises
-        ------
-        TypeError when the input image is not supported
-        """
-        # Convert image format
-        if isinstance(orig_img, Image.Image):
-            page_img = orig_img.convert("RGB")
-        elif isinstance(orig_img, np.ndarray):
-            page_img = Image.fromarray(orig_img).convert("RGB")
-        else:
-            raise TypeError("Not supported input image format")
-        w, h = page_img.size
-        orig_size = torch.tensor([w, h])[None]
-        transforms = T.Compose(
-            [
-                T.Resize((640, 640)),
-                T.ToTensor(),
-            ]
-        )
-        img = transforms(page_img)[None]
-        # Predict
-        with torch.no_grad():
-            labels, boxes, scores = self.model(img, orig_size)
-        # Yield output
-        for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
-            # Filter out blacklisted classes
-            label_idx = int(label_idx.item())
-            score = float(score.item())
-            label = self._classes_map[label_idx + 1]
-            if label in self._black_classes:
-                continue
-            # Check against threshold
-            if score > self._threshold:
-                l = min(w, max(0, box[0]))
-                t = min(h, max(0, box[1]))
-                r = min(w, max(0, box[2]))
-                b = min(h, max(0, box[3]))
-                yield {
-                    "l": l,
-                    "t": t,
-                    "r": r,
-                    "b": b,
-                    "label": label,
-                    "confidence": score,
-                }