onnxtr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. onnxtr/__init__.py +2 -0
  2. onnxtr/contrib/__init__.py +0 -0
  3. onnxtr/contrib/artefacts.py +131 -0
  4. onnxtr/contrib/base.py +105 -0
  5. onnxtr/file_utils.py +33 -0
  6. onnxtr/io/__init__.py +5 -0
  7. onnxtr/io/elements.py +455 -0
  8. onnxtr/io/html.py +28 -0
  9. onnxtr/io/image.py +56 -0
  10. onnxtr/io/pdf.py +42 -0
  11. onnxtr/io/reader.py +85 -0
  12. onnxtr/models/__init__.py +4 -0
  13. onnxtr/models/_utils.py +141 -0
  14. onnxtr/models/builder.py +355 -0
  15. onnxtr/models/classification/__init__.py +2 -0
  16. onnxtr/models/classification/models/__init__.py +1 -0
  17. onnxtr/models/classification/models/mobilenet.py +120 -0
  18. onnxtr/models/classification/predictor/__init__.py +1 -0
  19. onnxtr/models/classification/predictor/base.py +57 -0
  20. onnxtr/models/classification/zoo.py +76 -0
  21. onnxtr/models/detection/__init__.py +2 -0
  22. onnxtr/models/detection/core.py +101 -0
  23. onnxtr/models/detection/models/__init__.py +3 -0
  24. onnxtr/models/detection/models/differentiable_binarization.py +159 -0
  25. onnxtr/models/detection/models/fast.py +160 -0
  26. onnxtr/models/detection/models/linknet.py +160 -0
  27. onnxtr/models/detection/postprocessor/__init__.py +0 -0
  28. onnxtr/models/detection/postprocessor/base.py +144 -0
  29. onnxtr/models/detection/predictor/__init__.py +1 -0
  30. onnxtr/models/detection/predictor/base.py +54 -0
  31. onnxtr/models/detection/zoo.py +73 -0
  32. onnxtr/models/engine.py +50 -0
  33. onnxtr/models/predictor/__init__.py +1 -0
  34. onnxtr/models/predictor/base.py +175 -0
  35. onnxtr/models/predictor/predictor.py +145 -0
  36. onnxtr/models/preprocessor/__init__.py +1 -0
  37. onnxtr/models/preprocessor/base.py +118 -0
  38. onnxtr/models/recognition/__init__.py +2 -0
  39. onnxtr/models/recognition/core.py +28 -0
  40. onnxtr/models/recognition/models/__init__.py +5 -0
  41. onnxtr/models/recognition/models/crnn.py +226 -0
  42. onnxtr/models/recognition/models/master.py +145 -0
  43. onnxtr/models/recognition/models/parseq.py +134 -0
  44. onnxtr/models/recognition/models/sar.py +134 -0
  45. onnxtr/models/recognition/models/vitstr.py +166 -0
  46. onnxtr/models/recognition/predictor/__init__.py +1 -0
  47. onnxtr/models/recognition/predictor/_utils.py +86 -0
  48. onnxtr/models/recognition/predictor/base.py +79 -0
  49. onnxtr/models/recognition/utils.py +89 -0
  50. onnxtr/models/recognition/zoo.py +69 -0
  51. onnxtr/models/zoo.py +114 -0
  52. onnxtr/transforms/__init__.py +1 -0
  53. onnxtr/transforms/base.py +112 -0
  54. onnxtr/utils/__init__.py +4 -0
  55. onnxtr/utils/common_types.py +18 -0
  56. onnxtr/utils/data.py +126 -0
  57. onnxtr/utils/fonts.py +41 -0
  58. onnxtr/utils/geometry.py +498 -0
  59. onnxtr/utils/multithreading.py +50 -0
  60. onnxtr/utils/reconstitution.py +70 -0
  61. onnxtr/utils/repr.py +64 -0
  62. onnxtr/utils/visualization.py +291 -0
  63. onnxtr/utils/vocabs.py +71 -0
  64. onnxtr/version.py +1 -0
  65. onnxtr-0.1.0.dist-info/LICENSE +201 -0
  66. onnxtr-0.1.0.dist-info/METADATA +481 -0
  67. onnxtr-0.1.0.dist-info/RECORD +70 -0
  68. onnxtr-0.1.0.dist-info/WHEEL +5 -0
  69. onnxtr-0.1.0.dist-info/top_level.txt +2 -0
  70. onnxtr-0.1.0.dist-info/zip-safe +1 -0
onnxtr/models/zoo.py ADDED
@@ -0,0 +1,114 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ from typing import Any
7
+
8
+ from .detection.zoo import detection_predictor
9
+ from .predictor import OCRPredictor
10
+ from .recognition.zoo import recognition_predictor
11
+
12
+ __all__ = ["ocr_predictor"]
13
+
14
+
15
def _predictor(
    det_arch: Any,
    reco_arch: Any,
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    det_bs: int = 4,
    reco_bs: int = 1024,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    **kwargs,
) -> OCRPredictor:
    """Assemble an end-to-end OCR predictor from a detection and a recognition architecture.

    Args:
    ----
        det_arch: detection architecture name or model instance
        reco_arch: recognition architecture name or model instance
        assume_straight_pages: speed up inference by assuming non-rotated pages
        preserve_aspect_ratio: pad inputs to keep their aspect ratio before detection
        symmetric_pad: pad symmetrically rather than bottom-right only
        det_bs: batch size for the detection stage
        reco_bs: batch size for the recognition stage
        detect_orientation: add the estimated page orientation to the predictions
        straighten_pages: rotate pages to their estimated orientation before detection
        detect_language: add a language prediction for each page
        kwargs: forwarded to `OCRPredictor`

    Returns:
    -------
        the assembled `OCRPredictor`
    """
    # Stage 1: text localization
    detection = detection_predictor(
        det_arch,
        batch_size=det_bs,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
    )

    # Stage 2: text recognition on the detected crops
    recognition = recognition_predictor(reco_arch, batch_size=reco_bs)

    return OCRPredictor(
        detection,
        recognition,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        **kwargs,
    )
54
+
55
+
56
def ocr_predictor(
    det_arch: Any = "fast_base",
    reco_arch: Any = "crnn_vgg16_bn",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    export_as_straight_boxes: bool = False,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    **kwargs: Any,
) -> OCRPredictor:
    """End-to-end OCR architecture pairing one localization model with one text-recognition model.

    >>> import numpy as np
    >>> from onnxtr.models import ocr_predictor
    >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
    ----
        det_arch: name of the detection architecture, or the model itself
            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
        reco_arch: name of the recognition architecture, or the model itself
            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
        assume_straight_pages: if True, speeds up inference by assuming pages contain
            only straight (non-rotated) textual elements
        preserve_aspect_ratio: if True, pad the input document image so its aspect
            ratio is preserved before detection
        symmetric_pad: if True, pad symmetrically instead of padding at the bottom-right
        export_as_straight_boxes: when assume_straight_pages is False, export the final
            (potentially rotated) predictions as straight bounding boxes
        detect_orientation: if True, add the estimated general page orientation to the
            predictions for each page, at a small latency cost
        straighten_pages: if True, estimate the general page orientation from the
            segmentation map median line orientation, then rotate the page before
            re-running detection; improves results on page-uniform rotations
        detect_language: if True, add a language prediction for each page, at a small
            latency cost
        kwargs: keyword args of `OCRPredictor`

    Returns:
    -------
        OCR predictor
    """
    # All heavy lifting is delegated to the shared builder
    return _predictor(
        det_arch,
        reco_arch,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        **kwargs,
    )
@@ -0,0 +1 @@
1
+ from .base import *
@@ -0,0 +1,112 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ from typing import Tuple, Union
7
+
8
+ import cv2
9
+ import numpy as np
10
+
11
+ __all__ = ["Resize", "Normalize"]
12
+
13
+
14
class Resize:
    """Resize the input image to the given size, optionally preserving its aspect ratio.

    Args:
    ----
        size: target size as ``(height, width)``, or a single int for a square output.
            NOTE(review): a list passes the validation below but breaks the unpacking in
            ``__call__`` — callers appear to use int or tuple only; confirm before tightening.
        interpolation: OpenCV interpolation flag used for resizing
        preserve_aspect_ratio: if True, fit the image inside the target canvas and pad
            the remainder with zeros
        symmetric_pad: if True, pad equally on opposite sides; otherwise pad
            bottom/right only
    """

    def __init__(
        self,
        size: Union[int, Tuple[int, int]],
        interpolation=cv2.INTER_LINEAR,
        preserve_aspect_ratio: bool = False,
        symmetric_pad: bool = False,
    ) -> None:
        super().__init__()
        self.size = size
        self.interpolation = interpolation
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.output_size = size if isinstance(size, tuple) else (size, size)

        if not isinstance(self.size, (int, tuple, list)):
            raise AssertionError("size should be either a tuple, a list or an int")

    def __call__(
        self,
        img: np.ndarray,
    ) -> np.ndarray:
        if img.ndim == 3:
            h, w = img.shape[0:2]
        else:
            # assumes a batched (N, H, W, C) array — TODO confirm against callers
            h, w = img.shape[1:3]
        sh, sw = self.size if isinstance(self.size, tuple) else (self.size, self.size)

        # Aspect ratio of the input image
        aspect = w / h

        if self.preserve_aspect_ratio:
            # Scale so the image fits entirely inside the target canvas
            if aspect > 1:  # Horizontal image
                new_w = sw
                new_h = int(sw / aspect)
            elif aspect < 1:  # Vertical image
                new_h = sh
                new_w = int(sh * aspect)
            else:  # Square image
                new_h, new_w = sh, sw

            img_resized = cv2.resize(img, (new_w, new_h), interpolation=self.interpolation)

            # BUGFIX: honor `symmetric_pad` — previously the image was always centered,
            # even when symmetric_pad=False (which should pad bottom/right only).
            if self.symmetric_pad:
                pad_top = max((sh - new_h) // 2, 0)
                pad_left = max((sw - new_w) // 2, 0)
            else:
                pad_top = 0
                pad_left = 0
            pad_bottom = max(sh - new_h - pad_top, 0)
            pad_right = max(sw - new_w - pad_left, 0)

            # Pad with zeros (black) up to the target size
            img_resized = cv2.copyMakeBorder(  # type: ignore[call-overload]
                img_resized, pad_top, pad_bottom, pad_left, pad_right, borderType=cv2.BORDER_CONSTANT, value=0
            )

            # Guard against off-by-one rounding: force the exact target size
            img_resized = cv2.resize(img_resized, (sw, sh), interpolation=self.interpolation)
        else:
            # Plain resize, aspect ratio not preserved
            img_resized = cv2.resize(img, (sw, sh), interpolation=self.interpolation)

        return img_resized

    def __repr__(self) -> str:
        interpolate_str = self.interpolation
        _repr = f"output_size={self.size}, interpolation='{interpolate_str}'"
        if self.preserve_aspect_ratio:
            _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
        return f"{self.__class__.__name__}({_repr})"
85
+
86
+
87
class Normalize:
    """Normalize an image with the given per-channel mean and standard deviation.

    Args:
    ----
        mean: channel-wise mean (or a single float applied to all channels)
        std: channel-wise standard deviation (or a single float)
    """

    def __init__(
        self,
        mean: Union[float, Tuple[float, float, float]] = (0.485, 0.456, 0.406),
        std: Union[float, Tuple[float, float, float]] = (0.229, 0.224, 0.225),
    ) -> None:
        self.mean = mean
        self.std = std

        for value, label in ((self.mean, "mean"), (self.std, "std")):
            if not isinstance(value, (float, tuple, list)):
                raise AssertionError(f"{label} should be either a tuple, a list or a float")

    def __call__(
        self,
        img: np.ndarray,
    ) -> np.ndarray:
        # Cast mean/std to the image dtype before the arithmetic.
        # NOTE(review): for integer dtypes this truncates the statistics — presumably
        # inputs are float images; confirm against the preprocessor.
        offset = np.asarray(self.mean, dtype=img.dtype)
        scale = np.asarray(self.std, dtype=img.dtype)
        return (img - offset) / scale

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})"
@@ -0,0 +1,4 @@
1
+ from .common_types import *
2
+ from .data import *
3
+ from .geometry import *
4
+ from .vocabs import *
@@ -0,0 +1,18 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ from pathlib import Path
7
+ from typing import List, Tuple, Union
8
+
9
__all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]


# A single 2D point as (x, y) coordinates
Point2D = Tuple[float, float]
# A box given by two corner points — presumably ((xmin, ymin), (xmax, ymax)); confirm against geometry.py
BoundingBox = Tuple[Point2D, Point2D]
# A quadrilateral given by its 4 corner points
Polygon4P = Tuple[Point2D, Point2D, Point2D, Point2D]
# An arbitrary polygon as an ordered list of points
Polygon = List[Point2D]
# A filesystem path, as str or pathlib.Path
AbstractPath = Union[str, Path]
# A path or raw in-memory binary content
AbstractFile = Union[AbstractPath, bytes]
# A flat box of 4 floats — presumably (xmin, ymin, xmax, ymax); confirm against consumers
Bbox = Tuple[float, float, float, float]
onnxtr/utils/data.py ADDED
@@ -0,0 +1,126 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ # Adapted from https://github.com/pytorch/vision/blob/master/torchvision/datasets/utils.py
7
+
8
+ import hashlib
9
+ import logging
10
+ import os
11
+ import re
12
+ import urllib
13
+ import urllib.error
14
+ import urllib.request
15
+ from pathlib import Path
16
+ from typing import Optional, Union
17
+
18
+ from tqdm.auto import tqdm
19
+
20
+ __all__ = ["download_from_url"]
21
+
22
+
23
+ # matches bfd8deac from resnet18-bfd8deac.ckpt
24
+ HASH_REGEX = re.compile(r"-([a-f0-9]*)\.")
25
+ USER_AGENT = "felixdittrich92/OnnxTR"
26
+
27
+
28
def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -> None:
    """Stream the resource at `url` to `filename`, showing a progress bar.

    Args:
    ----
        url: the URL to fetch
        filename: destination path on disk
        chunk_size: number of bytes read per iteration
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with open(filename, "wb") as fh:
        with urllib.request.urlopen(request) as response:
            with tqdm(total=response.length) as pbar:
                # BUGFIX: sentinel must be b"" — response.read returns bytes, and
                # b"" != "" so the previous str sentinel never matched.
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    fh.write(chunk)
                    # BUGFIX: count the bytes actually read; the final chunk is
                    # usually shorter than chunk_size.
                    pbar.update(len(chunk))
37
+
38
+
39
+ def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
40
+ with open(file_path, "rb") as f:
41
+ sha_hash = hashlib.sha256(f.read()).hexdigest()
42
+
43
+ return sha_hash[: len(hash_prefix)] == hash_prefix
44
+
45
+
46
def download_from_url(
    url: str,
    file_name: Optional[str] = None,
    hash_prefix: Optional[str] = None,
    cache_dir: Optional[str] = None,
    cache_subdir: Optional[str] = None,
) -> Path:
    """Download a file using its URL

    >>> from onnxtr.models import download_from_url
    >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")

    Args:
    ----
        url: the URL of the file to download
        file_name: optional name of the file once downloaded
        hash_prefix: optional expected SHA256 hash of the file
        cache_dir: cache directory
        cache_subdir: subfolder to use in the cache

    Returns:
    -------
        the location of the downloaded file

    Note:
    ----
        You can change cache directory location by using `ONNXTR_CACHE_DIR` environment variable.
    """
    if not isinstance(file_name, str):
        # Derive the name from the last URL segment, stripping query parameters
        file_name = url.rpartition("/")[-1].split("&")[0]

    cache_dir = (
        str(os.environ.get("ONNXTR_CACHE_DIR", os.path.join(os.path.expanduser("~"), ".cache", "onnxtr")))
        if cache_dir is None
        else cache_dir
    )

    # Fall back to the hash embedded in the file name (e.g. resnet18-bfd8deac.ckpt)
    if hash_prefix is None:
        r = HASH_REGEX.search(file_name)
        hash_prefix = r.group(1) if r else None

    folder_path = Path(cache_dir) if cache_subdir is None else Path(cache_dir, cache_subdir)
    file_path = folder_path.joinpath(file_name)
    # Reuse a previously downloaded file when its hash (if any) checks out
    if file_path.is_file() and (hash_prefix is None or _check_integrity(file_path, hash_prefix)):
        logging.info(f"Using downloaded & verified file: {file_path}")
        return file_path

    try:
        # Create folder hierarchy
        folder_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        # BUGFIX: "direcotry" -> "directory"
        error_message = f"Failed creating cache directory at {folder_path}"
        if os.environ.get("ONNXTR_CACHE_DIR", ""):
            error_message += " using path from 'ONNXTR_CACHE_DIR' environment variable."
        else:
            error_message += (
                ". You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed."
            )
        logging.error(error_message)
        raise
    # Download the file
    try:
        print(f"Downloading {url} to {file_path}")
        _urlretrieve(url, file_path)
    except (urllib.error.URLError, IOError) as e:  # pragma: no cover
        if url.startswith("https"):
            # BUGFIX: only rewrite the scheme (first occurrence), not every
            # "https:" substring that may appear later in the URL.
            url = url.replace("https:", "http:", 1)
            print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
            _urlretrieve(url, file_path)
        else:
            raise e

    # Remove corrupted files
    if isinstance(hash_prefix, str) and not _check_integrity(file_path, hash_prefix):  # pragma: no cover
        os.remove(file_path)
        raise ValueError(f"corrupted download, the hash of {url} does not match its expected value")

    return file_path
onnxtr/utils/fonts.py ADDED
@@ -0,0 +1,41 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ import logging
7
+ import platform
8
+ from typing import Optional
9
+
10
+ from PIL import ImageFont
11
+
12
+ __all__ = ["get_font"]
13
+
14
+
15
def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
    """Resolves a compatible ImageFont for the system

    Args:
    ----
        font_family: the font family to use
        font_size: the size of the font upon rendering

    Returns:
    -------
        the Pillow font
    """
    # Font selection: pick an OS-appropriate default when none is specified
    if font_family is None:
        try:
            font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
        except OSError:  # pragma: no cover
            font = ImageFont.load_default()
            # BUGFIX: the implicitly-concatenated message parts previously ran
            # together without separating spaces ("PIL font,font size issues...").
            logging.warning(
                "unable to load recommended font family. Loading default PIL font, "
                "font size issues may be expected. "
                "To prevent this, it is recommended to specify the value of 'font_family'."
            )
    else:  # pragma: no cover
        font = ImageFont.truetype(font_family, font_size)

    return font