onnxtr 0.3.2__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {onnxtr-0.3.2 → onnxtr-0.4.1}/PKG-INFO +80 -6
  2. {onnxtr-0.3.2 → onnxtr-0.4.1}/README.md +77 -4
  3. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/__init__.py +1 -0
  4. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/models/mobilenet.py +1 -0
  5. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/models/differentiable_binarization.py +2 -0
  6. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/models/fast.py +1 -0
  7. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/models/linknet.py +1 -0
  8. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/engine.py +2 -0
  9. onnxtr-0.4.1/onnxtr/models/factory/__init__.py +1 -0
  10. onnxtr-0.4.1/onnxtr/models/factory/hub.py +224 -0
  11. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/predictor/base.py +2 -2
  12. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/predictor/predictor.py +2 -0
  13. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/crnn.py +2 -0
  14. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/master.py +1 -0
  15. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/parseq.py +2 -0
  16. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/sar.py +2 -0
  17. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/vitstr.py +1 -0
  18. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/vocabs.py +11 -4
  19. onnxtr-0.4.1/onnxtr/version.py +1 -0
  20. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/PKG-INFO +80 -6
  21. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/SOURCES.txt +2 -0
  22. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/requires.txt +2 -1
  23. {onnxtr-0.3.2 → onnxtr-0.4.1}/pyproject.toml +3 -1
  24. {onnxtr-0.3.2 → onnxtr-0.4.1}/setup.py +1 -1
  25. onnxtr-0.3.2/onnxtr/version.py +0 -1
  26. {onnxtr-0.3.2 → onnxtr-0.4.1}/LICENSE +0 -0
  27. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/__init__.py +0 -0
  28. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/contrib/__init__.py +0 -0
  29. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/contrib/artefacts.py +0 -0
  30. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/contrib/base.py +0 -0
  31. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/file_utils.py +0 -0
  32. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/__init__.py +0 -0
  33. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/elements.py +0 -0
  34. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/html.py +0 -0
  35. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/image.py +0 -0
  36. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/pdf.py +0 -0
  37. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/io/reader.py +0 -0
  38. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/_utils.py +0 -0
  39. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/builder.py +0 -0
  40. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/__init__.py +0 -0
  41. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/models/__init__.py +0 -0
  42. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/predictor/__init__.py +0 -0
  43. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/predictor/base.py +0 -0
  44. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/classification/zoo.py +0 -0
  45. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/__init__.py +0 -0
  46. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/_utils/__init__.py +0 -0
  47. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/_utils/base.py +0 -0
  48. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/core.py +0 -0
  49. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/models/__init__.py +0 -0
  50. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  51. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/postprocessor/base.py +0 -0
  52. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/predictor/__init__.py +0 -0
  53. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/predictor/base.py +0 -0
  54. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/detection/zoo.py +0 -0
  55. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/predictor/__init__.py +0 -0
  56. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/preprocessor/__init__.py +0 -0
  57. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/preprocessor/base.py +0 -0
  58. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/__init__.py +0 -0
  59. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/core.py +0 -0
  60. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/models/__init__.py +0 -0
  61. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  62. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/predictor/_utils.py +0 -0
  63. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/predictor/base.py +0 -0
  64. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/utils.py +0 -0
  65. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/recognition/zoo.py +0 -0
  66. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/models/zoo.py +0 -0
  67. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/py.typed +0 -0
  68. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/transforms/__init__.py +0 -0
  69. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/transforms/base.py +0 -0
  70. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/__init__.py +0 -0
  71. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/common_types.py +0 -0
  72. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/data.py +0 -0
  73. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/fonts.py +0 -0
  74. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/geometry.py +0 -0
  75. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/multithreading.py +0 -0
  76. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/reconstitution.py +0 -0
  77. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/repr.py +0 -0
  78. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr/utils/visualization.py +0 -0
  79. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/dependency_links.txt +0 -0
  80. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/top_level.txt +0 -0
  81. {onnxtr-0.3.2 → onnxtr-0.4.1}/onnxtr.egg-info/zip-safe +0 -0
  82. {onnxtr-0.3.2 → onnxtr-0.4.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.3.2
+Version: 0.4.1
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -225,7 +225,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <4,>=3.9.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy<2.0.0,>=1.16.0
+Requires-Dist: numpy<3.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: opencv-python<5.0.0,>=4.5.0
 Requires-Dist: pypdfium2<5.0.0,>=4.11.0
@@ -233,6 +233,7 @@ Requires-Dist: pyclipper<2.0.0,>=1.2.0
 Requires-Dist: shapely<3.0.0,>=1.6.0
 Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
 Requires-Dist: langdetect<2.0.0,>=1.0.9
+Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
@@ -275,7 +276,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.4.1-blue.svg)](https://pypi.org/project/OnnxTR/)

 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

@@ -449,6 +450,69 @@ det_model = linknet_resnet18("path_to_custom_model.onnx")
 model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ```

+## Loading models from HuggingFace Hub
+
+You can also load models from the HuggingFace Hub:
+
+```python
+from onnxtr.io import DocumentFile
+from onnxtr.models import ocr_predictor, from_hub
+
+img = DocumentFile.from_images(['<image_path>'])
+# Load your model from the hub
+model = from_hub('onnxtr/my-model')
+
+# Pass it to the predictor
+# If your model is a recognition model:
+predictor = ocr_predictor(
+    det_arch='db_mobilenet_v3_large',
+    reco_arch=model
+)
+
+# If your model is a detection model:
+predictor = ocr_predictor(
+    det_arch=model,
+    reco_arch='crnn_mobilenet_v3_small'
+)
+
+# Get your predictions
+res = predictor(img)
+```
+
+HF Hub search: [here](https://huggingface.co/models?search=onnxtr).
+
+Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)
+
+Or push your own models to the hub:
+
+```python
+from onnxtr.models import parseq, push_to_hf_hub, login_to_hub
+from onnxtr.utils.vocabs import VOCABS
+
+# Login to the hub
+login_to_hub()
+
+# Recogniton model
+model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-parseq-multilingual-v1",
+    task="recognition",  # The task for which the model is intended [detection, recognition, classification]
+    arch="parseq",  # The name of the model architecture
+    override=False  # Set to `True` if you want to override an existing model / repository
+)
+
+# Detection model
+model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-linknet-resnet18",
+    task="detection",
+    arch="linknet_resnet18",
+    override=True
+)
+```
+
 ## Models architectures

 Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
@@ -525,8 +589,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |Library                          |FUNSD (199 pages)  |CORD (900 pages)   |
 |---------------------------------|-------------------|-------------------|
 |docTR (CPU) - v0.8.1             | ~1.29s / Page     | ~0.60s / Page     |
-|**OnnxTR (CPU)** - v0.1.2        | ~0.57s / Page     | **~0.25s / Page** |
-|**OnnxTR (CPU) 8-bit** - v0.1.2  | **~0.38s / Page** | **~0.14s / Page** |
+|**OnnxTR (CPU)** - v0.4.1        | ~0.57s / Page     | **~0.25s / Page** |
+|**OnnxTR (CPU) 8-bit** - v0.4.1  | **~0.38s / Page** | **~0.14s / Page** |
 |EasyOCR (CPU) - v1.7.1           | ~1.96s / Page     | ~1.75s / Page     |
 |**PyTesseract (CPU)** - v0.3.10  | **~0.50s / Page** | ~0.52s / Page     |
 |Surya (line) (CPU) - v0.4.4      | ~48.76s / Page    | ~35.49s / Page    |
@@ -538,7 +602,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------|-------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page     | ~0.05s / Page     |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page** | **~0.03s / Page** |
-|OnnxTR (GPU) - v0.1.2                | **~0.06s / Page** | ~0.04s / Page     |
+|OnnxTR (GPU) - v0.4.1                | **~0.06s / Page** | ~0.04s / Page     |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page     | ~0.19s / Page     |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page     | ~2.81s / Page     |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page     | **~0.03s / Page** |
@@ -557,6 +621,16 @@ If you wish to cite please refer to the base project citation, feel free to use
 }
 ```

+```bibtex
+@misc{onnxtr2024,
+    title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
+    author={Felix Dittrich},
+    year={2024},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
+}
+```
+
 ## License

 Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -7,7 +7,7 @@
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.4.1-blue.svg)](https://pypi.org/project/OnnxTR/)

 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

@@ -181,6 +181,69 @@ det_model = linknet_resnet18("path_to_custom_model.onnx")
 model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ```

+## Loading models from HuggingFace Hub
+
+You can also load models from the HuggingFace Hub:
+
+```python
+from onnxtr.io import DocumentFile
+from onnxtr.models import ocr_predictor, from_hub
+
+img = DocumentFile.from_images(['<image_path>'])
+# Load your model from the hub
+model = from_hub('onnxtr/my-model')
+
+# Pass it to the predictor
+# If your model is a recognition model:
+predictor = ocr_predictor(
+    det_arch='db_mobilenet_v3_large',
+    reco_arch=model
+)
+
+# If your model is a detection model:
+predictor = ocr_predictor(
+    det_arch=model,
+    reco_arch='crnn_mobilenet_v3_small'
+)
+
+# Get your predictions
+res = predictor(img)
+```
+
+HF Hub search: [here](https://huggingface.co/models?search=onnxtr).
+
+Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)
+
+Or push your own models to the hub:
+
+```python
+from onnxtr.models import parseq, push_to_hf_hub, login_to_hub
+from onnxtr.utils.vocabs import VOCABS
+
+# Login to the hub
+login_to_hub()
+
+# Recogniton model
+model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-parseq-multilingual-v1",
+    task="recognition",  # The task for which the model is intended [detection, recognition, classification]
+    arch="parseq",  # The name of the model architecture
+    override=False  # Set to `True` if you want to override an existing model / repository
+)
+
+# Detection model
+model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-linknet-resnet18",
+    task="detection",
+    arch="linknet_resnet18",
+    override=True
+)
+```
+
 ## Models architectures

 Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
@@ -257,8 +320,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |Library                          |FUNSD (199 pages)  |CORD (900 pages)   |
 |---------------------------------|-------------------|-------------------|
 |docTR (CPU) - v0.8.1             | ~1.29s / Page     | ~0.60s / Page     |
-|**OnnxTR (CPU)** - v0.1.2        | ~0.57s / Page     | **~0.25s / Page** |
-|**OnnxTR (CPU) 8-bit** - v0.1.2  | **~0.38s / Page** | **~0.14s / Page** |
+|**OnnxTR (CPU)** - v0.4.1        | ~0.57s / Page     | **~0.25s / Page** |
+|**OnnxTR (CPU) 8-bit** - v0.4.1  | **~0.38s / Page** | **~0.14s / Page** |
 |EasyOCR (CPU) - v1.7.1           | ~1.96s / Page     | ~1.75s / Page     |
 |**PyTesseract (CPU)** - v0.3.10  | **~0.50s / Page** | ~0.52s / Page     |
 |Surya (line) (CPU) - v0.4.4      | ~48.76s / Page    | ~35.49s / Page    |
@@ -270,7 +333,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------|-------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page     | ~0.05s / Page     |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page** | **~0.03s / Page** |
-|OnnxTR (GPU) - v0.1.2                | **~0.06s / Page** | ~0.04s / Page     |
+|OnnxTR (GPU) - v0.4.1                | **~0.06s / Page** | ~0.04s / Page     |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page     | ~0.19s / Page     |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page     | ~2.81s / Page     |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page     | **~0.03s / Page** |
@@ -289,6 +352,16 @@ If you wish to cite please refer to the base project citation, feel free to use
 }
 ```

+```bibtex
+@misc{onnxtr2024,
+    title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
+    author={Felix Dittrich},
+    year={2024},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
+}
+```
+
 ## License

 Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -3,3 +3,4 @@ from .classification import *
 from .detection import *
 from .recognition import *
 from .zoo import *
+from .factory import *
@@ -56,6 +56,7 @@ class MobileNetV3(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.cfg = cfg

     def __call__(
@@ -64,8 +64,10 @@ class DBNet(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.cfg = cfg
         self.assume_straight_pages = assume_straight_pages
+
         self.postprocessor = GeneralDetectionPostProcessor(
             assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
         )
@@ -62,6 +62,7 @@ class FAST(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.cfg = cfg
         self.assume_straight_pages = assume_straight_pages

@@ -64,6 +64,7 @@ class LinkNet(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.cfg = cfg
         self.assume_straight_pages = assume_straight_pages

@@ -90,6 +90,8 @@ class Engine:
     def __init__(self, url: str, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any) -> None:
         engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig()
         archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
+        # Store model path for each model
+        self.model_path = archive_path
         self.session_options = engine_cfg.session_options
         self.providers = engine_cfg.providers
         self.runtime = InferenceSession(archive_path, providers=self.providers, sess_options=self.session_options)
@@ -0,0 +1 @@
+from .hub import *
@@ -0,0 +1,224 @@
+# Copyright (C) 2021-2024, Mindee | Felix Dittrich.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+# Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py
+
+import json
+import logging
+import os
+import shutil
+import subprocess
+import textwrap
+from pathlib import Path
+from typing import Any, Optional
+
+from huggingface_hub import (
+    HfApi,
+    Repository,
+    get_token,
+    get_token_permission,
+    hf_hub_download,
+    login,
+)
+
+from onnxtr import models
+from onnxtr.models.engine import EngineConfig
+
+__all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]
+
+
+AVAILABLE_ARCHS = {
+    "classification": models.classification.zoo.ORIENTATION_ARCHS,
+    "detection": models.detection.zoo.ARCHS,
+    "recognition": models.recognition.zoo.ARCHS,
+}
+
+
+def login_to_hub() -> None:  # pragma: no cover
+    """Login to huggingface hub"""
+    access_token = get_token()
+    if access_token is not None and get_token_permission(access_token):
+        logging.info("Huggingface Hub token found and valid")
+        login(token=access_token, write_permission=True)
+    else:
+        login()
+    # check if git lfs is installed
+    try:
+        subprocess.call(["git", "lfs", "version"])
+    except FileNotFoundError:
+        raise OSError(
+            "Looks like you do not have git-lfs installed, please install. \
+            You can install from https://git-lfs.github.com/. \
+            Then run `git lfs install` (you only have to do this once)."
+        )
+
+
+def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None:
+    """Save model and config to disk for pushing to huggingface hub
+
+    Args:
+    ----
+        model: Onnx model to be saved
+        save_dir: directory to save model and config
+        arch: architecture name
+        task: task name
+    """
+    save_directory = Path(save_dir)
+    shutil.copy2(model.model_path, save_directory / "model.onnx")
+
+    config_path = save_directory / "config.json"
+
+    # add model configuration
+    model_config = model.cfg
+    model_config["arch"] = arch
+    model_config["task"] = task
+
+    with config_path.open("w") as f:
+        json.dump(model_config, f, indent=2, ensure_ascii=False)
+
+
+def push_to_hf_hub(
+    model: Any, model_name: str, task: str, override: bool = False, **kwargs
+) -> None:  # pragma: no cover
+    """Save model and its configuration on HF hub
+
+    >>> from onnxtr.models import login_to_hub, push_to_hf_hub
+    >>> from onnxtr.models.recognition import crnn_mobilenet_v3_small
+    >>> login_to_hub()
+    >>> model = crnn_mobilenet_v3_small()
+    >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')
+
+    Args:
+    ----
+        model: Onnx model to be saved
+        model_name: name of the model which is also the repository name
+        task: task name
+        override: whether to override the existing model / repo on HF hub
+        **kwargs: keyword arguments for push_to_hf_hub
+    """
+    run_config = kwargs.get("run_config", None)
+    arch = kwargs.get("arch", None)
+
+    if run_config is None and arch is None:
+        raise ValueError("run_config or arch must be specified")
+    if task not in ["classification", "detection", "recognition"]:
+        raise ValueError("task must be one of classification, detection, recognition")
+
+    # default readme
+    readme = textwrap.dedent(
+        f"""
+    ---
+    language:
+    - en
+    - fr
+    license: apache-2.0
+    ---
+
+    <p align="center">
+    <img src="https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/logo.jpg" width="40%">
+    </p>
+
+    **Optical Character Recognition made seamless & accessible to anyone, powered by Onnxruntime**
+
+    ## Task: {task}
+
+    https://github.com/felixdittrich92/OnnxTR
+
+    ### Example usage:
+
+    ```python
+    >>> from onnxtr.io import DocumentFile
+    >>> from onnxtr.models import ocr_predictor, from_hub
+
+    >>> img = DocumentFile.from_images(['<image_path>'])
+    >>> # Load your model from the hub
+    >>> model = from_hub('onnxtr/my-model')
+
+    >>> # Pass it to the predictor
+    >>> # If your model is a recognition model:
+    >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large',
+    >>>                           reco_arch=model)
+
+    >>> # If your model is a detection model:
+    >>> predictor = ocr_predictor(det_arch=model,
+    >>>                           reco_arch='crnn_mobilenet_v3_small')
+
+    >>> # Get your predictions
+    >>> res = predictor(img)
+    ```
+    """
+    )
+
+    # add run configuration to readme if available
+    if run_config is not None:
+        arch = run_config.arch
+        readme += textwrap.dedent(
+            f"""### Run Configuration
+            \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}"""
+        )
+
+    if arch not in AVAILABLE_ARCHS[task]:
+        raise ValueError(
+            f"Architecture: {arch} for task: {task} not found.\
+            \nAvailable architectures: {AVAILABLE_ARCHS}"
+        )
+
+    commit_message = f"Add {model_name} model"
+
+    local_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "hub", model_name)
+    repo_url = HfApi().create_repo(model_name, token=get_token(), exist_ok=override)
+    repo = Repository(local_dir=local_cache_dir, clone_from=repo_url)
+
+    with repo.commit(commit_message):
+        _save_model_and_config_for_hf_hub(model, repo.local_dir, arch=arch, task=task)
+        readme_path = Path(repo.local_dir) / "README.md"
+        readme_path.write_text(readme)
+
+    repo.git_push()
+
+
+def from_hub(repo_id: str, engine_cfg: Optional[EngineConfig] = None, **kwargs: Any):
+    """Instantiate & load a pretrained model from HF hub.
+
+    >>> from onnxtr.models import from_hub
+    >>> model = from_hub("onnxtr/my-model")
+
+    Args:
+    ----
+        repo_id: HuggingFace model hub repo
+        engine_cfg: configuration for the inference engine (optional)
+        kwargs: kwargs of `hf_hub_download`
+
+    Returns:
+    -------
+        Model loaded with the checkpoint
+    """
+    # Get the config
+    with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f:
+        cfg = json.load(f)
+    model_path = hf_hub_download(repo_id, filename="model.onnx", **kwargs)
+
+    arch = cfg["arch"]
+    task = cfg["task"]
+    cfg.pop("arch")
+    cfg.pop("task")
+
+    if task == "classification":
+        model = models.classification.__dict__[arch](model_path, classes=cfg["classes"], engine_cfg=engine_cfg)
+    elif task == "detection":
+        model = models.detection.__dict__[arch](model_path, engine_cfg=engine_cfg)
+    elif task == "recognition":
+        model = models.recognition.__dict__[arch](
+            model_path, input_shape=cfg["input_shape"], vocab=cfg["vocab"], engine_cfg=engine_cfg
+        )
+
+    # convert all values which are lists to tuples
+    for key, value in cfg.items():
+        if isinstance(value, list):
+            cfg[key] = tuple(value)
+    # update model cfg
+    model.cfg = cfg
+
+    return model
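Round-trip note on `config.json`: `_save_model_and_config_for_hf_hub` serializes `model.cfg` plus the `arch` and `task` keys, and `from_hub` pops those two keys back out and converts any remaining list values to tuples (JSON has no tuple type). A sketch of the dictionary shape after loading, with hypothetical field values:

```python
# Hypothetical contents of a recognition repo's config.json after json.load:
# the model's cfg fields plus the two routing keys.
cfg = {
    "input_shape": [3, 32, 128],  # JSON list; from_hub restores it to a tuple
    "vocab": "0123456789abcdefghijklmnopqrstuvwxyz",  # placeholder vocab string
    "arch": "crnn_mobilenet_v3_small",  # popped and used to pick the constructor
    "task": "recognition",  # popped and used to pick the sub-package
}
```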
@@ -112,8 +112,8 @@ class _OCRPredictor:
             ]
         )
         return [
-            # We exapnd if the page is wider than tall and the angle is 90 or -90
-            rotate_image(page, angle, expand=page.shape[1] > page.shape[0] and abs(angle) == 90)
+            # expand if height and width are not equal
+            rotate_image(page, angle, expand=page.shape[0] != page.shape[1])
             for page, angle in zip(pages, origin_pages_orientations)
         ]

@@ -100,6 +100,8 @@ class OCRPredictor(NestedObject, _OCRPredictor):
         origin_pages_orientations = None
         if self.straighten_pages:
             pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            # update page shapes after straightening
+            origin_page_shapes = [page.shape[:2] for page in pages]

         # forward again to get predictions on straight pages
         loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
@@ -129,8 +129,10 @@ class CRNN(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.vocab = vocab
         self.cfg = cfg
+
         self.postprocessor = CRNNPostProcessor(self.vocab)

     def __call__(
@@ -53,6 +53,7 @@ class MASTER(Engine):

         self.vocab = vocab
         self.cfg = cfg
+
         self.postprocessor = MASTERPostProcessor(vocab=self.vocab)

     def __call__(
@@ -49,8 +49,10 @@ class PARSeq(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.vocab = vocab
         self.cfg = cfg
+
         self.postprocessor = PARSeqPostProcessor(vocab=self.vocab)

     def __call__(
@@ -49,8 +49,10 @@ class SAR(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.vocab = vocab
         self.cfg = cfg
+
         self.postprocessor = SARPostProcessor(self.vocab)

     def __call__(
@@ -57,6 +57,7 @@ class ViTSTR(Engine):
         **kwargs: Any,
     ) -> None:
         super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
+
         self.vocab = vocab
         self.cfg = cfg

@@ -17,9 +17,14 @@ VOCABS: Dict[str, str] = {
     "ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
     "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
     "persian_letters": "پچڢڤگ",
-    "hindi_digits": "٠١٢٣٤٥٦٧٨٩",
+    "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
     "arabic_diacritics": "ًٌٍَُِّْ",
     "arabic_punctuation": "؟؛«»—",
+    "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔअंअःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
+    "hindi_digits": "०१२३४५६७८९",
+    "hindi_punctuation": "।,?!:्ॐ॰॥॰",
+    "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
+    "bangla_digits": "০১২৩৪৫৬৭৮৯",
 }

 VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -32,7 +37,7 @@ VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙ
 VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
 VOCABS["arabic"] = (
     VOCABS["digits"]
-    + VOCABS["hindi_digits"]
+    + VOCABS["arabic_digits"]
     + VOCABS["arabic_letters"]
     + VOCABS["persian_letters"]
     + VOCABS["arabic_diacritics"]
@@ -48,10 +53,12 @@ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
 VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
 VOCABS["vietnamese"] = (
     VOCABS["english"]
-    + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
-    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
+    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
+    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
+VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
+VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
 VOCABS["multilingual"] = "".join(
     dict.fromkeys(
         VOCABS["french"]
@@ -0,0 +1 @@
+__version__ = 'v0.4.1'
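The regenerated `version.py` is the single source of the release string and can be read directly:

```python
from onnxtr.version import __version__

print(__version__)  # 'v0.4.1'
```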
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onnxtr
-Version: 0.3.2
+Version: 0.4.1
 Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
 Author-email: Felix Dittrich <felixdittrich92@gmail.com>
 Maintainer: Felix Dittrich
@@ -225,7 +225,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <4,>=3.9.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy<2.0.0,>=1.16.0
+Requires-Dist: numpy<3.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: opencv-python<5.0.0,>=4.5.0
 Requires-Dist: pypdfium2<5.0.0,>=4.11.0
@@ -233,6 +233,7 @@ Requires-Dist: pyclipper<2.0.0,>=1.2.0
 Requires-Dist: shapely<3.0.0,>=1.6.0
 Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
 Requires-Dist: langdetect<2.0.0,>=1.0.9
+Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
@@ -275,7 +276,7 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
 [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
-[![Pypi](https://img.shields.io/badge/pypi-v0.3.1-blue.svg)](https://pypi.org/project/OnnxTR/)
+[![Pypi](https://img.shields.io/badge/pypi-v0.4.1-blue.svg)](https://pypi.org/project/OnnxTR/)

 > :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide a Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.

@@ -449,6 +450,69 @@ det_model = linknet_resnet18("path_to_custom_model.onnx")
 model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
 ```

+## Loading models from HuggingFace Hub
+
+You can also load models from the HuggingFace Hub:
+
+```python
+from onnxtr.io import DocumentFile
+from onnxtr.models import ocr_predictor, from_hub
+
+img = DocumentFile.from_images(['<image_path>'])
+# Load your model from the hub
+model = from_hub('onnxtr/my-model')
+
+# Pass it to the predictor
+# If your model is a recognition model:
+predictor = ocr_predictor(
+    det_arch='db_mobilenet_v3_large',
+    reco_arch=model
+)
+
+# If your model is a detection model:
+predictor = ocr_predictor(
+    det_arch=model,
+    reco_arch='crnn_mobilenet_v3_small'
+)
+
+# Get your predictions
+res = predictor(img)
+```
+
+HF Hub search: [here](https://huggingface.co/models?search=onnxtr).
+
+Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)
+
+Or push your own models to the hub:
+
+```python
+from onnxtr.models import parseq, push_to_hf_hub, login_to_hub
+from onnxtr.utils.vocabs import VOCABS
+
+# Login to the hub
+login_to_hub()
+
+# Recogniton model
+model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-parseq-multilingual-v1",
+    task="recognition",  # The task for which the model is intended [detection, recognition, classification]
+    arch="parseq",  # The name of the model architecture
+    override=False  # Set to `True` if you want to override an existing model / repository
+)
+
+# Detection model
+model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
+push_to_hf_hub(
+    model,
+    model_name="onnxtr-linknet-resnet18",
+    task="detection",
+    arch="linknet_resnet18",
+    override=True
+)
+```
+
 ## Models architectures

 Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
@@ -525,8 +589,8 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |Library                          |FUNSD (199 pages)  |CORD (900 pages)   |
 |---------------------------------|-------------------|-------------------|
 |docTR (CPU) - v0.8.1             | ~1.29s / Page     | ~0.60s / Page     |
-|**OnnxTR (CPU)** - v0.1.2        | ~0.57s / Page     | **~0.25s / Page** |
-|**OnnxTR (CPU) 8-bit** - v0.1.2  | **~0.38s / Page** | **~0.14s / Page** |
+|**OnnxTR (CPU)** - v0.4.1        | ~0.57s / Page     | **~0.25s / Page** |
+|**OnnxTR (CPU) 8-bit** - v0.4.1  | **~0.38s / Page** | **~0.14s / Page** |
 |EasyOCR (CPU) - v1.7.1           | ~1.96s / Page     | ~1.75s / Page     |
 |**PyTesseract (CPU)** - v0.3.10  | **~0.50s / Page** | ~0.52s / Page     |
 |Surya (line) (CPU) - v0.4.4      | ~48.76s / Page    | ~35.49s / Page    |
@@ -538,7 +602,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_
 |-------------------------------------|-------------------|-------------------|
 |docTR (GPU) - v0.8.1                 | ~0.07s / Page     | ~0.05s / Page     |
 |**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page** | **~0.03s / Page** |
-|OnnxTR (GPU) - v0.1.2                | **~0.06s / Page** | ~0.04s / Page     |
+|OnnxTR (GPU) - v0.4.1                | **~0.06s / Page** | ~0.04s / Page     |
 |EasyOCR (GPU) - v1.7.1               | ~0.31s / Page     | ~0.19s / Page     |
 |Surya (GPU) float16 - v0.4.4         | ~3.70s / Page     | ~2.81s / Page     |
 |**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page     | **~0.03s / Page** |
@@ -557,6 +621,16 @@ If you wish to cite please refer to the base project citation, feel free to use
 }
 ```

+```bibtex
+@misc{onnxtr2024,
+    title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
+    author={Felix Dittrich},
+    year={2024},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
+}
+```
+
 ## License

 Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
@@ -45,6 +45,8 @@ onnxtr/models/detection/postprocessor/__init__.py
 onnxtr/models/detection/postprocessor/base.py
 onnxtr/models/detection/predictor/__init__.py
 onnxtr/models/detection/predictor/base.py
+onnxtr/models/factory/__init__.py
+onnxtr/models/factory/hub.py
 onnxtr/models/predictor/__init__.py
 onnxtr/models/predictor/base.py
 onnxtr/models/predictor/predictor.py
@@ -1,4 +1,4 @@
-numpy<2.0.0,>=1.16.0
+numpy<3.0.0,>=1.16.0
 scipy<2.0.0,>=1.4.0
 opencv-python<5.0.0,>=4.5.0
 pypdfium2<5.0.0,>=4.11.0
@@ -6,6 +6,7 @@ pyclipper<2.0.0,>=1.2.0
 shapely<3.0.0,>=1.6.0
 rapidfuzz<4.0.0,>=3.0.0
 langdetect<2.0.0,>=1.0.9
+huggingface-hub<1.0.0,>=0.23.0
 Pillow>=9.2.0
 defusedxml>=0.7.0
 anyascii>=0.3.2
@@ -31,7 +31,7 @@ dynamic = ["version"]
 dependencies = [
     # For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
     # Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
-    "numpy>=1.16.0,<2.0.0",
+    "numpy>=1.16.0,<3.0.0",
     "scipy>=1.4.0,<2.0.0",
     "opencv-python>=4.5.0,<5.0.0",
     "pypdfium2>=4.11.0,<5.0.0",
@@ -39,6 +39,7 @@ dependencies = [
     "shapely>=1.6.0,<3.0.0",
     "rapidfuzz>=3.0.0,<4.0.0",
     "langdetect>=1.0.9,<2.0.0",
+    "huggingface-hub>=0.23.0,<1.0.0",
     "Pillow>=9.2.0",
     "defusedxml>=0.7.0",
     "anyascii>=0.3.2",
@@ -126,6 +127,7 @@ module = [
     "weasyprint.*",
     "pypdfium2.*",
     "langdetect.*",
+    "huggingface_hub.*",
     "rapidfuzz.*",
     "anyascii.*",
     "tqdm.*",
@@ -9,7 +9,7 @@ from pathlib import Path
 from setuptools import setup

 PKG_NAME = "onnxtr"
-VERSION = os.getenv("BUILD_VERSION", "0.3.2a0")
+VERSION = os.getenv("BUILD_VERSION", "0.4.1a0")


 if __name__ == "__main__":
@@ -1 +0,0 @@
-__version__ = 'v0.3.2'