python-doctr 0.8.1.tar.gz → 0.10.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. {python-doctr-0.8.1/python_doctr.egg-info → python_doctr-0.10.0}/PKG-INFO +43 -39
  2. {python-doctr-0.8.1 → python_doctr-0.10.0}/README.md +14 -16
  3. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/__init__.py +1 -1
  4. python_doctr-0.10.0/doctr/contrib/__init__.py +0 -0
  5. python_doctr-0.10.0/doctr/contrib/artefacts.py +131 -0
  6. python_doctr-0.10.0/doctr/contrib/base.py +105 -0
  7. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/cord.py +10 -1
  8. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py +2 -2
  9. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/funsd.py +11 -1
  10. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/base.py +6 -5
  11. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ic03.py +11 -1
  12. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ic13.py +10 -1
  13. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/iiit5k.py +26 -16
  14. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/imgur5k.py +11 -2
  15. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/loader.py +1 -6
  16. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/sroie.py +11 -1
  17. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/svhn.py +11 -1
  18. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/svt.py +11 -1
  19. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/synthtext.py +11 -1
  20. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/utils.py +9 -3
  21. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/vocabs.py +15 -4
  22. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/wildreceipt.py +12 -1
  23. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/file_utils.py +45 -12
  24. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/elements.py +52 -10
  25. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/html.py +2 -2
  26. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/pytorch.py +6 -8
  27. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/tensorflow.py +1 -1
  28. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/pdf.py +5 -2
  29. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/reader.py +6 -0
  30. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/__init__.py +0 -1
  31. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/_utils.py +57 -20
  32. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/builder.py +73 -15
  33. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  34. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/pytorch.py +47 -9
  35. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/tensorflow.py +51 -14
  36. python_doctr-0.10.0/doctr/models/classification/predictor/pytorch.py +67 -0
  37. python_doctr-0.10.0/doctr/models/classification/predictor/tensorflow.py +62 -0
  38. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/tensorflow.py +21 -8
  39. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/pytorch.py +3 -3
  40. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/tensorflow.py +11 -5
  41. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/tensorflow.py +9 -3
  42. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/tensorflow.py +10 -4
  43. python_doctr-0.10.0/doctr/models/classification/zoo.py +110 -0
  44. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/__init__.py +1 -0
  45. python_doctr-0.10.0/doctr/models/detection/_utils/base.py +66 -0
  46. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/base.py +4 -3
  47. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  48. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  49. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/base.py +6 -5
  50. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/pytorch.py +4 -4
  51. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/tensorflow.py +15 -12
  52. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/base.py +4 -3
  53. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/tensorflow.py +23 -11
  54. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/pytorch.py +15 -1
  55. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/tensorflow.py +17 -3
  56. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/zoo.py +7 -2
  57. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/factory/hub.py +8 -18
  58. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/base.py +13 -3
  59. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/pytorch.py +45 -20
  60. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/tensorflow.py +44 -17
  61. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/pytorch.py +2 -3
  62. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/tensorflow.py +6 -8
  63. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/pytorch.py +2 -2
  64. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/tensorflow.py +0 -2
  65. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/pytorch.py +1 -1
  66. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  67. python_doctr-0.10.0/doctr/models/predictor/base.py +200 -0
  68. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/pytorch.py +35 -20
  69. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/tensorflow.py +35 -18
  70. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/pytorch.py +4 -4
  71. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/tensorflow.py +3 -2
  72. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/tensorflow.py +8 -6
  73. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/pytorch.py +2 -2
  74. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/tensorflow.py +9 -4
  75. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/pytorch.py +4 -3
  76. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/tensorflow.py +14 -11
  77. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/pytorch.py +7 -6
  78. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/tensorflow.py +10 -12
  79. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/pytorch.py +1 -1
  80. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/tensorflow.py +9 -4
  81. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/zoo.py +1 -1
  82. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/pytorch.py +1 -1
  83. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/tensorflow.py +15 -15
  84. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/zoo.py +2 -2
  85. python_doctr-0.10.0/doctr/py.typed +0 -0
  86. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/base.py +1 -1
  87. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/pytorch.py +5 -5
  88. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/base.py +37 -15
  89. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/pytorch.py +73 -14
  90. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/tensorflow.py +78 -19
  91. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/fonts.py +7 -5
  92. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/geometry.py +141 -31
  93. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/metrics.py +34 -175
  94. python_doctr-0.10.0/doctr/utils/reconstitution.py +212 -0
  95. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/visualization.py +5 -118
  96. python_doctr-0.10.0/doctr/version.py +1 -0
  97. {python-doctr-0.8.1 → python_doctr-0.10.0}/pyproject.toml +48 -32
  98. {python-doctr-0.8.1 → python_doctr-0.10.0/python_doctr.egg-info}/PKG-INFO +43 -39
  99. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/SOURCES.txt +6 -6
  100. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/requires.txt +29 -20
  101. {python-doctr-0.8.1 → python_doctr-0.10.0}/setup.py +1 -1
  102. python-doctr-0.8.1/doctr/models/artefacts/__init__.py +0 -2
  103. python-doctr-0.8.1/doctr/models/artefacts/barcode.py +0 -74
  104. python-doctr-0.8.1/doctr/models/artefacts/face.py +0 -63
  105. python-doctr-0.8.1/doctr/models/classification/predictor/pytorch.py +0 -56
  106. python-doctr-0.8.1/doctr/models/classification/predictor/tensorflow.py +0 -52
  107. python-doctr-0.8.1/doctr/models/classification/zoo.py +0 -74
  108. python-doctr-0.8.1/doctr/models/obj_detection/__init__.py +0 -1
  109. python-doctr-0.8.1/doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  110. python-doctr-0.8.1/doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  111. python-doctr-0.8.1/doctr/models/predictor/base.py +0 -161
  112. python-doctr-0.8.1/doctr/version.py +0 -1
  113. {python-doctr-0.8.1 → python_doctr-0.10.0}/LICENSE +0 -0
  114. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/__init__.py +0 -0
  115. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/__init__.py +0 -0
  116. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/base.py +0 -0
  117. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/tensorflow.py +0 -0
  118. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/detection.py +0 -0
  119. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/doc_artefacts.py +0 -0
  120. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/__init__.py +0 -0
  121. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/pytorch.py +0 -0
  122. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/tensorflow.py +0 -0
  123. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/iiithws.py +0 -0
  124. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/mjsynth.py +0 -0
  125. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ocr.py +0 -0
  126. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/orientation.py +0 -0
  127. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/recognition.py +0 -0
  128. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/__init__.py +0 -0
  129. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/__init__.py +0 -0
  130. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/base.py +0 -0
  131. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/__init__.py +0 -0
  132. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
  133. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
  134. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
  135. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/predictor/__init__.py +0 -0
  136. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/__init__.py +0 -0
  137. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/pytorch.py +0 -0
  138. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/__init__.py +0 -0
  139. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/__init__.py +0 -0
  140. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/pytorch.py +0 -0
  141. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/__init__.py +0 -0
  142. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/pytorch.py +0 -0
  143. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/core.py +0 -0
  144. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/__init__.py +0 -0
  145. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/pytorch.py +0 -0
  146. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
  147. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/core.py +0 -0
  148. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
  149. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/__init__.py +0 -0
  150. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/__init__.py +0 -0
  151. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/pytorch.py +0 -0
  152. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/__init__.py +0 -0
  153. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/factory/__init__.py +0 -0
  154. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/__init__.py +0 -0
  155. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/__init__.py +0 -0
  156. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/__init__.py +0 -0
  157. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/__init__.py +0 -0
  158. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
  159. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/__init__.py +0 -0
  160. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/__init__.py +0 -0
  161. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/__init__.py +0 -0
  162. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/core.py +0 -0
  163. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/__init__.py +0 -0
  164. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
  165. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/__init__.py +0 -0
  166. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/base.py +0 -0
  167. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/__init__.py +0 -0
  168. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/base.py +0 -0
  169. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/__init__.py +0 -0
  170. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/_utils.py +0 -0
  171. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
  172. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
  173. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/__init__.py +0 -0
  174. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/utils.py +0 -0
  175. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
  176. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/base.py +0 -0
  177. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/__init__.py +0 -0
  178. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/__init__.py +0 -0
  179. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/__init__.py +0 -0
  180. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/tensorflow.py +0 -0
  181. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/__init__.py +0 -0
  182. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/__init__.py +0 -0
  183. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/common_types.py +0 -0
  184. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/data.py +0 -0
  185. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/multithreading.py +0 -0
  186. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/repr.py +0 -0
  187. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  188. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/top_level.txt +0 -0
  189. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/zip-safe +0 -0
  190. {python-doctr-0.8.1 → python_doctr-0.10.0}/setup.cfg +0 -0
{python-doctr-0.8.1/python_doctr.egg-info → python_doctr-0.10.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: python-doctr
- Version: 0.8.1
+ Version: 0.10.0
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
  Author-email: Mindee <contact@mindee.com>
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,42 +219,45 @@ Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Natural Language :: English
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: <4,>=3.8.0
+ Requires-Python: <4,>=3.9.0
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: importlib_metadata
- Requires-Dist: numpy<2.0.0,>=1.16.0
+ Requires-Dist: numpy<3.0.0,>=1.16.0
  Requires-Dist: scipy<2.0.0,>=1.4.0
  Requires-Dist: h5py<4.0.0,>=3.1.0
  Requires-Dist: opencv-python<5.0.0,>=4.5.0
- Requires-Dist: pypdfium2<5.0.0,>=4.0.0
+ Requires-Dist: pypdfium2<5.0.0,>=4.11.0
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
  Requires-Dist: shapely<3.0.0,>=1.6.0
  Requires-Dist: langdetect<2.0.0,>=1.0.9
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
  Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
- Requires-Dist: matplotlib>=3.1.0
- Requires-Dist: weasyprint>=55.0
  Requires-Dist: Pillow>=9.2.0
  Requires-Dist: defusedxml>=0.7.0
- Requires-Dist: mplcursors>=0.3
- Requires-Dist: unidecode>=1.0.0
+ Requires-Dist: anyascii>=0.3.2
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: tf
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "tf"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
  Provides-Extra: torch
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "torch"
- Requires-Dist: torchvision>=0.13.0; extra == "torch"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
+ Requires-Dist: torchvision>=0.15.0; extra == "torch"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
+ Provides-Extra: html
+ Requires-Dist: weasyprint>=55.0; extra == "html"
+ Provides-Extra: viz
+ Requires-Dist: matplotlib>=3.1.0; extra == "viz"
+ Requires-Dist: mplcursors>=0.3; extra == "viz"
+ Provides-Extra: contrib
+ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
  Provides-Extra: testing
  Requires-Dist: pytest>=5.3.2; extra == "testing"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
- Requires-Dist: hdf5storage>=0.1.18; extra == "testing"
  Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
  Requires-Dist: requests>=2.20.0; extra == "testing"
  Requires-Dist: psutil>=5.9.5; extra == "testing"
@@ -266,30 +269,33 @@ Provides-Extra: docs
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
- Requires-Dist: docutils<0.21; extra == "docs"
+ Requires-Dist: docutils<0.22; extra == "docs"
  Requires-Dist: recommonmark>=0.7.1; extra == "docs"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
  Requires-Dist: furo>=2022.3.4; extra == "docs"
  Provides-Extra: dev
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "dev"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "dev"
- Requires-Dist: torchvision>=0.13.0; extra == "dev"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
+ Requires-Dist: torchvision>=0.15.0; extra == "dev"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
+ Requires-Dist: weasyprint>=55.0; extra == "dev"
+ Requires-Dist: matplotlib>=3.1.0; extra == "dev"
+ Requires-Dist: mplcursors>=0.3; extra == "dev"
  Requires-Dist: pytest>=5.3.2; extra == "dev"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
- Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
  Requires-Dist: requests>=2.20.0; extra == "dev"
  Requires-Dist: psutil>=5.9.5; extra == "dev"
- Requires-Dist: ruff>=0.1.5; extra == "dev"
- Requires-Dist: mypy>=0.812; extra == "dev"
- Requires-Dist: pre-commit>=2.17.0; extra == "dev"
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
- Requires-Dist: docutils<0.21; extra == "dev"
+ Requires-Dist: docutils<0.22; extra == "dev"
  Requires-Dist: recommonmark>=0.7.1; extra == "dev"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
@@ -299,7 +305,7 @@ Requires-Dist: furo>=2022.3.4; extra == "dev"
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -372,6 +378,7 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

@@ -431,17 +438,7 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -460,12 +457,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:

  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

  ### Developer mode

@@ -647,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
{python-doctr-0.8.1 → python_doctr-0.10.0}/README.md
@@ -2,7 +2,7 @@
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -37,7 +37,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -75,6 +75,7 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

@@ -134,17 +135,7 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -163,12 +154,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:

  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

  ### Developer mode

@@ -350,9 +343,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/__init__.py
@@ -1,3 +1,3 @@
- from . import io, models, datasets, transforms, utils
+ from . import io, models, datasets, contrib, transforms, utils
  from .file_utils import is_tf_available, is_torch_available
  from .version import __version__  # noqa: F401
python_doctr-0.10.0/doctr/contrib/__init__.py (new file, empty)
python_doctr-0.10.0/doctr/contrib/artefacts.py (new file)
@@ -0,0 +1,131 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import cv2
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+
+ from .base import _BasePredictor
+
+ __all__ = ["ArtefactDetector"]
+
+ default_cfgs: Dict[str, Dict[str, Any]] = {
+     "yolov8_artefact": {
+         "input_shape": (3, 1024, 1024),
+         "labels": ["bar_code", "qr_code", "logo", "photo"],
+         "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+     },
+ }
+
+
+ class ArtefactDetector(_BasePredictor):
+     """
+     A class to detect artefacts in images
+
+     >>> from doctr.io import DocumentFile
+     >>> from doctr.contrib.artefacts import ArtefactDetector
+     >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
+     >>> detector = ArtefactDetector()
+     >>> results = detector(doc)
+
+     Args:
+     ----
+         arch: the architecture to use
+         batch_size: the batch size to use
+         model_path: the path to the model to use
+         labels: the labels to use
+         input_shape: the input shape to use
+         mask_labels: the mask labels to use
+         conf_threshold: the confidence threshold to use
+         iou_threshold: the intersection over union threshold to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(
+         self,
+         arch: str = "yolov8_artefact",
+         batch_size: int = 2,
+         model_path: Optional[str] = None,
+         labels: Optional[List[str]] = None,
+         input_shape: Optional[Tuple[int, int, int]] = None,
+         conf_threshold: float = 0.5,
+         iou_threshold: float = 0.5,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
+         self.labels = labels or default_cfgs[arch]["labels"]
+         self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
+         self.conf_threshold = conf_threshold
+         self.iou_threshold = iou_threshold
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+         results = []
+
+         for batch in zip(output, input_images):
+             for out, img in zip(batch[0], batch[1]):
+                 org_height, org_width = img.shape[:2]
+                 width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
+                 for res in out:
+                     sample_results = []
+                     for row in np.transpose(np.squeeze(res)):
+                         classes_scores = row[4:]
+                         max_score = np.amax(classes_scores)
+                         if max_score >= self.conf_threshold:
+                             class_id = np.argmax(classes_scores)
+                             x, y, w, h = row[0], row[1], row[2], row[3]
+                             # to rescaled xmin, ymin, xmax, ymax
+                             xmin = int((x - w / 2) * width_scale)
+                             ymin = int((y - h / 2) * height_scale)
+                             xmax = int((x + w / 2) * width_scale)
+                             ymax = int((y + h / 2) * height_scale)
+
+                             sample_results.append({
+                                 "label": self.labels[class_id],
+                                 "confidence": float(max_score),
+                                 "box": [xmin, ymin, xmax, ymax],
+                             })
+
+                     # Filter out overlapping boxes
+                     boxes = [res["box"] for res in sample_results]
+                     scores = [res["confidence"] for res in sample_results]
+                     keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
+                     sample_results = [sample_results[i] for i in keep_indices]
+
+                     results.append(sample_results)
+
+         self._results = results
+         return results
+
+     def show(self, **kwargs: Any) -> None:
+         """
+         Display the results
+
+         Args:
+         ----
+             **kwargs: additional keyword arguments to be passed to `plt.show`
+         """
+         requires_package("matplotlib", "`.show()` requires matplotlib installed")
+         import matplotlib.pyplot as plt
+         from matplotlib.patches import Rectangle
+
+         # visualize the results with matplotlib
+         if self._results and self._inputs:
+             for img, res in zip(self._inputs, self._results):
+                 plt.figure(figsize=(10, 10))
+                 plt.imshow(img)
+                 for obj in res:
+                     xmin, ymin, xmax, ymax = obj["box"]
+                     label = obj["label"]
+                     plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
+                     plt.gca().add_patch(
+                         Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
+                     )
+                 plt.show(**kwargs)
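This new `contrib` module lands in the same release that drops the old `doctr.models.artefacts` and `doctr.models.obj_detection` packages (items 102-104 and 108-110 in the file list). A minimal usage sketch, following the class docstring above; it assumes the `contrib` extra (`onnxruntime`) is installed and uses a placeholder image path:

```python
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

# Pages are loaded as numpy arrays
doc = DocumentFile.from_images(["path/to/image.jpg"])

# Downloads the default "yolov8_artefact" ONNX model on first use
detector = ArtefactDetector()
results = detector(doc)  # per page: [{"label", "confidence", "box"}, ...]

detector.show()  # optional box overlay; requires matplotlib
```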
python_doctr-0.10.0/doctr/contrib/base.py (new file)
@@ -0,0 +1,105 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, List, Optional
+
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+ from doctr.utils.data import download_from_url
+
+
+ class _BasePredictor:
+     """
+     Base class for all predictors
+
+     Args:
+     ----
+         batch_size: the batch size to use
+         url: the url to use to download a model if needed
+         model_path: the path to the model to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+         self.batch_size = batch_size
+         self.session = self._init_model(url, model_path, **kwargs)
+
+         self._inputs: List[np.ndarray] = []
+         self._results: List[Any] = []
+
+     def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+         """
+         Download the model from the given url if needed
+
+         Args:
+         ----
+             url: the url to use
+             model_path: the path to the model to use
+             **kwargs: additional arguments to be passed to `download_from_url`
+
+         Returns:
+         -------
+             Any: the ONNX loaded model
+         """
+         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
+         import onnxruntime as ort
+
+         if not url and not model_path:
+             raise ValueError("You must provide either a url or a model_path")
+         onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]
+         return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         """
+         Preprocess the input image
+
+         Args:
+         ----
+             img: the input image to preprocess
+
+         Returns:
+         -------
+             np.ndarray: the preprocessed image
+         """
+         raise NotImplementedError
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+         """
+         Postprocess the model output
+
+         Args:
+         ----
+             output: the model output to postprocess
+             input_images: the input images used to generate the output
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         raise NotImplementedError
+
+     def __call__(self, inputs: List[np.ndarray]) -> Any:
+         """
+         Call the model on the given inputs
+
+         Args:
+         ----
+             inputs: the inputs to use
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         self._inputs = inputs
+         model_inputs = self.session.get_inputs()
+
+         batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
+         processed_batches = [
+             np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
+         ]
+
+         outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+         return self.postprocess(outputs, batched_inputs)
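`_BasePredictor` factors out the ONNX plumbing: `_init_model` resolves a URL or local path into an `onnxruntime` session, and `__call__` batches the inputs, runs the session, and hands the raw outputs to `postprocess`. A minimal sketch of a custom predictor built on it; the class name, model path, and pass-through post-processing are hypothetical:

```python
import numpy as np

from doctr.contrib.base import _BasePredictor


class TinyOnnxPredictor(_BasePredictor):
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # HWC uint8 image -> CHW float in [0, 1]; must match the model's input shape
        return np.transpose(img, (2, 0, 1)) / np.array(255.0)

    def postprocess(self, output, input_images):
        # `output` holds one session.run() result per batch; returned unchanged here
        return output


predictor = TinyOnnxPredictor(batch_size=2, model_path="path/to/model.onnx")  # hypothetical path
```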
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/cord.py
@@ -33,6 +33,7 @@ class CORD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """

@@ -53,6 +54,7 @@ class CORD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,10 +66,15 @@ class CORD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )

          # List images
          tmp_root = os.path.join(self.root, "image")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          self.train = train
          np_dtype = np.float32
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
@@ -109,6 +116,8 @@ class CORD(VisionDataset):
                  )
                  for crop, label in zip(crops, list(text_targets)):
                      self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
              else:
                  self.data.append((
                      img_path,
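The `detection_task` flag added here (and to FUNSD below, plus the other dataset loaders in the file list) turns the loader into a pure text-detection dataset. A short sketch of the intended use, assuming the usual CORD download; the exact target layout depends on the configured transforms:

```python
from doctr.datasets import CORD

# detection_task=True yields (image, boxes) samples; setting it together with
# recognition_task=True raises the ValueError shown in the diff
train_set = CORD(train=True, download=True, detection_task=True)
img, boxes = train_set[0]  # boxes: numpy array of bounding boxes for the page
```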
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py
@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
      @staticmethod
      def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
          images, targets = zip(*samples)
-         images = torch.stack(images, dim=0)
+         images = torch.stack(images, dim=0)  # type: ignore[assignment]

-         return images, list(targets)
+         return images, list(targets)  # type: ignore[return-value]


  class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/funsd.py
@@ -33,6 +33,7 @@ class FUNSD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """

@@ -45,6 +46,7 @@ class FUNSD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -55,6 +57,12 @@ class FUNSD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
          np_dtype = np.float32

@@ -63,7 +71,7 @@ class FUNSD(VisionDataset):

          # # List images
          tmp_root = os.path.join(self.root, subfolder, "images")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
              # File existence check
              if not os.path.exists(os.path.join(tmp_root, img_path)):
@@ -100,6 +108,8 @@ class FUNSD(VisionDataset):
                  # filter labels with unknown characters
                  if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                      self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
              else:
                  self.data.append((
                      img_path,