python-doctr 0.10.0__tar.gz → 0.12.0__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {python_doctr-0.10.0 → python_doctr-0.12.0}/PKG-INFO +38 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/README.md +28 -14
- python_doctr-0.12.0/doctr/contrib/__init__.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py +7 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/base.py +8 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py +1 -0
- python_doctr-0.12.0/doctr/datasets/coco_text.py +139 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/cord.py +10 -8
- python_doctr-0.12.0/doctr/datasets/datasets/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/base.py +16 -16
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/pytorch.py +12 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/tensorflow.py +10 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/detection.py +6 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/doc_artefacts.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py +9 -8
- python_doctr-0.12.0/doctr/datasets/generator/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/base.py +16 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/pytorch.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/tensorflow.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py +5 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py +6 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py +10 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiithws.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py +15 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/loader.py +4 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/mjsynth.py +6 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/orientation.py +3 -4
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/sroie.py +6 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py +7 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svt.py +6 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py +19 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/utils.py +41 -35
- python_doctr-0.12.0/doctr/datasets/vocabs.py +1140 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/wildreceipt.py +14 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/file_utils.py +11 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/elements.py +96 -82
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/html.py +1 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/base.py +2 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/pytorch.py +3 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/tensorflow.py +2 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/pdf.py +5 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/reader.py +5 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/_utils.py +15 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/builder.py +30 -48
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/__init__.py +1 -0
- {python_doctr-0.10.0/doctr/models/classification/mobilenet → python_doctr-0.12.0/doctr/models/classification/magc_resnet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/pytorch.py +11 -15
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- {python_doctr-0.10.0/doctr/models/classification/vgg → python_doctr-0.12.0/doctr/models/classification/mobilenet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/pytorch.py +20 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/tensorflow.py +19 -23
- python_doctr-0.12.0/doctr/models/classification/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/pytorch.py +7 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/tensorflow.py +6 -8
- python_doctr-0.12.0/doctr/models/classification/resnet/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/pytorch.py +47 -34
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/tensorflow.py +45 -35
- {python_doctr-0.10.0/doctr/transforms/functional → python_doctr-0.12.0/doctr/models/classification/textnet}/__init__.py +3 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/pytorch.py +20 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/tensorflow.py +19 -17
- python_doctr-0.12.0/doctr/models/classification/vgg/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/pytorch.py +21 -8
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/tensorflow.py +20 -14
- python_doctr-0.12.0/doctr/models/classification/vip/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
- python_doctr-0.12.0/doctr/models/classification/vip/pytorch.py +505 -0
- python_doctr-0.12.0/doctr/models/classification/vit/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/pytorch.py +18 -15
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/tensorflow.py +15 -12
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/zoo.py +23 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/core.py +3 -3
- python_doctr-0.12.0/doctr/models/detection/_utils/__init__.py +7 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/base.py +4 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/pytorch.py +1 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/tensorflow.py +1 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/core.py +2 -8
- python_doctr-0.12.0/doctr/models/detection/differentiable_binarization/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/base.py +10 -21
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- python_doctr-0.12.0/doctr/models/detection/fast/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/base.py +8 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/pytorch.py +37 -35
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/tensorflow.py +24 -28
- python_doctr-0.12.0/doctr/models/detection/linknet/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/base.py +8 -18
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/pytorch.py +34 -28
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/tensorflow.py +24 -25
- python_doctr-0.12.0/doctr/models/detection/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/pytorch.py +6 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/tensorflow.py +5 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/zoo.py +27 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/hub.py +6 -10
- python_doctr-0.12.0/doctr/models/kie_predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/base.py +4 -5
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/pytorch.py +19 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/tensorflow.py +14 -15
- python_doctr-0.12.0/doctr/models/modules/layers/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/pytorch.py +55 -10
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/tensorflow.py +5 -7
- python_doctr-0.12.0/doctr/models/modules/transformer/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/pytorch.py +12 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/tensorflow.py +9 -10
- python_doctr-0.12.0/doctr/models/modules/vision_transformer/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/pytorch.py +2 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- python_doctr-0.12.0/doctr/models/predictor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/base.py +28 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/pytorch.py +13 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/tensorflow.py +9 -10
- python_doctr-0.12.0/doctr/models/preprocessor/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/pytorch.py +13 -17
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/tensorflow.py +10 -14
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/__init__.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/core.py +3 -7
- python_doctr-0.12.0/doctr/models/recognition/crnn/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/pytorch.py +30 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/tensorflow.py +21 -24
- python_doctr-0.12.0/doctr/models/recognition/master/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/pytorch.py +32 -25
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/tensorflow.py +22 -25
- python_doctr-0.12.0/doctr/models/recognition/parseq/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/pytorch.py +47 -29
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/tensorflow.py +29 -27
- python_doctr-0.12.0/doctr/models/recognition/predictor/__init__.py +6 -0
- python_doctr-0.12.0/doctr/models/recognition/predictor/_utils.py +145 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/pytorch.py +9 -9
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/tensorflow.py +8 -9
- python_doctr-0.12.0/doctr/models/recognition/sar/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/pytorch.py +30 -22
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/tensorflow.py +22 -24
- python_doctr-0.12.0/doctr/models/recognition/utils.py +93 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/__init__.py +4 -0
- python_doctr-0.12.0/doctr/models/recognition/viptr/pytorch.py +277 -0
- python_doctr-0.12.0/doctr/models/recognition/vitstr/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/base.py +3 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/pytorch.py +28 -21
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/tensorflow.py +22 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/zoo.py +27 -11
- python_doctr-0.12.0/doctr/models/utils/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/pytorch.py +41 -34
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/tensorflow.py +31 -23
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/zoo.py +1 -5
- python_doctr-0.12.0/doctr/transforms/functional/__init__.py +6 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/base.py +4 -11
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/pytorch.py +20 -28
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/tensorflow.py +10 -22
- python_doctr-0.12.0/doctr/transforms/modules/__init__.py +8 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/base.py +48 -55
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/pytorch.py +58 -22
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/tensorflow.py +18 -32
- python_doctr-0.12.0/doctr/utils/common_types.py +17 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/data.py +9 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/fonts.py +2 -7
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/geometry.py +17 -48
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/metrics.py +17 -37
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/multithreading.py +4 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/reconstitution.py +9 -13
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/repr.py +2 -3
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/visualization.py +16 -29
- python_doctr-0.12.0/doctr/version.py +1 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/pyproject.toml +9 -6
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/PKG-INFO +38 -20
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/SOURCES.txt +7 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/requires.txt +13 -2
- {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.py +2 -2
- python_doctr-0.10.0/doctr/contrib/__init__.py +0 -0
- python_doctr-0.10.0/doctr/datasets/datasets/__init__.py +0 -6
- python_doctr-0.10.0/doctr/datasets/generator/__init__.py +0 -6
- python_doctr-0.10.0/doctr/datasets/vocabs.py +0 -82
- python_doctr-0.10.0/doctr/models/classification/magc_resnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/resnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/textnet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/classification/vit/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/_utils/__init__.py +0 -7
- python_doctr-0.10.0/doctr/models/detection/differentiable_binarization/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/fast/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/linknet/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/detection/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/kie_predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/layers/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/transformer/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/modules/vision_transformer/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/preprocessor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/crnn/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/master/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/parseq/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/predictor/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/predictor/_utils.py +0 -86
- python_doctr-0.10.0/doctr/models/recognition/sar/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/recognition/utils.py +0 -89
- python_doctr-0.10.0/doctr/models/recognition/vitstr/__init__.py +0 -6
- python_doctr-0.10.0/doctr/models/utils/__init__.py +0 -6
- python_doctr-0.10.0/doctr/transforms/modules/__init__.py +0 -8
- python_doctr-0.10.0/doctr/utils/common_types.py +0 -18
- python_doctr-0.10.0/doctr/version.py +0 -1
- {python_doctr-0.10.0 → python_doctr-0.12.0}/LICENSE +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/py.typed +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/__init__.py +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/dependency_links.txt +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/top_level.txt +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/zip-safe +0 -0
- {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.cfg +0 -0
{python_doctr-0.10.0 → python_doctr-0.12.0}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.10.0
+Version: 0.12.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,11 +219,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -239,9 +239,11 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
+Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
 Provides-Extra: torch
@@ -275,7 +277,8 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
@@ -300,12 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
+Dynamic: license-file

 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>

-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)


 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -436,9 +440,22 @@ The KIE predictor results per page are in a dictionary format with each key repr

 ## Installation

+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites

-Python 3.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

 ### Latest release

@@ -502,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

 ## More goodies

@@ -557,37 +575,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to

 ### Docker container

-
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).

 #### Using GPU with docTR Docker Images

-The docTR Docker images are GPU-ready and based on CUDA `
-
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.

 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).

 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:

 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```

 #### Available Tags

-The Docker images for docTR follow a specific tag nomenclature: `<
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:

-- `<
-- `<python_version>`: `3.
-- `<
-- `<
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`

 Here are examples of different image tags:

 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.
-| `torch-py3.
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |

 #### Building Docker Images Locally

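The most consequential metadata change above is the platform-conditional TensorFlow requirement: on Linux the `tf` and `dev` extras now resolve to `tensorflow[and-cuda]`, everywhere else to plain `tensorflow`. A small illustration (not part of the package; it assumes the `packaging` library, which pip uses internally) of how such PEP 508 markers evaluate:

```python
# Sketch only: evaluate the environment markers added in the 0.12.0 metadata.
from packaging.markers import Marker

linux_tf = Marker('sys_platform == "linux" and extra == "tf"')
other_tf = Marker('sys_platform != "linux" and extra == "tf"')

# "extra" is supplied by the installer when the [tf] extra is requested;
# sys_platform comes from the running interpreter.
env = {"extra": "tf"}
print(linux_tf.evaluate(env))  # True on Linux  -> tensorflow[and-cuda] is selected
print(other_tf.evaluate(env))  # True elsewhere -> plain tensorflow is selected
```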
{python_doctr-0.10.0 → python_doctr-0.12.0}/README.md

@@ -2,7 +2,7 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>

-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)


 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -133,9 +133,22 @@ The KIE predictor results per page are in a dictionary format with each key repr

 ## Installation

+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites

-Python 3.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

 ### Latest release

@@ -199,6 +212,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

 ## More goodies

@@ -254,37 +268,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to

 ### Docker container

-
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).

 #### Using GPU with docTR Docker Images

-The docTR Docker images are GPU-ready and based on CUDA `
-
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.

 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).

 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:

 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```

 #### Available Tags

-The Docker images for docTR follow a specific tag nomenclature: `<
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:

-- `<
-- `<python_version>`: `3.
-- `<
-- `<
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`

 Here are examples of different image tags:

 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.
-| `torch-py3.
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |

 #### Building Docker Images Locally

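Given the TensorFlow deprecation notice added above, the PyTorch backend is the recommended path going forward. A minimal quick-start sketch (the image path is a placeholder; assumes `pip install "python-doctr[torch]"`):

```python
# Minimal OCR run with the PyTorch backend; no TensorFlow required.
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

predictor = ocr_predictor(pretrained=True)             # end-to-end detection + recognition
pages = DocumentFile.from_images(["path/to/page.jpg"]) # also supports from_pdf / from_url
result = predictor(pages)                              # structured Document (pages -> blocks -> lines -> words)
print(result.render())                                 # plain-text rendering
```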
python_doctr-0.12.0/doctr/contrib/__init__.py (new file)

@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor

 __all__ = ["ArtefactDetector"]

-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)

     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path:
-        labels:
-        input_shape:
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []

         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results

         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
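For context, the class edited above is the ONNX-based artefact detector in `doctr.contrib`, now re-exported by the new `doctr/contrib/__init__.py`. A usage sketch based on its docstring; it requires `onnxruntime`, and the image path is a placeholder:

```python
from doctr.io import DocumentFile
from doctr.contrib import ArtefactDetector  # re-exported by the new __init__.py

doc = DocumentFile.from_images(["path/to/page.jpg"])
detector = ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5)
results = detector(doc)   # per-page detections (bar_code / qr_code / logo / photo)
detector.show()           # optional matplotlib preview of the detected boxes
```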
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/base.py

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

-from typing import Any
+from typing import Any

 import numpy as np

@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors

     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """

-    def __init__(self, batch_size: int, url:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)

-        self._inputs:
-        self._results:
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []

-    def _init_model(self, url:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed

         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`

         Returns:
-        -------
             Any: the ONNX loaded model
         """
         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
@@ -57,40 +54,34 @@ class _BasePredictor:
         Preprocess the input image

         Args:
-        ----
             img: the input image to preprocess

         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError

-    def postprocess(self, output:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output

         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output

         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError

-    def __call__(self, inputs:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs

         Args:
-        ----
             inputs: the inputs to use

         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
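`_BasePredictor` only changes its type hints and docstring layout here; the contract itself is unchanged. A schematic subclass (hypothetical, not from the package) showing what `preprocess` and `postprocess` are expected to provide under that contract:

```python
from typing import Any

import numpy as np

from doctr.contrib.base import _BasePredictor


class DummyPredictor(_BasePredictor):
    """Hypothetical predictor sketch: normalizes inputs, returns raw outputs as lists."""

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # scale to [0, 1] and move channels first, as an ONNX session typically expects
        return np.transpose(img.astype(np.float32) / 255.0, (2, 0, 1))

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # map raw session outputs back to one result per input image
        return [out.tolist() for out in output]
```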
python_doctr-0.12.0/doctr/datasets/coco_text.py (new file)

@@ -0,0 +1,139 @@
+# Copyright (C) 2021-2025, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+class COCOTEXT(AbstractDataset):
+    """
+    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+    <https://arxiv.org/pdf/1601.07140v2>`_ |
+    `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import COCOTEXT
+    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+    >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = train_set[0]
+    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+    >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = test_set[0]
+
+    Args:
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        detection_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                + " To get the whole dataset with boxes and labels leave both parameters to False "
+            )
+
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+        with open(label_path, "r") as file:
+            data = json.load(file)
+
+        # Filter images based on the set
+        img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+        box: list[float] | np.ndarray
+
+        for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+            img_path = os.path.join(img_folder, img_info["file_name"])
+
+            # File existence check
+            if not os.path.exists(img_path):  # pragma: no cover
+                raise FileNotFoundError(f"Unable to locate {img_path}")
+
+            # Get annotations for the current image (only legible text)
+            annotations = [
+                ann
+                for ann in data["anns"].values()
+                if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+            ]
+
+            # Some images have no annotations with readable text
+            if not annotations:  # pragma: no cover
+                continue
+
+            _targets = []
+
+            for annotation in annotations:
+                x, y, w, h = annotation["bbox"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [x, y],
+                            [x + w, y],
+                            [x + w, y + h],
+                            [x, y + h],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    # (xmin, ymin, xmax, ymax) coordinates
+                    box = [x, y, x + w, y + h]
+                _targets.append((annotation["utf8_string"], box))
+            text_targets, box_targets = zip(*_targets)
+
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+
+        self.root = tmp_root
+
+    def extra_repr(self) -> str:
+        return f"train={self.train}"
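The new `COCOTEXT` loader follows the same interface as the other dataset classes. A usage sketch mirroring its docstring (the `train2014` images and the `cocotext.v2.json` annotations must be downloaded separately):

```python
from doctr.datasets import COCOTEXT

train_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
)
img, target = train_set[0]  # target: {"boxes": ..., "labels": ...} by default

# Recognition-only mode yields (crop, word) pairs and skips labels containing spaces.
reco_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    recognition_task=True,
)
```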
{python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/cord.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.

 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any

 import numpy as np
 from tqdm import tqdm
@@ -29,7 +29,6 @@ class CORD(VisionDataset):
     >>> img, target = train_set[0]

     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -72,12 +71,14 @@ class CORD(VisionDataset):
                 + "To get the whole dataset with boxes and labels leave both parameters to False."
             )

-        #
+        # list images
         tmp_root = os.path.join(self.root, "image")
-        self.data:
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         self.train = train
         np_dtype = np.float32
-        for img_path in tqdm(
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading CORD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -91,7 +92,7 @@ class CORD(VisionDataset):
                         if len(word["text"]) > 0:
                             x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
                             y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
-                            box:
+                            box: list[float] | np.ndarray
                             if use_polygons:
                                 # (x, y) coordinates of top left, top right, bottom right, bottom left corners
                                 box = np.array(
@@ -115,7 +116,8 @@ class CORD(VisionDataset):
                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
                 )
                 for crop, label in zip(crops, list(text_targets)):
-                    self.data.append((crop, label))
+                    if " " not in label:
+                        self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
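The CORD change mirrors COCO-Text: in `recognition_task` mode, crops whose label contains a space are now skipped. A short sketch; the `download=True` flag is assumed from the upstream docstring and is not shown in this diff:

```python
from doctr.datasets import CORD

reco_set = CORD(train=True, download=True, recognition_task=True)
crop, word = reco_set[0]  # word-level crop and its (space-free) label
```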