PyPI - python-doctr - Versions diffs - 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (172) hide show

doctr/contrib/__init__.py +1 -0
doctr/contrib/artefacts.py +7 -9
doctr/contrib/base.py +8 -17
doctr/datasets/__init__.py +1 -0
doctr/datasets/coco_text.py +139 -0
doctr/datasets/cord.py +10 -8
doctr/datasets/datasets/__init__.py +4 -4
doctr/datasets/datasets/base.py +16 -16
doctr/datasets/datasets/pytorch.py +12 -12
doctr/datasets/datasets/tensorflow.py +10 -10
doctr/datasets/detection.py +6 -9
doctr/datasets/doc_artefacts.py +3 -4
doctr/datasets/funsd.py +9 -8
doctr/datasets/generator/__init__.py +4 -4
doctr/datasets/generator/base.py +16 -17
doctr/datasets/generator/pytorch.py +1 -3
doctr/datasets/generator/tensorflow.py +1 -3
doctr/datasets/ic03.py +5 -6
doctr/datasets/ic13.py +6 -6
doctr/datasets/iiit5k.py +10 -6
doctr/datasets/iiithws.py +4 -5
doctr/datasets/imgur5k.py +15 -7
doctr/datasets/loader.py +4 -7
doctr/datasets/mjsynth.py +6 -5
doctr/datasets/ocr.py +3 -4
doctr/datasets/orientation.py +3 -4
doctr/datasets/recognition.py +4 -5
doctr/datasets/sroie.py +6 -5
doctr/datasets/svhn.py +7 -6
doctr/datasets/svt.py +6 -7
doctr/datasets/synthtext.py +19 -7
doctr/datasets/utils.py +41 -35
doctr/datasets/vocabs.py +1107 -49
doctr/datasets/wildreceipt.py +14 -10
doctr/file_utils.py +11 -7
doctr/io/elements.py +96 -82
doctr/io/html.py +1 -3
doctr/io/image/__init__.py +3 -3
doctr/io/image/base.py +2 -5
doctr/io/image/pytorch.py +3 -12
doctr/io/image/tensorflow.py +2 -11
doctr/io/pdf.py +5 -7
doctr/io/reader.py +5 -11
doctr/models/_utils.py +15 -23
doctr/models/builder.py +30 -48
doctr/models/classification/__init__.py +1 -0
doctr/models/classification/magc_resnet/__init__.py +3 -3
doctr/models/classification/magc_resnet/pytorch.py +11 -15
doctr/models/classification/magc_resnet/tensorflow.py +11 -14
doctr/models/classification/mobilenet/__init__.py +3 -3
doctr/models/classification/mobilenet/pytorch.py +20 -18
doctr/models/classification/mobilenet/tensorflow.py +19 -23
doctr/models/classification/predictor/__init__.py +4 -4
doctr/models/classification/predictor/pytorch.py +7 -9
doctr/models/classification/predictor/tensorflow.py +6 -8
doctr/models/classification/resnet/__init__.py +4 -4
doctr/models/classification/resnet/pytorch.py +47 -34
doctr/models/classification/resnet/tensorflow.py +45 -35
doctr/models/classification/textnet/__init__.py +3 -3
doctr/models/classification/textnet/pytorch.py +20 -18
doctr/models/classification/textnet/tensorflow.py +19 -17
doctr/models/classification/vgg/__init__.py +3 -3
doctr/models/classification/vgg/pytorch.py +21 -8
doctr/models/classification/vgg/tensorflow.py +20 -14
doctr/models/classification/vip/__init__.py +4 -0
doctr/models/classification/vip/layers/__init__.py +4 -0
doctr/models/classification/vip/layers/pytorch.py +615 -0
doctr/models/classification/vip/pytorch.py +505 -0
doctr/models/classification/vit/__init__.py +3 -3
doctr/models/classification/vit/pytorch.py +18 -15
doctr/models/classification/vit/tensorflow.py +15 -12
doctr/models/classification/zoo.py +23 -14
doctr/models/core.py +3 -3
doctr/models/detection/_utils/__init__.py +4 -4
doctr/models/detection/_utils/base.py +4 -7
doctr/models/detection/_utils/pytorch.py +1 -5
doctr/models/detection/_utils/tensorflow.py +1 -5
doctr/models/detection/core.py +2 -8
doctr/models/detection/differentiable_binarization/__init__.py +4 -4
doctr/models/detection/differentiable_binarization/base.py +10 -21
doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
doctr/models/detection/fast/__init__.py +4 -4
doctr/models/detection/fast/base.py +8 -17
doctr/models/detection/fast/pytorch.py +37 -35
doctr/models/detection/fast/tensorflow.py +24 -28
doctr/models/detection/linknet/__init__.py +4 -4
doctr/models/detection/linknet/base.py +8 -18
doctr/models/detection/linknet/pytorch.py +34 -28
doctr/models/detection/linknet/tensorflow.py +24 -25
doctr/models/detection/predictor/__init__.py +5 -5
doctr/models/detection/predictor/pytorch.py +6 -7
doctr/models/detection/predictor/tensorflow.py +5 -6
doctr/models/detection/zoo.py +27 -7
doctr/models/factory/hub.py +6 -10
doctr/models/kie_predictor/__init__.py +5 -5
doctr/models/kie_predictor/base.py +4 -5
doctr/models/kie_predictor/pytorch.py +19 -20
doctr/models/kie_predictor/tensorflow.py +14 -15
doctr/models/modules/layers/__init__.py +3 -3
doctr/models/modules/layers/pytorch.py +55 -10
doctr/models/modules/layers/tensorflow.py +5 -7
doctr/models/modules/transformer/__init__.py +3 -3
doctr/models/modules/transformer/pytorch.py +12 -13
doctr/models/modules/transformer/tensorflow.py +9 -10
doctr/models/modules/vision_transformer/__init__.py +3 -3
doctr/models/modules/vision_transformer/pytorch.py +2 -3
doctr/models/modules/vision_transformer/tensorflow.py +3 -3
doctr/models/predictor/__init__.py +5 -5
doctr/models/predictor/base.py +28 -29
doctr/models/predictor/pytorch.py +13 -14
doctr/models/predictor/tensorflow.py +9 -10
doctr/models/preprocessor/__init__.py +4 -4
doctr/models/preprocessor/pytorch.py +13 -17
doctr/models/preprocessor/tensorflow.py +10 -14
doctr/models/recognition/__init__.py +1 -0
doctr/models/recognition/core.py +3 -7
doctr/models/recognition/crnn/__init__.py +4 -4
doctr/models/recognition/crnn/pytorch.py +30 -29
doctr/models/recognition/crnn/tensorflow.py +21 -24
doctr/models/recognition/master/__init__.py +3 -3
doctr/models/recognition/master/base.py +3 -7
doctr/models/recognition/master/pytorch.py +32 -25
doctr/models/recognition/master/tensorflow.py +22 -25
doctr/models/recognition/parseq/__init__.py +3 -3
doctr/models/recognition/parseq/base.py +3 -7
doctr/models/recognition/parseq/pytorch.py +47 -29
doctr/models/recognition/parseq/tensorflow.py +29 -27
doctr/models/recognition/predictor/__init__.py +5 -5
doctr/models/recognition/predictor/_utils.py +111 -52
doctr/models/recognition/predictor/pytorch.py +9 -9
doctr/models/recognition/predictor/tensorflow.py +8 -9
doctr/models/recognition/sar/__init__.py +4 -4
doctr/models/recognition/sar/pytorch.py +30 -22
doctr/models/recognition/sar/tensorflow.py +22 -24
doctr/models/recognition/utils.py +57 -53
doctr/models/recognition/viptr/__init__.py +4 -0
doctr/models/recognition/viptr/pytorch.py +277 -0
doctr/models/recognition/vitstr/__init__.py +4 -4
doctr/models/recognition/vitstr/base.py +3 -7
doctr/models/recognition/vitstr/pytorch.py +28 -21
doctr/models/recognition/vitstr/tensorflow.py +22 -23
doctr/models/recognition/zoo.py +27 -11
doctr/models/utils/__init__.py +4 -4
doctr/models/utils/pytorch.py +41 -34
doctr/models/utils/tensorflow.py +31 -23
doctr/models/zoo.py +1 -5
doctr/transforms/functional/__init__.py +3 -3
doctr/transforms/functional/base.py +4 -11
doctr/transforms/functional/pytorch.py +20 -28
doctr/transforms/functional/tensorflow.py +10 -22
doctr/transforms/modules/__init__.py +4 -4
doctr/transforms/modules/base.py +48 -55
doctr/transforms/modules/pytorch.py +58 -22
doctr/transforms/modules/tensorflow.py +18 -32
doctr/utils/common_types.py +8 -9
doctr/utils/data.py +9 -13
doctr/utils/fonts.py +2 -7
doctr/utils/geometry.py +17 -48
doctr/utils/metrics.py +17 -37
doctr/utils/multithreading.py +4 -6
doctr/utils/reconstitution.py +9 -13
doctr/utils/repr.py +2 -3
doctr/utils/visualization.py +16 -29
doctr/version.py +1 -1
{python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
python_doctr-0.12.0.dist-info/RECORD +180 -0
{python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
python_doctr-0.10.0.dist-info/RECORD +0 -173
{python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
{python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0

doctr/datasets/wildreceipt.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,9 +6,10 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any
 import numpy as np
+from tqdm import tqdm
 from .datasets import AbstractDataset
 from .utils import convert_target_to_relative, crop_bboxes_from_image
@@ -17,9 +18,10 @@ __all__ = ["WILDRECEIPT"]
 class WILDRECEIPT(AbstractDataset):
-    """WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
-        <https://arxiv.org/abs/2103.14470v1>`_ |
-    `repository <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
+    """
+    WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
+    <https://arxiv.org/abs/2103.14470v1>`_ |
+    `"repository" <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
     .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0
         :align: center
@@ -34,7 +36,6 @@ class WILDRECEIPT(AbstractDataset):
     >>> img, target = test_set[0]
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the annotations file of the dataset
         train: whether the subset should be the training one
@@ -71,15 +72,18 @@ class WILDRECEIPT(AbstractDataset):
         tmp_root = img_folder
         self.train = train
         np_dtype = np.float32
-        self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         with open(label_path, "r") as file:
             data = file.read()
         # Split the text file into separate JSON strings
         json_strings = data.strip().split("\n")
-        box: Union[List[float], np.ndarray]
-        _targets = []
-        for json_string in json_strings:
+        box: list[float] | np.ndarray
+        for json_string in tqdm(
+            iterable=json_strings, desc="Preparing and Loading WILDRECEIPT", total=len(json_strings)
+        ):
+            _targets = []
             json_data = json.loads(json_string)
             img_path = json_data["file_name"]
             annotations = json_data["annotations"]

doctr/file_utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -9,7 +9,6 @@ import importlib.metadata
 import importlib.util
 import logging
 import os
-from typing import Optional
 CLASS_NAME: str = "words"
@@ -80,10 +79,16 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
         else:
             logging.info(f"TensorFlow version {_tf_version} available.")
             ensure_keras_v2()
-            import tensorflow as tf
-            # Enable eager execution - this is required for some models to work properly
-            tf.config.run_functions_eagerly(True)
+        import warnings
+        warnings.simplefilter("always", DeprecationWarning)
+        warnings.warn(
+            "Support for TensorFlow in DocTR is deprecated and will be removed in the next major release (v1.0.0). "
+            "Please switch to the PyTorch backend.",
+            DeprecationWarning,
+        )
 else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False
@@ -96,12 +101,11 @@ if not _torch_available and not _tf_available:  # pragma: no cover
     )
-def requires_package(name: str, extra_message: Optional[str] = None) -> None:  # pragma: no cover
+def requires_package(name: str, extra_message: str | None = None) -> None:  # pragma: no cover
     """
     package requirement helper
     Args:
-    ----
         name: name of the package
         extra_message: additional message to display if the package is not found
     """

doctr/io/elements.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 from defusedxml import defuse_stdlib
@@ -32,8 +32,8 @@ __all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page",
 class Element(NestedObject):
     """Implements an abstract document element with exporting and text rendering capabilities"""
-    _children_names: List[str] = []
-    _exported_keys: List[str] = []
+    _children_names: list[str] = []
+    _exported_keys: list[str] = []
     def __init__(self, **kwargs: Any) -> None:
         for k, v in kwargs.items():
@@ -42,7 +42,7 @@ class Element(NestedObject):
             else:
                 raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'")
-    def export(self) -> Dict[str, Any]:
+    def export(self) -> dict[str, Any]:
         """Exports the object into a nested dict format"""
         export_dict = {k: getattr(self, k) for k in self._exported_keys}
         for children_name in self._children_names:
@@ -56,7 +56,7 @@ class Element(NestedObject):
         return export_dict
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         raise NotImplementedError
     def render(self) -> str:
@@ -67,7 +67,6 @@ class Word(Element):
     """Implements a word element
     Args:
-    ----
         value: the text string of the word
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -76,16 +75,16 @@ class Word(Element):
         crop_orientation: the general orientation of the crop in degrees and its confidence
     """
-    _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
-    _children_names: List[str] = []
+    _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
+    _children_names: list[str] = []
     def __init__(
         self,
         value: str,
         confidence: float,
-        geometry: Union[BoundingBox, np.ndarray],
+        geometry: BoundingBox | np.ndarray,
         objectness_score: float,
-        crop_orientation: Dict[str, Any],
+        crop_orientation: dict[str, Any],
     ) -> None:
         super().__init__()
         self.value = value
@@ -102,7 +101,7 @@ class Word(Element):
         return f"value='{self.value}', confidence={self.confidence:.2}"
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         return cls(**kwargs)
@@ -111,15 +110,14 @@ class Artefact(Element):
     """Implements a non-textual element
     Args:
-    ----
         artefact_type: the type of artefact
         confidence: the confidence of the type prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size.
     """
-    _exported_keys: List[str] = ["geometry", "type", "confidence"]
-    _children_names: List[str] = []
+    _exported_keys: list[str] = ["geometry", "type", "confidence"]
+    _children_names: list[str] = []
     def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
         super().__init__()
@@ -135,7 +133,7 @@ class Artefact(Element):
         return f"type='{self.type}', confidence={self.confidence:.2}"
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         return cls(**kwargs)
@@ -144,22 +142,21 @@ class Line(Element):
     """Implements a line element as a collection of words
     Args:
-    ----
         words: list of word elements
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
             all words in it.
     """
-    _exported_keys: List[str] = ["geometry", "objectness_score"]
-    _children_names: List[str] = ["words"]
-    words: List[Word] = []
+    _exported_keys: list[str] = ["geometry", "objectness_score"]
+    _children_names: list[str] = ["words"]
+    words: list[Word] = []
     def __init__(
         self,
-        words: List[Word],
-        geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
-        objectness_score: Optional[float] = None,
+        words: list[Word],
+        geometry: BoundingBox | np.ndarray | None = None,
+        objectness_score: float | None = None,
     ) -> None:
         # Compute the objectness score of the line
         if objectness_score is None:
@@ -179,7 +176,7 @@ class Line(Element):
         return " ".join(w.render() for w in self.words)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
@@ -202,7 +199,6 @@ class Block(Element):
     """Implements a block element as a collection of lines and artefacts
     Args:
-    ----
         lines: list of line elements
         artefacts: list of artefacts
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -210,17 +206,17 @@ class Block(Element):
             all lines and artefacts in it.
     """
-    _exported_keys: List[str] = ["geometry", "objectness_score"]
-    _children_names: List[str] = ["lines", "artefacts"]
-    lines: List[Line] = []
-    artefacts: List[Artefact] = []
+    _exported_keys: list[str] = ["geometry", "objectness_score"]
+    _children_names: list[str] = ["lines", "artefacts"]
+    lines: list[Line] = []
+    artefacts: list[Artefact] = []
     def __init__(
         self,
-        lines: List[Line] = [],
-        artefacts: List[Artefact] = [],
-        geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
-        objectness_score: Optional[float] = None,
+        lines: list[Line] = [],
+        artefacts: list[Artefact] = [],
+        geometry: BoundingBox | np.ndarray | None = None,
+        objectness_score: float | None = None,
     ) -> None:
         # Compute the objectness score of the line
         if objectness_score is None:
@@ -243,7 +239,7 @@ class Block(Element):
         return line_break.join(line.render() for line in self.lines)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
@@ -256,7 +252,6 @@ class Page(Element):
     """Implements a page element as a collection of blocks
     Args:
-    ----
         page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
@@ -265,18 +260,18 @@ class Page(Element):
         language: a dictionary with the language value and confidence of the prediction
     """
-    _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
-    _children_names: List[str] = ["blocks"]
-    blocks: List[Block] = []
+    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
+    _children_names: list[str] = ["blocks"]
+    blocks: list[Block] = []
     def __init__(
         self,
         page: np.ndarray,
-        blocks: List[Block],
+        blocks: list[Block],
         page_idx: int,
-        dimensions: Tuple[int, int],
-        orientation: Optional[Dict[str, Any]] = None,
-        language: Optional[Dict[str, Any]] = None,
+        dimensions: tuple[int, int],
+        orientation: dict[str, Any] | None = None,
+        language: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(blocks=blocks)
         self.page = page
@@ -311,25 +306,21 @@ class Page(Element):
         """Synthesize the page from the predictions
         Args:
-        ----
             **kwargs: keyword arguments passed to the `synthesize_page` method
-        Returns
-        -------
+        Returns:
             synthesized page
         """
         return synthesize_page(self.export(), **kwargs)
-    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
+    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
         """Export the page as XML (hOCR-format)
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
-        ----
             file_title: the title of the XML file
         Returns:
-        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -356,7 +347,7 @@ class Page(Element):
         )
         # Create the body
         body = SubElement(page_hocr, "body")
-        SubElement(
+        page_div = SubElement(
             body,
             "div",
             attrib={
@@ -371,7 +362,7 @@ class Page(Element):
                 raise TypeError("XML export is only available for straight bounding boxes for now.")
             (xmin, ymin), (xmax, ymax) = block.geometry
             block_div = SubElement(
-                body,
+                page_div,
                 "div",
                 attrib={
                     "class": "ocr_carea",
@@ -427,7 +418,7 @@ class Page(Element):
         return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
         return cls(**kwargs)
@@ -437,7 +428,6 @@ class KIEPage(Element):
     """Implements a KIE page element as a collection of predictions
     Args:
-    ----
         predictions: Dictionary with list of block elements for each detection class
         page: image encoded as a numpy array in uint8
         page_idx: the index of the page in the input raw document
@@ -446,18 +436,18 @@ class KIEPage(Element):
         language: a dictionary with the language value and confidence of the prediction
     """
-    _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
-    _children_names: List[str] = ["predictions"]
-    predictions: Dict[str, List[Prediction]] = {}
+    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
+    _children_names: list[str] = ["predictions"]
+    predictions: dict[str, list[Prediction]] = {}
     def __init__(
         self,
         page: np.ndarray,
-        predictions: Dict[str, List[Prediction]],
+        predictions: dict[str, list[Prediction]],
         page_idx: int,
-        dimensions: Tuple[int, int],
-        orientation: Optional[Dict[str, Any]] = None,
-        language: Optional[Dict[str, Any]] = None,
+        dimensions: tuple[int, int],
+        orientation: dict[str, Any] | None = None,
+        language: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(predictions=predictions)
         self.page = page
@@ -496,25 +486,21 @@ class KIEPage(Element):
         """Synthesize the page from the predictions
         Args:
-        ----
             **kwargs: keyword arguments passed to the `synthesize_kie_page` method
         Returns:
-        -------
             synthesized page
         """
         return synthesize_kie_page(self.export(), **kwargs)
-    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
+    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
         """Export the page as XML (hOCR-format)
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
-        ----
             file_title: the title of the XML file
         Returns:
-        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -564,13 +550,47 @@ class KIEPage(Element):
                         {int(round(xmax * width))} {int(round(ymax * height))}",
                     },
                 )
-                prediction_div.text = prediction.value
+                # NOTE: ocr_par, ocr_line and ocrx_word are the same because the KIE predictions contain only words
+                # This is a workaround to make it PDF/A compatible
+                par_div = SubElement(
+                    prediction_div,
+                    "p",
+                    attrib={
+                        "class": "ocr_par",
+                        "id": f"{class_name}_par_{prediction_count}",
+                        "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
+                        {int(round(xmax * width))} {int(round(ymax * height))}",
+                    },
+                )
+                line_span = SubElement(
+                    par_div,
+                    "span",
+                    attrib={
+                        "class": "ocr_line",
+                        "id": f"{class_name}_line_{prediction_count}",
+                        "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
+                        {int(round(xmax * width))} {int(round(ymax * height))}; \
+                        baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0",
+                    },
+                )
+                word_div = SubElement(
+                    line_span,
+                    "span",
+                    attrib={
+                        "class": "ocrx_word",
+                        "id": f"{class_name}_word_{prediction_count}",
+                        "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
+                        {int(round(xmax * width))} {int(round(ymax * height))}; \
+                        x_wconf {int(round(prediction.confidence * 100))}",
+                    },
+                )
+                word_div.text = prediction.value
                 prediction_count += 1
         return ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]
@@ -582,16 +602,15 @@ class Document(Element):
     """Implements a document element as a collection of pages
     Args:
-    ----
         pages: list of page elements
     """
-    _children_names: List[str] = ["pages"]
-    pages: List[Page] = []
+    _children_names: list[str] = ["pages"]
+    pages: list[Page] = []
     def __init__(
         self,
-        pages: List[Page],
+        pages: list[Page],
     ) -> None:
         super().__init__(pages=pages)
@@ -604,34 +623,30 @@ class Document(Element):
         for result in self.pages:
             result.show(**kwargs)
-    def synthesize(self, **kwargs) -> List[np.ndarray]:
+    def synthesize(self, **kwargs) -> list[np.ndarray]:
         """Synthesize all pages from their predictions
         Args:
-        ----
             **kwargs: keyword arguments passed to the `Page.synthesize` method
-        Returns
-        -------
+        Returns:
             list of synthesized pages
         """
         return [page.synthesize(**kwargs) for page in self.pages]
-    def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
+    def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)
         Args:
-        ----
             **kwargs: additional keyword arguments passed to the Page.export_as_xml method
         Returns:
-        -------
             list of tuple of (bytes, ElementTree)
         """
         return [page.export_as_xml(**kwargs) for page in self.pages]
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]})
         return cls(**kwargs)
@@ -641,15 +656,14 @@ class KIEDocument(Document):
     """Implements a document element as a collection of pages
     Args:
-    ----
         pages: list of page elements
     """
-    _children_names: List[str] = ["pages"]
-    pages: List[KIEPage] = []  # type: ignore[assignment]
+    _children_names: list[str] = ["pages"]
+    pages: list[KIEPage] = []  # type: ignore[assignment]
     def __init__(
         self,
-        pages: List[KIEPage],
+        pages: list[KIEPage],
     ) -> None:
         super().__init__(pages=pages)  # type: ignore[arg-type]

doctr/io/html.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
     >>> doc = read_html("https://www.yoursite.com")
     Args:
-    ----
         url: URL of the target web page
         **kwargs: keyword arguments from `weasyprint.HTML`
     Returns:
-    -------
         decoded PDF file as a bytes stream
     """
     from weasyprint import HTML

doctr/io/image/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@ from doctr.file_utils import is_tf_available, is_torch_available
 from .base import *
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
+if is_torch_available():
     from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *

doctr/io/image/base.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 from pathlib import Path
-from typing import Optional, Tuple
 import cv2
 import numpy as np
@@ -16,7 +15,7 @@ __all__ = ["read_img_as_numpy"]
 def read_img_as_numpy(
     file: AbstractFile,
-    output_size: Optional[Tuple[int, int]] = None,
+    output_size: tuple[int, int] | None = None,
     rgb_output: bool = True,
 ) -> np.ndarray:
     """Read an image file into numpy format
@@ -25,13 +24,11 @@ def read_img_as_numpy(
     >>> page = read_img_as_numpy("path/to/your/doc.jpg")
     Args:
-    ----
         file: the path to the image file
         output_size: the expected output size of each page in format H x W
         rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
     Returns:
-    -------
         the page decoded as numpy ndarray of shape H x W x 3
     """
     if isinstance(file, (str, Path)):

python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl