PyPI - python-doctr - Versions diffs - 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162) hide show

doctr/contrib/__init__.py +1 -0
doctr/contrib/artefacts.py +7 -9
doctr/contrib/base.py +8 -17
doctr/datasets/cord.py +17 -7
doctr/datasets/datasets/__init__.py +4 -4
doctr/datasets/datasets/base.py +16 -16
doctr/datasets/datasets/pytorch.py +12 -12
doctr/datasets/datasets/tensorflow.py +10 -10
doctr/datasets/detection.py +6 -9
doctr/datasets/doc_artefacts.py +3 -4
doctr/datasets/funsd.py +17 -6
doctr/datasets/generator/__init__.py +4 -4
doctr/datasets/generator/base.py +16 -17
doctr/datasets/generator/pytorch.py +1 -3
doctr/datasets/generator/tensorflow.py +1 -3
doctr/datasets/ic03.py +14 -5
doctr/datasets/ic13.py +13 -5
doctr/datasets/iiit5k.py +31 -20
doctr/datasets/iiithws.py +4 -5
doctr/datasets/imgur5k.py +15 -5
doctr/datasets/loader.py +4 -7
doctr/datasets/mjsynth.py +6 -5
doctr/datasets/ocr.py +3 -4
doctr/datasets/orientation.py +3 -4
doctr/datasets/recognition.py +3 -4
doctr/datasets/sroie.py +16 -5
doctr/datasets/svhn.py +16 -5
doctr/datasets/svt.py +14 -5
doctr/datasets/synthtext.py +14 -5
doctr/datasets/utils.py +37 -27
doctr/datasets/vocabs.py +21 -7
doctr/datasets/wildreceipt.py +25 -10
doctr/file_utils.py +18 -4
doctr/io/elements.py +69 -81
doctr/io/html.py +1 -3
doctr/io/image/__init__.py +3 -3
doctr/io/image/base.py +2 -5
doctr/io/image/pytorch.py +3 -12
doctr/io/image/tensorflow.py +2 -11
doctr/io/pdf.py +5 -7
doctr/io/reader.py +5 -11
doctr/models/_utils.py +14 -22
doctr/models/builder.py +32 -50
doctr/models/classification/magc_resnet/__init__.py +3 -3
doctr/models/classification/magc_resnet/pytorch.py +10 -13
doctr/models/classification/magc_resnet/tensorflow.py +21 -17
doctr/models/classification/mobilenet/__init__.py +3 -3
doctr/models/classification/mobilenet/pytorch.py +7 -17
doctr/models/classification/mobilenet/tensorflow.py +22 -29
doctr/models/classification/predictor/__init__.py +4 -4
doctr/models/classification/predictor/pytorch.py +13 -11
doctr/models/classification/predictor/tensorflow.py +13 -11
doctr/models/classification/resnet/__init__.py +4 -4
doctr/models/classification/resnet/pytorch.py +21 -31
doctr/models/classification/resnet/tensorflow.py +41 -39
doctr/models/classification/textnet/__init__.py +3 -3
doctr/models/classification/textnet/pytorch.py +10 -17
doctr/models/classification/textnet/tensorflow.py +19 -20
doctr/models/classification/vgg/__init__.py +3 -3
doctr/models/classification/vgg/pytorch.py +5 -7
doctr/models/classification/vgg/tensorflow.py +18 -15
doctr/models/classification/vit/__init__.py +3 -3
doctr/models/classification/vit/pytorch.py +8 -14
doctr/models/classification/vit/tensorflow.py +16 -16
doctr/models/classification/zoo.py +36 -19
doctr/models/core.py +3 -3
doctr/models/detection/_utils/__init__.py +4 -4
doctr/models/detection/_utils/base.py +4 -7
doctr/models/detection/_utils/pytorch.py +1 -5
doctr/models/detection/_utils/tensorflow.py +1 -5
doctr/models/detection/core.py +2 -8
doctr/models/detection/differentiable_binarization/__init__.py +4 -4
doctr/models/detection/differentiable_binarization/base.py +7 -17
doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
doctr/models/detection/fast/__init__.py +4 -4
doctr/models/detection/fast/base.py +6 -14
doctr/models/detection/fast/pytorch.py +24 -31
doctr/models/detection/fast/tensorflow.py +28 -37
doctr/models/detection/linknet/__init__.py +4 -4
doctr/models/detection/linknet/base.py +6 -15
doctr/models/detection/linknet/pytorch.py +24 -27
doctr/models/detection/linknet/tensorflow.py +36 -33
doctr/models/detection/predictor/__init__.py +5 -5
doctr/models/detection/predictor/pytorch.py +6 -7
doctr/models/detection/predictor/tensorflow.py +7 -8
doctr/models/detection/zoo.py +27 -7
doctr/models/factory/hub.py +8 -13
doctr/models/kie_predictor/__init__.py +5 -5
doctr/models/kie_predictor/base.py +8 -5
doctr/models/kie_predictor/pytorch.py +22 -19
doctr/models/kie_predictor/tensorflow.py +21 -15
doctr/models/modules/layers/__init__.py +3 -3
doctr/models/modules/layers/pytorch.py +6 -9
doctr/models/modules/layers/tensorflow.py +5 -7
doctr/models/modules/transformer/__init__.py +3 -3
doctr/models/modules/transformer/pytorch.py +12 -13
doctr/models/modules/transformer/tensorflow.py +9 -12
doctr/models/modules/vision_transformer/__init__.py +3 -3
doctr/models/modules/vision_transformer/pytorch.py +3 -4
doctr/models/modules/vision_transformer/tensorflow.py +4 -4
doctr/models/predictor/__init__.py +5 -5
doctr/models/predictor/base.py +52 -41
doctr/models/predictor/pytorch.py +16 -13
doctr/models/predictor/tensorflow.py +16 -10
doctr/models/preprocessor/__init__.py +4 -4
doctr/models/preprocessor/pytorch.py +13 -17
doctr/models/preprocessor/tensorflow.py +11 -15
doctr/models/recognition/core.py +3 -7
doctr/models/recognition/crnn/__init__.py +4 -4
doctr/models/recognition/crnn/pytorch.py +20 -28
doctr/models/recognition/crnn/tensorflow.py +19 -29
doctr/models/recognition/master/__init__.py +3 -3
doctr/models/recognition/master/base.py +3 -7
doctr/models/recognition/master/pytorch.py +22 -24
doctr/models/recognition/master/tensorflow.py +21 -26
doctr/models/recognition/parseq/__init__.py +3 -3
doctr/models/recognition/parseq/base.py +3 -7
doctr/models/recognition/parseq/pytorch.py +26 -26
doctr/models/recognition/parseq/tensorflow.py +26 -30
doctr/models/recognition/predictor/__init__.py +5 -5
doctr/models/recognition/predictor/_utils.py +7 -10
doctr/models/recognition/predictor/pytorch.py +6 -6
doctr/models/recognition/predictor/tensorflow.py +5 -6
doctr/models/recognition/sar/__init__.py +4 -4
doctr/models/recognition/sar/pytorch.py +20 -21
doctr/models/recognition/sar/tensorflow.py +19 -24
doctr/models/recognition/utils.py +5 -10
doctr/models/recognition/vitstr/__init__.py +4 -4
doctr/models/recognition/vitstr/base.py +3 -7
doctr/models/recognition/vitstr/pytorch.py +18 -20
doctr/models/recognition/vitstr/tensorflow.py +21 -24
doctr/models/recognition/zoo.py +22 -11
doctr/models/utils/__init__.py +4 -4
doctr/models/utils/pytorch.py +13 -16
doctr/models/utils/tensorflow.py +31 -30
doctr/models/zoo.py +1 -5
doctr/transforms/functional/__init__.py +3 -3
doctr/transforms/functional/base.py +4 -11
doctr/transforms/functional/pytorch.py +21 -29
doctr/transforms/functional/tensorflow.py +10 -22
doctr/transforms/modules/__init__.py +4 -4
doctr/transforms/modules/base.py +48 -55
doctr/transforms/modules/pytorch.py +65 -28
doctr/transforms/modules/tensorflow.py +33 -44
doctr/utils/common_types.py +8 -9
doctr/utils/data.py +8 -12
doctr/utils/fonts.py +2 -7
doctr/utils/geometry.py +120 -64
doctr/utils/metrics.py +18 -38
doctr/utils/multithreading.py +4 -6
doctr/utils/reconstitution.py +157 -75
doctr/utils/repr.py +2 -3
doctr/utils/visualization.py +16 -29
doctr/version.py +1 -1
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
python_doctr-0.11.0.dist-info/RECORD +173 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
python_doctr-0.9.0.dist-info/RECORD +0 -173
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0

doctr/io/elements.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 from defusedxml import defuse_stdlib
@@ -32,8 +32,8 @@ __all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page",
 class Element(NestedObject):
     """Implements an abstract document element with exporting and text rendering capabilities"""
-    _children_names: List[str] = []
-    _exported_keys: List[str] = []
+    _children_names: list[str] = []
+    _exported_keys: list[str] = []
     def __init__(self, **kwargs: Any) -> None:
         for k, v in kwargs.items():
@@ -42,7 +42,7 @@ class Element(NestedObject):
             else:
                 raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'")
-    def export(self) -> Dict[str, Any]:
+    def export(self) -> dict[str, Any]:
         """Exports the object into a nested dict format"""
         export_dict = {k: getattr(self, k) for k in self._exported_keys}
         for children_name in self._children_names:
@@ -56,7 +56,7 @@ class Element(NestedObject):
         return export_dict
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         raise NotImplementedError
     def render(self) -> str:
@@ -67,7 +67,6 @@ class Word(Element):
     """Implements a word element
     Args:
-    ----
         value: the text string of the word
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -76,16 +75,16 @@ class Word(Element):
         crop_orientation: the general orientation of the crop in degrees and its confidence
     """
-    _exported_keys: List[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
-    _children_names: List[str] = []
+    _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
+    _children_names: list[str] = []
     def __init__(
         self,
         value: str,
         confidence: float,
-        geometry: Union[BoundingBox, np.ndarray],
+        geometry: BoundingBox | np.ndarray,
         objectness_score: float,
-        crop_orientation: Dict[str, Any],
+        crop_orientation: dict[str, Any],
     ) -> None:
         super().__init__()
         self.value = value
@@ -102,7 +101,7 @@ class Word(Element):
         return f"value='{self.value}', confidence={self.confidence:.2}"
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         return cls(**kwargs)
@@ -111,15 +110,14 @@ class Artefact(Element):
     """Implements a non-textual element
     Args:
-    ----
         artefact_type: the type of artefact
         confidence: the confidence of the type prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size.
     """
-    _exported_keys: List[str] = ["geometry", "type", "confidence"]
-    _children_names: List[str] = []
+    _exported_keys: list[str] = ["geometry", "type", "confidence"]
+    _children_names: list[str] = []
     def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
         super().__init__()
@@ -135,7 +133,7 @@ class Artefact(Element):
         return f"type='{self.type}', confidence={self.confidence:.2}"
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         return cls(**kwargs)
@@ -144,22 +142,21 @@ class Line(Element):
     """Implements a line element as a collection of words
     Args:
-    ----
         words: list of word elements
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
             all words in it.
     """
-    _exported_keys: List[str] = ["geometry", "objectness_score"]
-    _children_names: List[str] = ["words"]
-    words: List[Word] = []
+    _exported_keys: list[str] = ["geometry", "objectness_score"]
+    _children_names: list[str] = ["words"]
+    words: list[Word] = []
     def __init__(
         self,
-        words: List[Word],
-        geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
-        objectness_score: Optional[float] = None,
+        words: list[Word],
+        geometry: BoundingBox | np.ndarray | None = None,
+        objectness_score: float | None = None,
     ) -> None:
         # Compute the objectness score of the line
         if objectness_score is None:
@@ -168,7 +165,7 @@ class Line(Element):
         if geometry is None:
             # Check whether this is a rotated or straight box
             box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox
-            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[operator]
+            geometry = box_resolution_fn([w.geometry for w in words])  # type: ignore[misc]
         super().__init__(words=words)
         self.geometry = geometry
@@ -179,7 +176,7 @@ class Line(Element):
         return " ".join(w.render() for w in self.words)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "words": [Word.from_dict(_dict) for _dict in save_dict["words"]],
@@ -202,7 +199,6 @@ class Block(Element):
     """Implements a block element as a collection of lines and artefacts
     Args:
-    ----
         lines: list of line elements
         artefacts: list of artefacts
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -210,17 +206,17 @@ class Block(Element):
             all lines and artefacts in it.
     """
-    _exported_keys: List[str] = ["geometry", "objectness_score"]
-    _children_names: List[str] = ["lines", "artefacts"]
-    lines: List[Line] = []
-    artefacts: List[Artefact] = []
+    _exported_keys: list[str] = ["geometry", "objectness_score"]
+    _children_names: list[str] = ["lines", "artefacts"]
+    lines: list[Line] = []
+    artefacts: list[Artefact] = []
     def __init__(
         self,
-        lines: List[Line] = [],
-        artefacts: List[Artefact] = [],
-        geometry: Optional[Union[BoundingBox, np.ndarray]] = None,
-        objectness_score: Optional[float] = None,
+        lines: list[Line] = [],
+        artefacts: list[Artefact] = [],
+        geometry: BoundingBox | np.ndarray | None = None,
+        objectness_score: float | None = None,
     ) -> None:
         # Compute the objectness score of the line
         if objectness_score is None:
@@ -232,7 +228,7 @@ class Block(Element):
             box_resolution_fn = (
                 resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
             )
-            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore[operator]
+            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
         super().__init__(lines=lines, artefacts=artefacts)
         self.geometry = geometry
@@ -243,7 +239,7 @@ class Block(Element):
         return line_break.join(line.render() for line in self.lines)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
@@ -256,7 +252,6 @@ class Page(Element):
     """Implements a page element as a collection of blocks
     Args:
-    ----
         page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
@@ -265,18 +260,18 @@ class Page(Element):
         language: a dictionary with the language value and confidence of the prediction
     """
-    _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
-    _children_names: List[str] = ["blocks"]
-    blocks: List[Block] = []
+    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
+    _children_names: list[str] = ["blocks"]
+    blocks: list[Block] = []
     def __init__(
         self,
         page: np.ndarray,
-        blocks: List[Block],
+        blocks: list[Block],
         page_idx: int,
-        dimensions: Tuple[int, int],
-        orientation: Optional[Dict[str, Any]] = None,
-        language: Optional[Dict[str, Any]] = None,
+        dimensions: tuple[int, int],
+        orientation: dict[str, Any] | None = None,
+        language: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(blocks=blocks)
         self.page = page
@@ -310,22 +305,22 @@ class Page(Element):
     def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
-        Returns
-        -------
+        Args:
+            **kwargs: keyword arguments passed to the `synthesize_page` method
+        Returns:
             synthesized page
         """
         return synthesize_page(self.export(), **kwargs)
-    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
+    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
         """Export the page as XML (hOCR-format)
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
-        ----
             file_title: the title of the XML file
         Returns:
-        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -423,7 +418,7 @@ class Page(Element):
         return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
         return cls(**kwargs)
@@ -433,7 +428,6 @@ class KIEPage(Element):
     """Implements a KIE page element as a collection of predictions
     Args:
-    ----
         predictions: Dictionary with list of block elements for each detection class
         page: image encoded as a numpy array in uint8
         page_idx: the index of the page in the input raw document
@@ -442,18 +436,18 @@ class KIEPage(Element):
         language: a dictionary with the language value and confidence of the prediction
     """
-    _exported_keys: List[str] = ["page_idx", "dimensions", "orientation", "language"]
-    _children_names: List[str] = ["predictions"]
-    predictions: Dict[str, List[Prediction]] = {}
+    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
+    _children_names: list[str] = ["predictions"]
+    predictions: dict[str, list[Prediction]] = {}
     def __init__(
         self,
         page: np.ndarray,
-        predictions: Dict[str, List[Prediction]],
+        predictions: dict[str, list[Prediction]],
         page_idx: int,
-        dimensions: Tuple[int, int],
-        orientation: Optional[Dict[str, Any]] = None,
-        language: Optional[Dict[str, Any]] = None,
+        dimensions: tuple[int, int],
+        orientation: dict[str, Any] | None = None,
+        language: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(predictions=predictions)
         self.page = page
@@ -492,25 +486,21 @@ class KIEPage(Element):
         """Synthesize the page from the predictions
         Args:
-        ----
-            **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
+            **kwargs: keyword arguments passed to the `synthesize_kie_page` method
         Returns:
-        -------
             synthesized page
         """
         return synthesize_kie_page(self.export(), **kwargs)
-    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> Tuple[bytes, ET.ElementTree]:
+    def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
         """Export the page as XML (hOCR-format)
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
         Args:
-        ----
             file_title: the title of the XML file
         Returns:
-        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -566,7 +556,7 @@ class KIEPage(Element):
         return ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr)
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({
             "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]]
@@ -578,16 +568,15 @@ class Document(Element):
     """Implements a document element as a collection of pages
     Args:
-    ----
         pages: list of page elements
     """
-    _children_names: List[str] = ["pages"]
-    pages: List[Page] = []
+    _children_names: list[str] = ["pages"]
+    pages: list[Page] = []
     def __init__(
         self,
-        pages: List[Page],
+        pages: list[Page],
     ) -> None:
         super().__init__(pages=pages)
@@ -600,30 +589,30 @@ class Document(Element):
         for result in self.pages:
             result.show(**kwargs)
-    def synthesize(self, **kwargs) -> List[np.ndarray]:
+    def synthesize(self, **kwargs) -> list[np.ndarray]:
         """Synthesize all pages from their predictions
-        Returns
-        -------
+        Args:
+            **kwargs: keyword arguments passed to the `Page.synthesize` method
+        Returns:
             list of synthesized pages
         """
-        return [page.synthesize() for page in self.pages]
+        return [page.synthesize(**kwargs) for page in self.pages]
-    def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
+    def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)
         Args:
-        ----
             **kwargs: additional keyword arguments passed to the Page.export_as_xml method
         Returns:
-        -------
             list of tuple of (bytes, ElementTree)
         """
         return [page.export_as_xml(**kwargs) for page in self.pages]
     @classmethod
-    def from_dict(cls, save_dict: Dict[str, Any], **kwargs):
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
         kwargs = {k: save_dict[k] for k in cls._exported_keys}
         kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]})
         return cls(**kwargs)
@@ -633,15 +622,14 @@ class KIEDocument(Document):
     """Implements a document element as a collection of pages
     Args:
-    ----
         pages: list of page elements
     """
-    _children_names: List[str] = ["pages"]
-    pages: List[KIEPage] = []  # type: ignore[assignment]
+    _children_names: list[str] = ["pages"]
+    pages: list[KIEPage] = []  # type: ignore[assignment]
     def __init__(
         self,
-        pages: List[KIEPage],
+        pages: list[KIEPage],
     ) -> None:
         super().__init__(pages=pages)  # type: ignore[arg-type]

doctr/io/html.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
     >>> doc = read_html("https://www.yoursite.com")
     Args:
-    ----
         url: URL of the target web page
         **kwargs: keyword arguments from `weasyprint.HTML`
     Returns:
-    -------
         decoded PDF file as a bytes stream
     """
     from weasyprint import HTML

doctr/io/image/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@ from doctr.file_utils import is_tf_available, is_torch_available
 from .base import *
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
+if is_torch_available():
     from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *

doctr/io/image/base.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 from pathlib import Path
-from typing import Optional, Tuple
 import cv2
 import numpy as np
@@ -16,7 +15,7 @@ __all__ = ["read_img_as_numpy"]
 def read_img_as_numpy(
     file: AbstractFile,
-    output_size: Optional[Tuple[int, int]] = None,
+    output_size: tuple[int, int] | None = None,
     rgb_output: bool = True,
 ) -> np.ndarray:
     """Read an image file into numpy format
@@ -25,13 +24,11 @@ def read_img_as_numpy(
     >>> page = read_img_as_numpy("path/to/your/doc.jpg")
     Args:
-    ----
         file: the path to the image file
         output_size: the expected output size of each page in format H x W
         rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
     Returns:
-    -------
         the page decoded as numpy ndarray of shape H x W x 3
     """
     if isinstance(file, (str, Path)):

doctr/io/image/pytorch.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 from io import BytesIO
-from typing import Tuple
 import numpy as np
 import torch
@@ -20,12 +19,10 @@ def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) ->
     """Convert a PIL Image to a PyTorch tensor
     Args:
-    ----
         pil_img: a PIL image
         dtype: the output tensor data type
     Returns:
-    -------
         decoded image as tensor
     """
     if dtype == torch.float32:
@@ -40,12 +37,10 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float3
     """Read an image file as a PyTorch tensor
     Args:
-    ----
         img_path: location of the image file
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -59,12 +54,10 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32)
     """Read a byte stream as a PyTorch tensor
     Args:
-    ----
         img_content: bytes of a decoded image
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -78,12 +71,10 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
     """Read an image file as a PyTorch tensor
     Args:
-    ----
         npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         same image as a tensor of shape (C, H, W)
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -102,6 +93,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -
     return img
-def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
+def get_img_shape(img: torch.Tensor) -> tuple[int, int]:
     """Get the shape of an image"""
-    return img.shape[-2:]  # type: ignore[return-value]
+    return img.shape[-2:]

doctr/io/image/tensorflow.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Tuple
 import numpy as np
 import tensorflow as tf
@@ -19,12 +18,10 @@ def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -
     """Convert a PIL Image to a TensorFlow tensor
     Args:
-    ----
         pil_img: a PIL image
         dtype: the output tensor data type
     Returns:
-    -------
         decoded image as tensor
     """
     npy_img = img_to_array(pil_img)
@@ -36,12 +33,10 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: tf.dtypes.DType = tf.float
     """Read an image file as a TensorFlow tensor
     Args:
-    ----
         img_path: location of the image file
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -61,12 +56,10 @@ def decode_img_as_tensor(img_content: bytes, dtype: tf.dtypes.DType = tf.float32
     """Read a byte stream as a TensorFlow tensor
     Args:
-    ----
         img_content: bytes of a decoded image
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -85,12 +78,10 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
     """Read an image file as a TensorFlow tensor
     Args:
-    ----
         npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
     Returns:
-    -------
         same image as a tensor of shape (H, W, C)
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -105,6 +96,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32)
     return img
-def get_img_shape(img: tf.Tensor) -> Tuple[int, int]:
+def get_img_shape(img: tf.Tensor) -> tuple[int, int]:
     """Get the shape of an image"""
     return img.shape[:2]

doctr/io/pdf.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List, Optional
+from typing import Any
 import numpy as np
 import pypdfium2 as pdfium
@@ -15,18 +15,17 @@ __all__ = ["read_pdf"]
 def read_pdf(
     file: AbstractFile,
-    scale: float = 2,
+    scale: int = 2,
     rgb_mode: bool = True,
-    password: Optional[str] = None,
+    password: str | None = None,
     **kwargs: Any,
-) -> List[np.ndarray]:
+) -> list[np.ndarray]:
     """Read a PDF file and convert it into an image in numpy format
     >>> from doctr.io import read_pdf
     >>> doc = read_pdf("path/to/your/doc.pdf")
     Args:
-    ----
         file: the path to the PDF file
         scale: rendering scale (1 corresponds to 72dpi)
         rgb_mode: if True, the output will be RGB, otherwise BGR
@@ -34,7 +33,6 @@ def read_pdf(
         **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
     Returns:
-    -------
         the list of pages decoded as numpy ndarray of shape H x W x C
     """
     # Rasterise pages to numpy ndarrays with pypdfium2

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl