python-doctr 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +1 -1
- doctr/contrib/__init__.py +0 -0
- doctr/contrib/artefacts.py +131 -0
- doctr/contrib/base.py +105 -0
- doctr/datasets/cord.py +10 -1
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/funsd.py +11 -1
- doctr/datasets/generator/base.py +6 -5
- doctr/datasets/ic03.py +11 -1
- doctr/datasets/ic13.py +10 -1
- doctr/datasets/iiit5k.py +26 -16
- doctr/datasets/imgur5k.py +11 -2
- doctr/datasets/loader.py +1 -6
- doctr/datasets/sroie.py +11 -1
- doctr/datasets/svhn.py +11 -1
- doctr/datasets/svt.py +11 -1
- doctr/datasets/synthtext.py +11 -1
- doctr/datasets/utils.py +9 -3
- doctr/datasets/vocabs.py +15 -4
- doctr/datasets/wildreceipt.py +12 -1
- doctr/file_utils.py +45 -12
- doctr/io/elements.py +52 -10
- doctr/io/html.py +2 -2
- doctr/io/image/pytorch.py +6 -8
- doctr/io/image/tensorflow.py +1 -1
- doctr/io/pdf.py +5 -2
- doctr/io/reader.py +6 -0
- doctr/models/__init__.py +0 -1
- doctr/models/_utils.py +57 -20
- doctr/models/builder.py +73 -15
- doctr/models/classification/magc_resnet/tensorflow.py +13 -6
- doctr/models/classification/mobilenet/pytorch.py +47 -9
- doctr/models/classification/mobilenet/tensorflow.py +51 -14
- doctr/models/classification/predictor/pytorch.py +28 -17
- doctr/models/classification/predictor/tensorflow.py +26 -16
- doctr/models/classification/resnet/tensorflow.py +21 -8
- doctr/models/classification/textnet/pytorch.py +3 -3
- doctr/models/classification/textnet/tensorflow.py +11 -5
- doctr/models/classification/vgg/tensorflow.py +9 -3
- doctr/models/classification/vit/tensorflow.py +10 -4
- doctr/models/classification/zoo.py +55 -19
- doctr/models/detection/_utils/__init__.py +1 -0
- doctr/models/detection/_utils/base.py +66 -0
- doctr/models/detection/differentiable_binarization/base.py +4 -3
- doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
- doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
- doctr/models/detection/fast/base.py +6 -5
- doctr/models/detection/fast/pytorch.py +4 -4
- doctr/models/detection/fast/tensorflow.py +15 -12
- doctr/models/detection/linknet/base.py +4 -3
- doctr/models/detection/linknet/tensorflow.py +23 -11
- doctr/models/detection/predictor/pytorch.py +15 -1
- doctr/models/detection/predictor/tensorflow.py +17 -3
- doctr/models/detection/zoo.py +7 -2
- doctr/models/factory/hub.py +8 -18
- doctr/models/kie_predictor/base.py +13 -3
- doctr/models/kie_predictor/pytorch.py +45 -20
- doctr/models/kie_predictor/tensorflow.py +44 -17
- doctr/models/modules/layers/pytorch.py +2 -3
- doctr/models/modules/layers/tensorflow.py +6 -8
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/modules/transformer/tensorflow.py +0 -2
- doctr/models/modules/vision_transformer/pytorch.py +1 -1
- doctr/models/modules/vision_transformer/tensorflow.py +1 -1
- doctr/models/predictor/base.py +97 -58
- doctr/models/predictor/pytorch.py +35 -20
- doctr/models/predictor/tensorflow.py +35 -18
- doctr/models/preprocessor/pytorch.py +4 -4
- doctr/models/preprocessor/tensorflow.py +3 -2
- doctr/models/recognition/crnn/tensorflow.py +8 -6
- doctr/models/recognition/master/pytorch.py +2 -2
- doctr/models/recognition/master/tensorflow.py +9 -4
- doctr/models/recognition/parseq/pytorch.py +4 -3
- doctr/models/recognition/parseq/tensorflow.py +14 -11
- doctr/models/recognition/sar/pytorch.py +7 -6
- doctr/models/recognition/sar/tensorflow.py +10 -12
- doctr/models/recognition/vitstr/pytorch.py +1 -1
- doctr/models/recognition/vitstr/tensorflow.py +9 -4
- doctr/models/recognition/zoo.py +1 -1
- doctr/models/utils/pytorch.py +1 -1
- doctr/models/utils/tensorflow.py +15 -15
- doctr/models/zoo.py +2 -2
- doctr/py.typed +0 -0
- doctr/transforms/functional/base.py +1 -1
- doctr/transforms/functional/pytorch.py +5 -5
- doctr/transforms/modules/base.py +37 -15
- doctr/transforms/modules/pytorch.py +73 -14
- doctr/transforms/modules/tensorflow.py +78 -19
- doctr/utils/fonts.py +7 -5
- doctr/utils/geometry.py +141 -31
- doctr/utils/metrics.py +34 -175
- doctr/utils/reconstitution.py +212 -0
- doctr/utils/visualization.py +5 -118
- doctr/version.py +1 -1
- {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/METADATA +85 -81
- python_doctr-0.10.0.dist-info/RECORD +173 -0
- {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/WHEEL +1 -1
- doctr/models/artefacts/__init__.py +0 -2
- doctr/models/artefacts/barcode.py +0 -74
- doctr/models/artefacts/face.py +0 -63
- doctr/models/obj_detection/__init__.py +0 -1
- doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
- python_doctr-0.8.1.dist-info/RECORD +0 -173
- {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/zip-safe +0 -0
doctr/utils/reconstitution.py
ADDED
@@ -0,0 +1,212 @@
+# Copyright (C) 2021-2024, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import logging
+from typing import Any, Dict, Optional
+
+import numpy as np
+from anyascii import anyascii
+from PIL import Image, ImageDraw
+
+from .fonts import get_font
+
+__all__ = ["synthesize_page", "synthesize_kie_page"]
+
+
+# Global variable to avoid multiple warnings
+ROTATION_WARNING = False
+
+
+def _warn_rotation(entry: Dict[str, Any]) -> None:  # pragma: no cover
+    global ROTATION_WARNING
+    if not ROTATION_WARNING and len(entry["geometry"]) == 4:
+        logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
+        ROTATION_WARNING = True
+
+
+def _synthesize(
+    response: Image.Image,
+    entry: Dict[str, Any],
+    w: int,
+    h: int,
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+    smoothing_factor: float = 0.75,
+    min_font_size: int = 6,
+    max_font_size: int = 50,
+) -> Image.Image:
+    if len(entry["geometry"]) == 2:
+        (xmin, ymin), (xmax, ymax) = entry["geometry"]
+        polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
+    else:
+        polygon = entry["geometry"]
+
+    # Calculate the bounding box of the word
+    x_coords, y_coords = zip(*polygon)
+    xmin, ymin, xmax, ymax = (
+        int(round(w * min(x_coords))),
+        int(round(h * min(y_coords))),
+        int(round(w * max(x_coords))),
+        int(round(h * max(y_coords))),
+    )
+    word_width = xmax - xmin
+    word_height = ymax - ymin
+
+    # If lines are provided instead of words, concatenate the word entries
+    if "words" in entry:
+        word_text = " ".join(word["value"] for word in entry["words"])
+    else:
+        word_text = entry["value"]
+    # Find the optimal font size
+    try:
+        font_size = min(word_height, max_font_size)
+        font = get_font(font_family, font_size)
+        text_width, text_height = font.getbbox(word_text)[2:4]
+
+        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
+            font_size = max(int(font_size * smoothing_factor), min_font_size)
+            font = get_font(font_family, font_size)
+            text_width, text_height = font.getbbox(word_text)[2:4]
+    except ValueError:
+        font = get_font(font_family, min_font_size)
+
+    # Create a mask for the word
+    mask = Image.new("L", (w, h), 0)
+    ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
+
+    # Draw the word text
+    d = ImageDraw.Draw(response)
+    try:
+        try:
+            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
+        except UnicodeEncodeError:
+            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
+    # Catch generic exceptions to avoid crashing the whole rendering
+    except Exception:  # pragma: no cover
+        logging.warning(f"Could not render word: {word_text}")
+
+    if draw_proba:
+        confidence = (
+            entry["confidence"]
+            if "confidence" in entry
+            else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
+        )
+        p = int(255 * confidence)
+        color = (255 - p, 0, p)  # Red to blue gradient based on probability
+        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
+
+        prob_font = get_font(font_family, 20)
+        prob_text = f"{confidence:.2f}"
+        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
+
+        # Position the probability slightly above the bounding box
+        prob_x_offset = (word_width - prob_text_width) // 2
+        prob_y_offset = ymin - prob_text_height - 2
+        prob_y_offset = max(0, prob_y_offset)
+
+        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
+
+    return response
+
+
+def synthesize_page(
+    page: Dict[str, Any],
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+    smoothing_factor: float = 0.95,
+    min_font_size: int = 8,
+    max_font_size: int = 50,
+) -> np.ndarray:
+    """Draw a the content of the element page (OCR response) on a blank page.
+
+    Args:
+    ----
+        page: exported Page object to represent
+        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
+        font_family: family of the font
+        smoothing_factor: factor to smooth the font size
+        min_font_size: minimum font size
+        max_font_size: maximum font size
+
+    Returns:
+    -------
+        the synthesized page
+    """
+    # Draw template
+    h, w = page["dimensions"]
+    response = Image.new("RGB", (w, h), color=(255, 255, 255))
+
+    for block in page["blocks"]:
+        # If lines are provided use these to get better rendering results
+        if len(block["lines"]) > 1:
+            for line in block["lines"]:
+                _warn_rotation(block)  # pragma: no cover
+                response = _synthesize(
+                    response=response,
+                    entry=line,
+                    w=w,
+                    h=h,
+                    draw_proba=draw_proba,
+                    font_family=font_family,
+                    smoothing_factor=smoothing_factor,
+                    min_font_size=min_font_size,
+                    max_font_size=max_font_size,
+                )
+        # Otherwise, draw each word
+        else:
+            for line in block["lines"]:
+                _warn_rotation(block)  # pragma: no cover
+                for word in line["words"]:
+                    response = _synthesize(
+                        response=response,
+                        entry=word,
+                        w=w,
+                        h=h,
+                        draw_proba=draw_proba,
+                        font_family=font_family,
+                        smoothing_factor=smoothing_factor,
+                        min_font_size=min_font_size,
+                        max_font_size=max_font_size,
+                    )
+
+    return np.array(response, dtype=np.uint8)
+
+
+def synthesize_kie_page(
+    page: Dict[str, Any],
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+) -> np.ndarray:
+    """Draw a the content of the element page (OCR response) on a blank page.
+
+    Args:
+    ----
+        page: exported Page object to represent
+        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
+        font_family: family of the font
+        smoothing_factor: factor to smooth the font size
+        min_font_size: minimum font size
+        max_font_size: maximum font size
+
+    Returns:
+    -------
+        the synthesized page
+    """
+    # Draw template
+    h, w = page["dimensions"]
+    response = Image.new("RGB", (w, h), color=(255, 255, 255))
+
+    # Draw each word
+    for predictions in page["predictions"].values():
+        for prediction in predictions:
+            _warn_rotation(prediction)  # pragma: no cover
+            response = _synthesize(
+                response=response,
+                entry=prediction,
+                w=w,
+                h=h,
+                draw_proba=draw_proba,
+                font_family=font_family,
+            )
+    return np.array(response, dtype=np.uint8)
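For orientation, here is a minimal usage sketch of the `synthesize_page` helper added above. The `page` dict is hand-built to match the fields the function reads in this diff (`dimensions`, `blocks`, `lines`, `words` with relative geometries and confidences); the values themselves are invented for illustration, and a docTR 0.10.0 install with one of its deep-learning backends is assumed.

```python
# Hedged sketch of doctr.utils.reconstitution.synthesize_page (new in 0.10.0).
# The page structure mirrors what the function above consumes; values are made up.
import numpy as np

from doctr.utils.reconstitution import synthesize_page

page = {
    "dimensions": (200, 600),  # (h, w) of the page to render, in pixels
    "blocks": [
        {
            "geometry": ((0.05, 0.2), (0.60, 0.5)),  # read by _warn_rotation
            "lines": [
                {
                    "geometry": ((0.05, 0.2), (0.60, 0.5)),
                    "words": [
                        {"value": "Hello", "confidence": 0.98, "geometry": ((0.05, 0.2), (0.30, 0.5))},
                        {"value": "world", "confidence": 0.91, "geometry": ((0.32, 0.2), (0.60, 0.5))},
                    ],
                }
            ],
        }
    ],
}

out = synthesize_page(page, draw_proba=False)
# A white RGB canvas with the words re-drawn at their relative positions
assert out.shape == (200, 600, 3) and out.dtype == np.uint8
```

In practice such a dict would come from an OCR predictor's exported result rather than being written by hand.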
doctr/utils/visualization.py
CHANGED
@@ -9,16 +9,12 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 import cv2
 import matplotlib.patches as patches
 import matplotlib.pyplot as plt
-import mplcursors
 import numpy as np
 from matplotlib.figure import Figure
-from PIL import Image, ImageDraw
-from unidecode import unidecode
 
 from .common_types import BoundingBox, Polygon4P
-from .fonts import get_font
 
-__all__ = ["visualize_page", "
+__all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
 
 
 def rect_patch(
@@ -281,6 +277,8 @@ def visualize_page(
                     artists.append(rect)
 
     if interactive:
+        import mplcursors
+
         # Create mlp Cursor to hover patches in artists
         mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
     fig.tight_layout(pad=0.0)
@@ -288,63 +286,6 @@ def visualize_page(
     return fig
 
 
-def synthesize_page(
-    page: Dict[str, Any],
-    draw_proba: bool = False,
-    font_family: Optional[str] = None,
-) -> np.ndarray:
-    """Draw a the content of the element page (OCR response) on a blank page.
-
-    Args:
-    ----
-        page: exported Page object to represent
-        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
-        font_size: size of the font, default font = 13
-        font_family: family of the font
-
-    Returns:
-    -------
-        the synthesized page
-    """
-    # Draw template
-    h, w = page["dimensions"]
-    response = 255 * np.ones((h, w, 3), dtype=np.int32)
-
-    # Draw each word
-    for block in page["blocks"]:
-        for line in block["lines"]:
-            for word in line["words"]:
-                # Get aboslute word geometry
-                (xmin, ymin), (xmax, ymax) = word["geometry"]
-                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
-                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
-
-                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
-                font = get_font(font_family, int(0.75 * (ymax - ymin)))
-                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
-                d = ImageDraw.Draw(img)
-                # Draw in black the value of the word
-                try:
-                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
-                except UnicodeEncodeError:
-                    # When character cannot be encoded, use its unidecode version
-                    d.text((0, 0), unidecode(word["value"]), font=font, fill=(0, 0, 0))
-
-                # Colorize if draw_proba
-                if draw_proba:
-                    p = int(255 * word["confidence"])
-                    mask = np.where(np.array(img) == 0, 1, 0)
-                    proba: np.ndarray = np.array([255 - p, 0, p])
-                    color = mask * proba[np.newaxis, np.newaxis, :]
-                    white_mask = 255 * (1 - mask)
-                    img = color + white_mask
-
-                # Write to response page
-                response[ymin:ymax, xmin:xmax, :] = np.array(img)
-
-    return response
-
-
 def visualize_kie_page(
     page: Dict[str, Any],
     image: np.ndarray,
@@ -413,6 +354,8 @@ def visualize_kie_page(
                     artists.append(rect)
 
     if interactive:
+        import mplcursors
+
         # Create mlp Cursor to hover patches in artists
         mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
     fig.tight_layout(pad=0.0)
@@ -420,62 +363,6 @@ def visualize_kie_page(
     return fig
 
 
-def synthesize_kie_page(
-    page: Dict[str, Any],
-    draw_proba: bool = False,
-    font_family: Optional[str] = None,
-) -> np.ndarray:
-    """Draw a the content of the element page (OCR response) on a blank page.
-
-    Args:
-    ----
-        page: exported Page object to represent
-        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
-        font_size: size of the font, default font = 13
-        font_family: family of the font
-
-    Returns:
-    -------
-        the synthesized page
-    """
-    # Draw template
-    h, w = page["dimensions"]
-    response = 255 * np.ones((h, w, 3), dtype=np.int32)
-
-    # Draw each word
-    for predictions in page["predictions"].values():
-        for prediction in predictions:
-            # Get aboslute word geometry
-            (xmin, ymin), (xmax, ymax) = prediction["geometry"]
-            xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
-            ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
-
-            # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
-            font = get_font(font_family, int(0.75 * (ymax - ymin)))
-            img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
-            d = ImageDraw.Draw(img)
-            # Draw in black the value of the word
-            try:
-                d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
-            except UnicodeEncodeError:
-                # When character cannot be encoded, use its unidecode version
-                d.text((0, 0), unidecode(prediction["value"]), font=font, fill=(0, 0, 0))
-
-            # Colorize if draw_proba
-            if draw_proba:
-                p = int(255 * prediction["confidence"])
-                mask = np.where(np.array(img) == 0, 1, 0)
-                proba: np.ndarray = np.array([255 - p, 0, p])
-                color = mask * proba[np.newaxis, np.newaxis, :]
-                white_mask = 255 * (1 - mask)
-                img = color + white_mask
-
-            # Write to response page
-            response[ymin:ymax, xmin:xmax, :] = np.array(img)
-
-    return response
-
-
 def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
     """Draw an array of relative straight boxes on an image
 
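As the removed blocks above show, `synthesize_page` and `synthesize_kie_page` no longer live in `doctr.utils.visualization`; they were rewritten in the new `doctr.utils.reconstitution` module, while `visualize_page`, `visualize_kie_page` and `draw_boxes` stay put (and `mplcursors` is now imported lazily, only when `interactive=True`). A hedged migration sketch for downstream imports, assuming only the module paths shown in this diff:

```python
# 0.8.1 (old import path, removed in this diff):
# from doctr.utils.visualization import synthesize_page, synthesize_kie_page

# 0.10.0 (new module added in this diff):
from doctr.utils.reconstitution import synthesize_kie_page, synthesize_page
from doctr.utils.visualization import draw_boxes, visualize_kie_page, visualize_page
```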
doctr/version.py
CHANGED
@@ -1 +1 @@
-__version__ = 'v0.
+__version__ = 'v0.10.0'
{python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: python-doctr
-Version: 0.
+Version: 0.10.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,87 +219,93 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.
+Requires-Python: <4,>=3.9.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist: unidecode >=1.0.0
-Requires-Dist: tqdm >=4.30.0
+Requires-Dist: numpy<3.0.0,>=1.16.0
+Requires-Dist: scipy<2.0.0,>=1.4.0
+Requires-Dist: h5py<4.0.0,>=3.1.0
+Requires-Dist: opencv-python<5.0.0,>=4.5.0
+Requires-Dist: pypdfium2<5.0.0,>=4.11.0
+Requires-Dist: pyclipper<2.0.0,>=1.2.0
+Requires-Dist: shapely<3.0.0,>=1.6.0
+Requires-Dist: langdetect<2.0.0,>=1.0.9
+Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
+Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
+Requires-Dist: Pillow>=9.2.0
+Requires-Dist: defusedxml>=0.7.0
+Requires-Dist: anyascii>=0.3.2
+Requires-Dist: tqdm>=4.30.0
+Provides-Extra: contrib
+Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
 Provides-Extra: dev
-Requires-Dist: tensorflow
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: sphinx-
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
+Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
+Requires-Dist: torchvision>=0.15.0; extra == "dev"
+Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
+Requires-Dist: weasyprint>=55.0; extra == "dev"
+Requires-Dist: matplotlib>=3.1.0; extra == "dev"
+Requires-Dist: mplcursors>=0.3; extra == "dev"
+Requires-Dist: pytest>=5.3.2; extra == "dev"
+Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
+Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
+Requires-Dist: requests>=2.20.0; extra == "dev"
+Requires-Dist: psutil>=5.9.5; extra == "dev"
+Requires-Dist: ruff>=0.3.0; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Requires-Dist: pre-commit>=3.0.0; extra == "dev"
+Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
+Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
+Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
+Requires-Dist: docutils<0.22; extra == "dev"
+Requires-Dist: recommonmark>=0.7.1; extra == "dev"
+Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
+Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
+Requires-Dist: furo>=2022.3.4; extra == "dev"
 Provides-Extra: docs
-Requires-Dist: sphinx
-Requires-Dist: sphinxemoji
-Requires-Dist: sphinx-copybutton
-Requires-Dist: docutils
-Requires-Dist: recommonmark
-Requires-Dist: sphinx-markdown-tables
-Requires-Dist: sphinx-tabs
-Requires-Dist: furo
+Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
+Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
+Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
+Requires-Dist: docutils<0.22; extra == "docs"
+Requires-Dist: recommonmark>=0.7.1; extra == "docs"
+Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
+Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
+Requires-Dist: furo>=2022.3.4; extra == "docs"
+Provides-Extra: html
+Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: quality
-Requires-Dist: ruff
-Requires-Dist: mypy
-Requires-Dist: pre-commit
+Requires-Dist: ruff>=0.1.5; extra == "quality"
+Requires-Dist: mypy>=0.812; extra == "quality"
+Requires-Dist: pre-commit>=2.17.0; extra == "quality"
 Provides-Extra: testing
-Requires-Dist: pytest
-Requires-Dist: coverage[toml]
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: psutil >=5.9.5 ; extra == 'testing'
+Requires-Dist: pytest>=5.3.2; extra == "testing"
+Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
+Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
+Requires-Dist: requests>=2.20.0; extra == "testing"
+Requires-Dist: psutil>=5.9.5; extra == "testing"
 Provides-Extra: tf
-Requires-Dist: tensorflow
-Requires-Dist:
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
 Provides-Extra: torch
-Requires-Dist: torch
-Requires-Dist: torchvision
-Requires-Dist: onnx
+Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
+Requires-Dist: torchvision>=0.15.0; extra == "torch"
+Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
+Provides-Extra: viz
+Requires-Dist: matplotlib>=3.1.0; extra == "viz"
+Requires-Dist: mplcursors>=0.3; extra == "viz"
 
 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
 pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
 # Image
 single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
-# Webpage
+# Webpage (requires `weasyprint` to be installed)
 webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
 # Multiple page images
 multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -372,6 +378,7 @@ If both options are set to False, the predictor will always fit and return rotat
 To interpret your model's predictions, you can visualize them interactively as follows:
 
 ```python
+# Display the result (requires matplotlib & mplcursors to be installed)
 result.show()
 ```
 
@@ -431,17 +438,7 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ### Prerequisites
 
-Python 3.
-
-Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
-For MacOS users, you can install them as follows:
-
-```shell
-brew install cairo pango gdk-pixbuf libffi
-```
-
-For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
 
 ### Latest release
 
@@ -460,12 +457,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
 pip install "python-doctr[tf]"
 # for PyTorch
 pip install "python-doctr[torch]"
+# optional dependencies for visualization, html, and contrib modules can be installed as follows:
+pip install "python-doctr[torch,viz,html,contib]"
 ```
 
 For MacBooks with M1 chip, you will need some additional packages or specific versions:
 
 - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >=
+- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
 
 ### Developer mode
 
@@ -647,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica
 
 ```python
 import requests
+
+params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
 with open('/path/to/your/doc.jpg', 'rb') as f:
-
-
+    files = [  # application/pdf, image/jpeg, image/png supported
+        ("files", ("doc.jpg", f.read(), "image/jpeg")),
+    ]
+print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
 ```
 
 ### Example notebooks