deepdoctection 0.44.1__py3-none-any.whl → 0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of deepdoctection might be problematic.
- deepdoctection/__init__.py +7 -3
- deepdoctection/analyzer/config.py +44 -0
- deepdoctection/analyzer/factory.py +264 -7
- deepdoctection/configs/profiles.jsonl +2 -1
- deepdoctection/dataflow/parallel_map.py +7 -1
- deepdoctection/datapoint/box.py +5 -5
- deepdoctection/datapoint/image.py +5 -5
- deepdoctection/datapoint/view.py +73 -52
- deepdoctection/eval/cocometric.py +1 -0
- deepdoctection/extern/__init__.py +1 -0
- deepdoctection/extern/base.py +8 -1
- deepdoctection/extern/d2detect.py +1 -1
- deepdoctection/extern/doctrocr.py +18 -2
- deepdoctection/extern/fastlang.py +2 -2
- deepdoctection/extern/hflayoutlm.py +17 -10
- deepdoctection/extern/hflm.py +432 -7
- deepdoctection/extern/tessocr.py +17 -1
- deepdoctection/pipe/language.py +4 -4
- deepdoctection/pipe/lm.py +7 -3
- deepdoctection/pipe/order.py +12 -6
- deepdoctection/pipe/refine.py +10 -1
- deepdoctection/pipe/text.py +6 -0
- deepdoctection/pipe/transform.py +3 -0
- deepdoctection/utils/file_utils.py +34 -5
- deepdoctection/utils/logger.py +38 -1
- deepdoctection/utils/settings.py +2 -0
- deepdoctection/utils/transform.py +43 -18
- deepdoctection/utils/viz.py +24 -15
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/METADATA +16 -21
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/RECORD +33 -33
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/WHEEL +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/refine.py
CHANGED
@@ -27,7 +27,7 @@ from dataclasses import asdict
 from itertools import chain, product
 from typing import DefaultDict, Optional, Sequence, Union
 
-
+from lazy_imports import try_import
 
 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import merge_boxes
@@ -35,10 +35,15 @@ from ..datapoint.image import Image, MetaAnnotation
 from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..utils.error import ImageError
+from ..utils.file_utils import networkx_available
 from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType, get_type
 from .base import PipelineComponent
 from .registry import pipeline_component_registry
 
+with try_import() as import_guard:
+    import networkx as nx  # type: ignore
+
+
 __all__ = ["TableSegmentationRefinementService", "generate_html_string"]
 
 
@@ -441,6 +446,10 @@ class TableSegmentationRefinementService(PipelineComponent):
             table_names: Sequence of table object types.
             cell_names: Sequence of cell object types.
         """
+        if not networkx_available():
+            raise ModuleNotFoundError(
+                "TableSegmentationRefinementService requires networkx. Please install separately."
+            )
         self.table_name = table_names
        self.cell_names = cell_names
         super().__init__("table_segment_refine")
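
Below is a minimal, self-contained sketch of the optional-dependency pattern this change introduces: `try_import` swallows a missing `networkx` at import time, and the constructor fails loudly only when the service is actually instantiated. `ServiceStub` and `networkx_available` here are illustrative stand-ins for the real `TableSegmentationRefinementService` and the helper added in `file_utils.py`.

```python
import importlib.util

from lazy_imports import try_import  # same helper the diff uses

with try_import() as import_guard:
    # a missing networkx does not break module import; the error only surfaces on first use
    import networkx as nx  # noqa: F401


def networkx_available() -> bool:
    """True if networkx can be located without importing it."""
    return importlib.util.find_spec("networkx") is not None


class ServiceStub:
    """Hypothetical stand-in for TableSegmentationRefinementService."""

    def __init__(self) -> None:
        if not networkx_available():
            raise ModuleNotFoundError("ServiceStub requires networkx. Please install separately.")


print(networkx_available())
```
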
deepdoctection/pipe/text.py
CHANGED
@@ -129,6 +129,12 @@ class TextExtractionService(PipelineComponent):
         width, height = self.predictor.get_width_height(predictor_input) # type: ignore
 
         for detect_result in detect_result_list:
+            if width is not None and height is not None:
+                box = detect_result.box
+                if box:
+                    if box[0] >= width or box[1] >= height or box[2] >= width or box[3] >= height:
+                        continue
+
             if isinstance(self.predictor, TextRecognizer):
                 detect_ann_id = detect_result.uuid
             else:
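
The new guard drops OCR detections whose boxes start or end outside the image. A rough standalone sketch of the same condition (plain tuples stand in for deepdoctection's `DetectionResult`; all names are illustrative):

```python
from typing import List, Tuple

Box = Tuple[float, float, float, float]  # (x1, y1, x2, y2)


def filter_out_of_bounds(boxes: List[Box], width: float, height: float) -> List[Box]:
    """Keep only boxes that lie inside a width x height image (same condition as the diff)."""
    kept = []
    for x1, y1, x2, y2 in boxes:
        if x1 >= width or y1 >= height or x2 >= width or y2 >= height:
            continue  # any coordinate at or beyond the image border -> drop the detection
        kept.append((x1, y1, x2, y2))
    return kept


print(filter_out_of_bounds([(10, 10, 50, 20), (590, 5, 640, 25)], width=600, height=800))
# -> [(10, 10, 50, 20)]
```
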
deepdoctection/pipe/transform.py
CHANGED
@@ -77,6 +77,9 @@ class SimpleTransformService(PipelineComponent):
                     score=ann.score,
                     class_id=ann.category_id,
                     uuid=ann.annotation_id,
+                    angle=detection_result.angle,
+                    image_width=dp.width, # we need the original width, not the transformed width
+                    image_height=dp.height, # same with height
                 )
             )
         output_detect_results = self.transform_predictor.transform_coords(detect_results)
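
The inline comments state the intent: a detector that ran on the transformed (e.g. rotated) page must still carry the original page size so its coordinates can be mapped back. A small illustration of why the transformed size is the wrong reference (numbers are made up):

```python
# original page size, i.e. dp.width / dp.height in the diff
orig_width, orig_height = 200, 100
angle = 90  # page was rotated by 90 degrees before detection

# the detector saw the rotated page, whose width and height are swapped
seen_width, seen_height = (orig_height, orig_width) if angle in (90, 270) else (orig_width, orig_height)

print((seen_width, seen_height))   # (100, 200) -> size of the image the boxes were predicted on
print((orig_width, orig_height))   # (200, 100) -> reference needed to map boxes back onto the page
```
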

deepdoctection/utils/file_utils.py
CHANGED

@@ -8,6 +8,7 @@
 Utilities for maintaining dependencies and dealing with external library packages. Parts of this file is adapted from
 <https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py>
 """
+import importlib.metadata
 import importlib.util
 import multiprocessing as mp
 import string
@@ -17,7 +18,6 @@ from shutil import which
 from types import ModuleType
 from typing import Any, Union, no_type_check
 
-import importlib_metadata
 import numpy as np
 from packaging import version
 
@@ -72,9 +72,9 @@ def get_tf_version() -> str:
 
     for pkg in candidates:
         try:
-            tf_version =
+            tf_version = importlib.metadata.version(pkg)
             break
-        except
+        except importlib.metadata.PackageNotFoundError:
             pass
     return tf_version
 
@@ -175,6 +175,19 @@ def get_pytorch_requirement() -> Requirement:
     return "torch", pytorch_available(), _PYTORCH_ERR_MSG
 
 
+_PYZMQ_AVAILABLE = importlib.util.find_spec("zmq") is not None
+
+
+def pyzmq_available() -> bool:
+    """
+    Returns whether pyzmq is installed.
+
+    Returns:
+        bool: True if pyzmq is installed, False otherwise.
+    """
+    return bool(_PYZMQ_AVAILABLE)
+
+
 # lxml
 _LXML_AVAILABLE = importlib.util.find_spec("lxml") is not None
 _LXML_ERR_MSG = f"lxml must be installed. {_GENERIC_ERR_MSG}"
@@ -232,7 +245,7 @@ _DISTANCE_ERR_MSG = f"distance must be installed. {_GENERIC_ERR_MSG}"
 
 def distance_available() -> bool:
     """
-    Returns
+    Returns True if `distance` is available.
 
     Returns:
         bool: `True` if `distance` is available, False otherwise.
@@ -250,6 +263,22 @@ def get_distance_requirement() -> Requirement:
     return "distance", distance_available(), _DISTANCE_ERR_MSG
 
 
+# networkx
+_NETWORKX_AVAILABLE = importlib.util.find_spec("networkx") is not None
+
+
+def networkx_available() -> bool:
+    """
+    Checks if networkx is installed.
+
+    Returns:
+        bool: True if networkx is installed, False otherwise.
+    :return:
+    """
+    return bool(_NETWORKX_AVAILABLE)
+
+
+
 # numpy
 _NUMPY_V1_ERR_MSG = "numpy v1 must be installed."
 
@@ -263,7 +292,7 @@ def numpy_v1_available() -> bool:
     Returns:
         True if the installed NumPy version is 1, otherwise False
     """
-    major_version = np.__version__.split(
+    major_version = np.__version__.split(".", maxsplit=1)[0]
     print(f"major version: {major_version}")
     if major_version in (1, "1"):
         return True
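
The new `pyzmq_available` and `networkx_available` helpers follow the module's usual pattern: probe with `importlib.util.find_spec`, and for versions use the standard-library `importlib.metadata` (which here replaces the removed `importlib_metadata` backport). A standard-library-only sketch of that pattern:

```python
import importlib.metadata
import importlib.util


def package_available(module_name: str) -> bool:
    """True if the module can be located without importing it."""
    return importlib.util.find_spec(module_name) is not None


def installed_version(dist_name: str) -> str:
    """Installed distribution version, or 'n.a.' if it is not installed."""
    try:
        return importlib.metadata.version(dist_name)
    except importlib.metadata.PackageNotFoundError:
        return "n.a."


print(package_available("zmq"), package_available("networkx"))
print(installed_version("numpy"))
```
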
deepdoctection/utils/logger.py
CHANGED
@@ -143,6 +143,43 @@ class FileFormatter(logging.Formatter):
 
 
 _LOG_DIR = None
+
+
+def _coerce_log_level(val: Any) -> Union[int, str]:
+    """Normalize environment log level values.
+
+    Accepts integer values (e.g., ``20``), numeric strings (``"20"``),
+    or names case-insensitively (``"info"``, ``"Warn"``, ...). Returns
+    either an integer level number or a valid uppercase level name
+    accepted by the :mod:`logging` module.
+
+    Args:
+        val: The raw value from the environment variable ``LOG_LEVEL``.
+
+    Returns:
+        int | str: The corresponding logging level as an int or an
+        uppercase string. Defaults to ``"INFO"`` if the input is invalid.
+    """
+    if isinstance(val, int):
+        return val
+    if val is None:
+        return "INFO"
+    s = str(val).strip()
+    if s.isdigit():
+        return int(s)
+    name = s.upper()
+    if name == "WARN":
+        name = "WARNING"
+    if name in logging._nameToLevel:  # pylint: disable=W0212
+        return name
+    lvl = logging.getLevelName(name)
+    return lvl if isinstance(lvl, int) else "INFO"
+
+
+# resolve level from LOG_LEVEL only
+_ENV_LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
+_RESOLVED_LOG_LEVEL = _coerce_log_level(_ENV_LOG_LEVEL)
+
 _CONFIG_DICT: dict[str, Any] = {
     "version": 1,
     "disable_existing_loggers": False,
@@ -155,7 +192,7 @@ _CONFIG_DICT: dict[str, Any] = {
     },
     "root": {
         "handlers": ["streamhandler"],
-        "level":
+        "level": _RESOLVED_LOG_LEVEL,
         "propagate": os.environ.get("LOG_PROPAGATE", "False") in ENV_VARS_TRUE,
     },
 }
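
For reference, the coercion logic above can be exercised on its own; this is a local copy of the function body (for illustration) with a few sample inputs and the expected output in comments:

```python
import logging
from typing import Any, Union


def coerce_log_level(val: Any) -> Union[int, str]:
    """Local copy of _coerce_log_level, for illustration only."""
    if isinstance(val, int):
        return val
    if val is None:
        return "INFO"
    s = str(val).strip()
    if s.isdigit():
        return int(s)
    name = s.upper()
    if name == "WARN":
        name = "WARNING"
    if name in logging._nameToLevel:  # pylint: disable=protected-access
        return name
    lvl = logging.getLevelName(name)
    return lvl if isinstance(lvl, int) else "INFO"


for raw in (20, "20", "info", "Warn", "nonsense", None):
    print(repr(raw), "->", repr(coerce_log_level(raw)))
# 20 -> 20, '20' -> 20, 'info' -> 'INFO', 'Warn' -> 'WARNING', 'nonsense' -> 'INFO', None -> 'INFO'
```
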
deepdoctection/utils/settings.py
CHANGED
@@ -108,6 +108,7 @@ class DocumentType(ObjectTypes):
     GOVERNMENT_TENDERS = "government_tenders"
     MANUALS = "manuals"
     PATENTS = "patents"
+    BANK_STATEMENT = "bank_statement"
 
 
 @object_types_registry.register("LayoutType")
@@ -296,6 +297,7 @@ class Languages(ObjectTypes):
     BOSNIAN = "bos"
     NORWEGIAN_NOVOSIBIRSK = "nno"
     URDU = "urd"
+    SWAHILI = "swa"
     NOT_DEFINED = "nn"
 
 

deepdoctection/utils/transform.py
CHANGED

@@ -408,8 +408,35 @@ class RotationTransform(BaseTransform):
             angle: Angle to rotate the image. Must be one of 90, 180, 270, or 360 degrees.
         """
         self.angle = angle
-        self.image_width: Optional[int] = None
-        self.image_height: Optional[int] = None
+        self.image_width: Optional[Union[int, float]] = None
+        self.image_height: Optional[Union[int, float]] = None
+
+    def set_angle(self, angle: Literal[90, 180, 270, 360]) -> None:
+        """
+        Set angle
+
+        Args:
+            angle: One of 90, 180, 270, or 360 degrees.
+        """
+        self.angle = angle
+
+    def set_image_width(self, image_width: Union[int, float]) -> None:
+        """
+        Set image width
+
+        Args:
+            image_width: Either a positive integer or 1.
+        """
+        self.image_width = image_width
+
+    def set_image_height(self, image_height: Union[int, float]) -> None:
+        """
+        Set image height
+
+        Args:
+            image_height: Either a positive integer or 1.
+        """
+        self.image_height = image_height
 
     def apply_image(self, img: PixelValues) -> PixelValues:
         """
@@ -442,17 +469,16 @@ class RotationTransform(BaseTransform):
             raise ValueError("Initialize image_width and image_height first")
 
         if self.angle == 90:
-
+            self.image_width = self.image_height
+            coords[:, [0, 1, 2, 3]] = coords[:, [1, 2, 3, 0]]
             coords[:, [1, 3]] = self.image_width - coords[:, [1, 3]]
-            coords[:, [0, 1, 2, 3]] = coords[:, [0, 3, 2, 1]]
         elif self.angle == 180:
-            coords[:, [0, 2]] = self.image_width - coords[:, [
-            coords[:, [1, 3]] = self.image_height - coords[:, [
-            coords[:, [0, 1, 2, 3]] = coords[:, [2, 3, 0, 1]]
+            coords[:, [0, 2]] = self.image_width - coords[:, [2, 0]]
+            coords[:, [1, 3]] = self.image_height - coords[:, [3, 1]]
         elif self.angle == 270:
-
+            self.image_height = self.image_width
+            coords[:, [0, 1, 2, 3]] = coords[:, [3, 0, 1, 2]]
             coords[:, [0, 2]] = self.image_height - coords[:, [0, 2]]
-            coords[:, [0, 1, 2, 3]] = coords[:, [2, 1, 0, 3]]
 
         return coords
 
@@ -473,17 +499,16 @@ class RotationTransform(BaseTransform):
             raise ValueError("Initialize image_width and image_height first")
 
         if self.angle == 90:
-
-            coords[:, [0, 2]] =
-            coords[:, [0,
+            self.image_height = self.image_width
+            coords[:, [0, 1, 2, 3]] = coords[:, [3, 0, 1, 2]]
+            coords[:, [0, 2]] = self.image_height - coords[:, [0, 2]]
         elif self.angle == 180:
-            coords[:, [0, 2]] = self.image_width - coords[:, [
-            coords[:, [1, 3]] = self.image_height - coords[:, [
-            coords[:, [0, 1, 2, 3]] = coords[:, [2, 3, 0, 1]]
+            coords[:, [0, 2]] = self.image_width - coords[:, [2, 0]]
+            coords[:, [1, 3]] = self.image_height - coords[:, [3, 1]]
         elif self.angle == 270:
-
-            coords[:, [1, 3]] =
-            coords[:, [
+            self.image_width = self.image_height
+            coords[:, [0, 1, 2, 3]] = coords[:, [1, 2, 3, 0]]
+            coords[:, [1, 3]] = self.image_width - coords[:, [1, 3]]
         return coords
 
     def clone(self) -> RotationTransform:
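
As a generic sanity check on box rotation (under my own convention of rotating the image 90° clockwise, not necessarily the library's internal formula): rotating a box and then applying the inverse must return the original coordinates, and the inverse step needs the pre-rotation image size, which is what the new width/height setters make adjustable.

```python
import numpy as np


def rotate_box_90_cw(box: np.ndarray, height: float) -> np.ndarray:
    """Box (x1, y1, x2, y2) after rotating the image 90 degrees clockwise; height is the original image height."""
    x1, y1, x2, y2 = box
    return np.array([height - y2, x1, height - y1, x2])


def rotate_box_90_ccw(box: np.ndarray, orig_height: float) -> np.ndarray:
    """Inverse of rotate_box_90_cw: map a box on the rotated image back to the original image."""
    a1, b1, a2, b2 = box
    return np.array([b1, orig_height - a2, b2, orig_height - a1])


box = np.array([10.0, 20.0, 40.0, 60.0])
height = 100  # original image height, i.e. the rotated image's width
round_trip = rotate_box_90_ccw(rotate_box_90_cw(box, height), height)
assert np.allclose(round_trip, box)
print(round_trip)  # [10. 20. 40. 60.]
```
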
deepdoctection/utils/viz.py
CHANGED
@@ -20,10 +20,11 @@ Visualisation utils. Copied and pasted from
 """
 
 import base64
+import hashlib
 import os
 import sys
 from io import BytesIO
-from typing import Any, Optional, Sequence, no_type_check
+from typing import Any, Optional, Sequence, Tuple, Union, no_type_check
 
 import numpy as np
 import numpy.typing as npt
@@ -177,17 +178,23 @@ _COLORS = (
 )
 
 
-def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
+def random_color(
+    rgb: bool = True, maximum: int = 255, deterministic_input_str: Optional[str] = None
+) -> tuple[int, int, int]:
     """
     Args:
         rgb: Whether to return RGB colors or BGR colors.
         maximum: Either 255 or 1.
+        deterministic_input_str: A string to use for deterministic color generation.
 
     Returns:
         A tuple of three integers representing the color.
     """
-
-
+    if deterministic_input_str:
+        hash_digest = hashlib.md5(deterministic_input_str.encode("utf-8")).hexdigest()
+        idx = int(hash_digest, 16) % len(_COLORS)
+    else:
+        idx = np.random.randint(0, len(_COLORS))
     ret = _COLORS[idx] * maximum
     if not rgb:
         ret = ret[::-1]
@@ -197,7 +204,7 @@ def random_color(rgb: bool = True, maximum: int = 255) -> tuple[int, int, int]:
 def draw_boxes(
     np_image: PixelValues,
     boxes: npt.NDArray[float32],
-    category_names_list: Optional[list[
+    category_names_list: Optional[list[Tuple[Union[str, None], Union[str, None]]]] = None,
     color: Optional[BGR] = None,
     font_scale: float = 1.0,
     rectangle_thickness: int = 4,
@@ -210,7 +217,8 @@ def draw_boxes(
     Args:
         np_image: Image as `np.ndarray`.
         boxes: A numpy array of shape Nx4 where each row is `[x1, y1, x2, y2]`.
-        category_names_list: List of N category
+        category_names_list: List of N tuples. The first element is the category name, whereas the second element is
+            the value, that is going to be displayed in the text box..
         color: A 3-tuple BGR color (in range `[0, 255]`).
         font_scale: Font scale of text box.
         rectangle_thickness: Thickness of bounding box.
@@ -230,13 +238,14 @@ def draw_boxes(
     category_to_color = {}
     if box_color_by_category and category_names_list is not None:
         category_names = set(category_names_list)
-        category_to_color = {
-
+        category_to_color = {
+            category[1]: random_color(deterministic_input_str=category[1]) for category in category_names
+        }
     boxes = np.array(boxes, dtype="int32")
     if category_names_list is not None:
         assert len(category_names_list) == len(boxes), f"{len(category_names_list)} != {len(boxes)}"
     else:
-        category_names_list = [None] * len(boxes)
+        category_names_list = [(None, None)] * len(boxes)
     areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
     sorted_inds = np.argsort(-areas) # draw large ones first
     assert areas.min() > 0, areas.min()
@@ -255,12 +264,12 @@ def draw_boxes(
         np_image = cv2.cvtColor(np_image, cv2.COLOR_GRAY2BGR).astype(np.uint8)
     for i in sorted_inds:
         box = boxes[i, :]
-        choose_color = category_to_color.get(category_names_list[i]) if category_to_color is not None else color
+        choose_color = category_to_color.get(category_names_list[i][1]) if category_to_color is not None else color
         if choose_color is None:
             choose_color = random_color()
-        if category_names_list[i] is not None:
+        if category_names_list[i][0] is not None:
             np_image = viz_handler.draw_text(
-                np_image, (box[0], box[1]), category_names_list[i], color=choose_color, font_scale=font_scale
+                np_image, (box[0], box[1]), category_names_list[i][0], color=choose_color, font_scale=font_scale
             )
         np_image = viz_handler.draw_rectangle(
             np_image, (box[0], box[1], box[2], box[3]), choose_color, rectangle_thickness
@@ -423,7 +432,7 @@ class VizPackageHandler:
 
     @staticmethod
     def _cv2_read_image(path: PathLikeOrStr) -> PixelValues:
-        return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)
+        return cv2.imread(os.fspath(path), cv2.IMREAD_COLOR).astype(np.uint8)  # type: ignore
 
     @staticmethod
     def _pillow_read_image(path: PathLikeOrStr) -> PixelValues:
@@ -517,7 +526,7 @@ class VizPackageHandler:
     @staticmethod
     def _cv2_convert_b64_to_np(image: B64Str) -> PixelValues:
         np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
-        np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
+        np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)  # type: ignore
         return np_array.astype(uint8)
 
     @staticmethod
@@ -543,7 +552,7 @@ class VizPackageHandler:
     def _cv2_convert_bytes_to_np(image_bytes: bytes) -> PixelValues:
         np_array = np.frombuffer(image_bytes, np.uint8)
         np_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
-        return np_image
+        return np_image  # type: ignore
 
     @staticmethod
     def _pillow_convert_bytes_to_np(image_bytes: bytes) -> PixelValues:
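
The point of `deterministic_input_str` is that the same category string always hashes to the same palette entry, so colors stay stable across runs. A tiny sketch with a made-up palette (`_PALETTE` stands in for the module's `_COLORS` table):

```python
import hashlib

_PALETTE = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]


def color_for(name: str) -> tuple:
    """Deterministic palette lookup: md5(name) -> index, as in the new random_color branch."""
    digest = hashlib.md5(name.encode("utf-8")).hexdigest()
    return _PALETTE[int(digest, 16) % len(_PALETTE)]


print(color_for("table"), color_for("table"))  # identical on every run
print(color_for("word"))                       # a different category usually gets a different color
```
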

{deepdoctection-0.44.1.dist-info → deepdoctection-0.46.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepdoctection
-Version: 0.44.1
+Version: 0.46
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer
@@ -19,18 +19,15 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: catalogue==2.0.10
 Requires-Dist: huggingface_hub>=0.26.0
-Requires-Dist: importlib-metadata>=5.0.0
 Requires-Dist: jsonlines==3.1.0
 Requires-Dist: lazy-imports==0.3.1
 Requires-Dist: mock==4.0.3
-Requires-Dist:
-Requires-Dist: numpy<2.0,>=1.21
+Requires-Dist: numpy>2.0
 Requires-Dist: packaging>=20.0
 Requires-Dist: Pillow>=10.0.0
 Requires-Dist: pypdf>=6.0.0
 Requires-Dist: pypdfium2>=4.30.0
 Requires-Dist: pyyaml>=6.0.1
-Requires-Dist: pyzmq>=16
 Requires-Dist: scipy>=1.13.1
 Requires-Dist: termcolor>=1.1
 Requires-Dist: tabulate>=0.7.7
@@ -38,18 +35,15 @@ Requires-Dist: tqdm>=4.64.0
 Provides-Extra: tf
 Requires-Dist: catalogue==2.0.10; extra == "tf"
 Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
-Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
 Requires-Dist: jsonlines==3.1.0; extra == "tf"
 Requires-Dist: lazy-imports==0.3.1; extra == "tf"
 Requires-Dist: mock==4.0.3; extra == "tf"
-Requires-Dist:
-Requires-Dist: numpy<2.0,>=1.21; extra == "tf"
+Requires-Dist: numpy>2.0; extra == "tf"
 Requires-Dist: packaging>=20.0; extra == "tf"
 Requires-Dist: Pillow>=10.0.0; extra == "tf"
 Requires-Dist: pypdf>=6.0.0; extra == "tf"
 Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
 Requires-Dist: pyyaml>=6.0.1; extra == "tf"
-Requires-Dist: pyzmq>=16; extra == "tf"
 Requires-Dist: scipy>=1.13.1; extra == "tf"
 Requires-Dist: termcolor>=1.1; extra == "tf"
 Requires-Dist: tabulate>=0.7.7; extra == "tf"
@@ -58,30 +52,28 @@ Requires-Dist: tensorpack==0.11; extra == "tf"
 Requires-Dist: protobuf==3.20.1; extra == "tf"
 Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
 Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
-Requires-Dist: python-doctr==0.
+Requires-Dist: python-doctr==0.10.0; extra == "tf"
 Requires-Dist: pycocotools>=2.0.2; extra == "tf"
 Requires-Dist: boto3==1.34.102; extra == "tf"
 Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
-Requires-Dist:
+Requires-Dist: pyzmq>=16; extra == "tf"
 Requires-Dist: jdeskew>=0.2.2; extra == "tf"
 Requires-Dist: apted==1.0.3; extra == "tf"
 Requires-Dist: distance==0.1.3; extra == "tf"
 Requires-Dist: lxml>=4.9.1; extra == "tf"
+Requires-Dist: networkx>=2.7.1; extra == "tf"
 Provides-Extra: pt
 Requires-Dist: catalogue==2.0.10; extra == "pt"
 Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
-Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
 Requires-Dist: jsonlines==3.1.0; extra == "pt"
 Requires-Dist: lazy-imports==0.3.1; extra == "pt"
 Requires-Dist: mock==4.0.3; extra == "pt"
-Requires-Dist:
-Requires-Dist: numpy<2.0,>=1.21; extra == "pt"
+Requires-Dist: numpy>2.0; extra == "pt"
 Requires-Dist: packaging>=20.0; extra == "pt"
 Requires-Dist: Pillow>=10.0.0; extra == "pt"
 Requires-Dist: pypdf>=6.0.0; extra == "pt"
 Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
 Requires-Dist: pyyaml>=6.0.1; extra == "pt"
-Requires-Dist: pyzmq>=16; extra == "pt"
 Requires-Dist: scipy>=1.13.1; extra == "pt"
 Requires-Dist: termcolor>=1.1; extra == "pt"
 Requires-Dist: tabulate>=0.7.7; extra == "pt"
@@ -89,15 +81,16 @@ Requires-Dist: tqdm>=4.64.0; extra == "pt"
 Requires-Dist: timm>=0.9.16; extra == "pt"
 Requires-Dist: transformers>=4.48.0; extra == "pt"
 Requires-Dist: accelerate>=0.29.1; extra == "pt"
-Requires-Dist: python-doctr==0.
+Requires-Dist: python-doctr==0.10.0; extra == "pt"
 Requires-Dist: pycocotools>=2.0.2; extra == "pt"
 Requires-Dist: boto3==1.34.102; extra == "pt"
 Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
-Requires-Dist:
+Requires-Dist: pyzmq>=16; extra == "pt"
 Requires-Dist: jdeskew>=0.2.2; extra == "pt"
 Requires-Dist: apted==1.0.3; extra == "pt"
 Requires-Dist: distance==0.1.3; extra == "pt"
 Requires-Dist: lxml>=4.9.1; extra == "pt"
+Requires-Dist: networkx>=2.7.1; extra == "pt"
 Provides-Extra: docs
 Requires-Dist: tensorpack==0.11; extra == "docs"
 Requires-Dist: boto3==1.34.102; extra == "docs"
@@ -183,7 +176,8 @@ It also provides a framework for training, evaluating and inferencing Document A
   [**LiLT**](https://github.com/jpWang/LiLT) and selected
   [**Bert**](https://huggingface.co/docs/transformers/model_doc/xlm-roberta)-style including features like sliding windows.
 - Text mining for native PDFs with [**pdfplumber**](https://github.com/jsvine/pdfplumber),
-- Language detection with [**fastText**](https://github.com/facebookresearch/fastText)
+- Language detection with `papluca/xlm-roberta-base-language-detection`. [**fastText**](https://github.com/facebookresearch/fastText) is still available but
+  will be removed in a future version.
 - Deskewing and rotating images with [**jdeskew**](https://github.com/phamquiluan/jdeskew).
 - Fine-tuning and evaluation tools.
 - Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
@@ -294,7 +288,7 @@ alt="text" width="40%">
 
 - Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
 - Python >= 3.9
-- 2.
+- 2.6 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
   Tensorflow support will be stopped from Python 3.11 onwards.
 - To fine-tune models, a GPU is recommended.
 
@@ -321,7 +315,7 @@ For a simple setup which is enough to parse documents with the default setting,
 
 ```
 pip install transformers
-pip install python-doctr==0.
+pip install python-doctr==0.10.0 # If you use Python 3.10 or higher you can use the latest version.
 pip install deepdoctection
 ```
 
@@ -329,8 +323,9 @@ pip install deepdoctection
 
 ```
 pip install tensorpack
-pip install python-doctr==0.9.0
 pip install deepdoctection
+pip install "numpy>=1.21,<2.0" --upgrade --force-reinstall # because TF 2.11 does not support numpy 2.0
+pip install "python-doctr==0.9.0"
 ```
 
 Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).