PyPI - deepdoctection - Versions diffs - 0.34__py3-none-any.whl → 0.35__py3-none-any.whl - Mend

deepdoctection 0.34py3-none-any.whl → 0.35py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (21) hide show

deepdoctection/__init__.py +6 -10
deepdoctection/analyzer/__init__.py +1 -0
deepdoctection/analyzer/_config.py +150 -0
deepdoctection/analyzer/dd.py +11 -335
deepdoctection/analyzer/factory.py +522 -0
deepdoctection/configs/conf_dd_one.yaml +1 -0
deepdoctection/datapoint/annotation.py +1 -1
deepdoctection/datapoint/convert.py +6 -4
deepdoctection/datapoint/image.py +16 -6
deepdoctection/datapoint/view.py +1 -0
deepdoctection/extern/pdftext.py +96 -5
deepdoctection/extern/tessocr.py +1 -0
deepdoctection/utils/env_info.py +30 -1
deepdoctection/utils/file_utils.py +19 -0
deepdoctection/utils/metacfg.py +12 -0
deepdoctection/utils/pdf_utils.py +86 -3
{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/METADATA +17 -11
{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/RECORD +21 -19
{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/WHEEL +1 -1
{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/LICENSE +0 -0
{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/top_level.txt +0 -0

deepdoctection/extern/pdftext.py CHANGED Viewed

@@ -24,21 +24,25 @@ from typing import Optional
 from lazy_imports import try_import
 from ..utils.context import save_tmp_file
-from ..utils.file_utils import get_pdfplumber_requirement
+from ..utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
 from ..utils.settings import LayoutType, ObjectTypes
 from ..utils.types import Requirement
 from .base import DetectionResult, ModelCategories, PdfMiner
-with try_import() as import_guard:
+with try_import() as pdfplumber_import_guard:
     from pdfplumber.pdf import PDF, Page
+with try_import() as pypdfmium_import_guard:
+    import pypdfium2.raw as pypdfium_c
+    from pypdfium2 import PdfDocument
-def _to_detect_result(word: dict[str, str]) -> DetectionResult:
+def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> DetectionResult:
     return DetectionResult(
         box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
         class_id=1,
         text=word["text"],
-        class_name=LayoutType.WORD,
+        class_name=class_name,
     )
@@ -49,6 +53,7 @@ class PdfPlumberTextDetector(PdfMiner):
         pdf_plumber = PdfPlumberTextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
+        df.reset_state()
         for dp in df:
             detection_results = pdf_plumber.predict(dp["pdf_bytes"])
@@ -61,6 +66,8 @@ class PdfPlumberTextDetector(PdfMiner):
         pipe = DoctectionPipe([text_extract])
         df = pipe.analyze(path="path/to/document.pdf")
+        df.reset_state()
         for dp in df:
             ...
@@ -87,7 +94,7 @@ class PdfPlumberTextDetector(PdfMiner):
                 self._page = PDF(fin).pages[0]
                 self._pdf_bytes = pdf_bytes
                 words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
-        detect_results = list(map(_to_detect_result, words))
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
         return detect_results
     @classmethod
@@ -113,3 +120,87 @@ class PdfPlumberTextDetector(PdfMiner):
     def get_category_names(self) -> tuple[ObjectTypes, ...]:
         return self.categories.get_categories(as_dict=False)
+class Pdfmium2TextDetector(PdfMiner):
+    """
+    Text miner based on the pypdfium2 engine. It returns text on text line level and not on word level.
+        pdfmium2 = Pdfmium2TextDetector()
+        df = SerializerPdfDoc.load("path/to/document.pdf")
+        df.reset_state()
+        for dp in df:
+            detection_results = pdfmium2.predict(dp["pdf_bytes"])
+    To use it in a more integrated way:
+        pdfmium2 = Pdfmium2TextDetector()
+        text_extract = TextExtractionService(pdfmium2)
+        pipe = DoctectionPipe([text_extract])
+        df = pipe.analyze(path="path/to/document.pdf")
+        df.reset_state()
+        for dp in df:
+            ...
+    """
+    def __init__(self) -> None:
+        self.name = "Pdfmium"
+        self.model_id = self.get_model_id()
+        self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
+        # Single page cache used by `get_width_height`.
+        # NOTE(review): `Page` is the pdfplumber type, but the object stored here is the result of
+        # `PdfDocument.get_page` -- consider tightening the annotation.
+        self._page: Optional[Page] = None
+        # Has to be initialised here: `get_width_height` compares against it before it is assigned for the first time.
+        self._pdf_bytes: Optional[bytes] = None
+    def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
+        """
+        Call pypdfium2 and returns detected text as detection results
+        :param pdf_bytes: bytes of a single pdf page
+        :return: A list of DetectionResult, one per text object of the page
+        """
+        pdf = PdfDocument(pdf_bytes)
+        page = pdf.get_page(0)
+        text = page.get_textpage()
+        words = []
+        height = page.get_height()
+        for obj in page.get_objects((pypdfium_c.FPDF_PAGEOBJ_TEXT,)):
+            # box is used as (left, bottom, right, top); the y values are flipped against the page height below,
+            # presumably because pdfium measures from the bottom of the page -- TODO confirm
+            box = obj.get_pos()
+            # text objects having a non-positive coordinate are skipped
+            if all(x > 0 for x in box):
+                words.append(
+                    {
+                        "text": text.get_text_bounded(*box),
+                        "x0": box[0],
+                        "x1": box[2],
+                        "top": height - box[3],
+                        "bottom": height - box[1],
+                    }
+                )
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
+        return detect_results
+    @classmethod
+    def get_requirements(cls) -> list[Requirement]:
+        """
+        :return: The pypdfium2 requirement
+        """
+        return [get_pypdfium2_requirement()]
+    def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
+        """
+        Get the width and height of the full page. The page of the last call is cached, so that repeated calls with
+        the same bytes do not parse the document again.
+        :param pdf_bytes: pdf_bytes generating the pdf
+        :return: width and height
+        :raises ValueError: if no page could be loaded
+        """
+        if self._page is None or self._pdf_bytes != pdf_bytes:
+            # if the pdf bytes is not equal to the cached pdf, will recalculate values
+            self._page = PdfDocument(pdf_bytes).get_page(0)
+            self._pdf_bytes = pdf_bytes
+        if self._page is None:
+            raise ValueError("Page not found")
+        # the cached and the freshly loaded page are read through the same pypdfium2 accessors
+        return self._page.get_width(), self._page.get_height()  # type: ignore
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        """
+        :return: The category names of this miner
+        """
+        return self.categories.get_categories(as_dict=False)

deepdoctection/extern/tessocr.py CHANGED Viewed

@@ -421,6 +421,7 @@ class TesseractRotationTransformer(ImageTransformer):
     def __init__(self) -> None:
         self.name = fspath(_TESS_PATH) + "-rotation"
         self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
+        self.model_id = self.get_model_id()
     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """

deepdoctection/utils/env_info.py CHANGED Viewed

@@ -20,6 +20,10 @@ Some useful function for collecting environment information.
 This is also the place where we give an overview of the important environment variables.
+For env variables with boolean character, use one of the following values:
+{"1", "True", "TRUE", "true", "yes"}
 `USE_TENSORFLOW
 USE_PYTORCH
 USE_CUDA
@@ -35,6 +39,12 @@ decide what image processing library the `viz_handler` should use. The default l
 to be installed separately. However, if both libraries have been detected `viz_handler` will opt for OpenCV.
 Use the variables to let choose `viz_handler` according to your preferences.
+`USE_DD_POPPLER
+USE_DD_PDFIUM`
+For PDF rendering we use PyPDFium2 as default but, for legacy reasons, we also support Poppler. If you want to enforce
+Poppler, set `USE_DD_POPPLER=True` and `USE_DD_PDFIUM=False`.
 `HF_CREDENTIALS`
 will be used by the `ModelDownloadManager` to pass your credentials if you have a model registered that resides in a
@@ -56,6 +66,7 @@ from typing import Optional
 import numpy as np
 from packaging import version
+from pypdf.errors import DependencyError
 from tabulate import tabulate
 from .file_utils import (
@@ -75,6 +86,7 @@ from .file_utils import (
     pdf_to_cairo_available,
     pdf_to_ppm_available,
     pdfplumber_available,
+    pypdfium2_available,
     pytorch_available,
     qpdf_available,
     scipy_available,
@@ -88,7 +100,7 @@ from .file_utils import (
 from .logger import LoggingRecord, logger
 from .types import KeyValEnvInfos, PathLikeOrStr
-__all__ = ["collect_env_info", "auto_select_viz_library", "ENV_VARS_TRUE"]
+__all__ = ["collect_env_info", "auto_select_viz_library", "auto_select_pdf_render_framework", "ENV_VARS_TRUE"]
 # pylint: disable=import-outside-toplevel
@@ -532,4 +544,21 @@ def auto_select_viz_library() -> None:
         os.environ["USE_DD_OPENCV"] = "False"
+def auto_select_pdf_render_framework() -> None:
+    """
+    Select the pdf rendering library by setting the environment variables `USE_DD_POPPLER` and `USE_DD_PDFIUM`.
+    Variables that are already set are never changed. If only one of them is set, the other one is set to the
+    opposite value, so that both always describe the same choice. If none is set, pdfium is selected when pypdfium2
+    is installed, otherwise Poppler is selected when `pdftocairo` or `pdftoppm` is available.
+    :raises DependencyError: if neither pypdfium2 nor Poppler has been found
+    """
+    poppler = os.environ.get("USE_DD_POPPLER")
+    pdfium = os.environ.get("USE_DD_PDFIUM")
+    # if env variables are already set, don't change them
+    if poppler and pdfium:
+        return
+    # only one variable has been set: derive the missing one, otherwise e.g. `USE_DD_POPPLER=False` alone would
+    # still end up with Poppler because an unset `USE_DD_PDFIUM` is read as False
+    if poppler:
+        os.environ["USE_DD_PDFIUM"] = "False" if poppler in ENV_VARS_TRUE else "True"
+        return
+    if pdfium:
+        os.environ["USE_DD_POPPLER"] = "False" if pdfium in ENV_VARS_TRUE else "True"
+        return
+    if pypdfium2_available():
+        os.environ["USE_DD_POPPLER"] = "False"
+        os.environ["USE_DD_PDFIUM"] = "True"
+        return
+    if pdf_to_cairo_available() or pdf_to_ppm_available():
+        os.environ["USE_DD_POPPLER"] = "True"
+        os.environ["USE_DD_PDFIUM"] = "False"
+        return
+    raise DependencyError("No pdf rendering library found. Please install Poppler or pdfium.")
 # pylint: enable=import-outside-toplevel

deepdoctection/utils/file_utils.py CHANGED Viewed

@@ -616,6 +616,25 @@ def get_pillow_requirement() -> Requirement:
     return "pillow", pillow_available(), _PILLOW_ERR_MSG
+# Pypdfium2
+_PYPDFIUM2_AVAILABLE = importlib.util.find_spec("pypdfium2") is not None
+_PYPDFIUM2_ERR_MSG = f"pypdfium2 must be installed. {_GENERIC_ERR_MSG}"
+def pypdfium2_available() -> bool:
+    """
+    Tell whether pypdfium2 can be used.
+    :return: True if the module level availability flag for pypdfium2 is truthy, False otherwise
+    """
+    is_available = bool(_PYPDFIUM2_AVAILABLE)
+    return is_available
+def get_pypdfium2_requirement() -> Requirement:
+    """
+    Build the requirement entry for pypdfium2.
+    :return: A tuple of the package name, the availability flag and the error message for a missing installation
+    """
+    is_installed = pypdfium2_available()
+    return "pypdfium2", is_installed, _PYPDFIUM2_ERR_MSG
 # SpaCy
 _SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None
 _SPACY_ERR_MSG = f"SpaCy must be installed. {_GENERIC_ERR_MSG}"

deepdoctection/utils/metacfg.py CHANGED Viewed

@@ -18,6 +18,7 @@
 """
 Class AttrDict for maintaining configs and some functions for generating and saving AttrDict instances to .yaml files
 """
+from __future__ import annotations
 import pprint
 from typing import Any
@@ -105,6 +106,17 @@ class AttrDict:
                 v = eval(v)  # pylint: disable=C0103, W0123
             setattr(dic, key, v)
+    def overwrite_config(self, other_config: AttrDict) -> None:
+        """
+        Take over the values of another config. The other config is converted into a dict, which is then passed
+        to `from_dict` of this instance.
+        :param other_config: The AttrDict instance providing the new values.
+        :raises AttributeError: If this config has been freezed.
+        """
+        if self._freezed:
+            raise AttributeError("Config was freezed! Cannot overwrite config.")
+        new_values = other_config.to_dict()
+        self.from_dict(new_values)
     def freeze(self, freezed: bool = True) -> None:
         """
         :param freezed: freeze the instance, so that no attributes can be added or changed

deepdoctection/utils/pdf_utils.py CHANGED Viewed

@@ -24,13 +24,16 @@ import subprocess
 import sys
 from errno import ENOENT
 from io import BytesIO
+from pathlib import Path
 from shutil import copyfile
-from typing import Generator, Optional
+from typing import Generator, Literal, Optional
+from lazy_imports import try_import
 from numpy import uint8
 from pypdf import PdfReader, PdfWriter, errors
 from .context import save_tmp_file, timeout_manager
+from .env_info import ENV_VARS_TRUE
 from .error import DependencyError, FileExtensionError
 from .file_utils import pdf_to_cairo_available, pdf_to_ppm_available, qpdf_available
 from .logger import LoggingRecord, logger
@@ -38,7 +41,17 @@ from .types import PathLikeOrStr, PixelValues
 from .utils import is_file_extension
 from .viz import viz_handler
-__all__ = ["decrypt_pdf_document", "get_pdf_file_reader", "get_pdf_file_writer", "PDFStreamer", "pdf_to_np_array"]
+with try_import() as pt_import_guard:
+    import pypdfium2
+__all__ = [
+    "decrypt_pdf_document",
+    "get_pdf_file_reader",
+    "get_pdf_file_writer",
+    "PDFStreamer",
+    "pdf_to_np_array",
+    "split_pdf",
+]
 def decrypt_pdf_document(path: PathLikeOrStr) -> bool:
@@ -234,7 +247,7 @@ def _run_poppler(poppler_args: list[str]) -> None:
             raise PopplerError(status=proc.returncode, message="Syntax Error: PDF cannot be read with Poppler")
-def pdf_to_np_array(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: int = 200) -> PixelValues:
+def pdf_to_np_array_poppler(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: int = 200) -> PixelValues:
     """
     Convert a single pdf page from its byte representation to a numpy array. This function will save the pdf as to a tmp
     file and then call poppler via `pdftoppm` resp. `pdftocairo` if the former is not available.
@@ -250,3 +263,73 @@ def pdf_to_np_array(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dp
         image = viz_handler.read_image(tmp_name + "-1.png")
     return image.astype(uint8)
+def pdf_to_np_array_pdfmium(pdf_bytes: bytes, dpi: int = 200) -> PixelValues:
+    """
+    Render the first page of a pdf, given as bytes, into a numpy array by means of pdfium.
+    :param pdf_bytes: Bytes representing the PDF file
+    :param dpi:  Image quality in DPI/dots-per-inch (default 200)
+    :return: numpy array of dtype uint8
+    """
+    document = pypdfium2.PdfDocument(pdf_bytes)
+    first_page = document[0]
+    # the render scale is the requested dpi relative to 72 (presumably the pdf base resolution of 72 units per inch)
+    bitmap = first_page.render(scale=dpi / 72)
+    return bitmap.to_numpy().astype(uint8)
+def pdf_to_np_array(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: int = 200) -> PixelValues:
+    """
+    Render a single pdf page, given as bytes, into a numpy array. The page is rendered with pdfium if the environment
+    variable `USE_DD_PDFIUM` holds a true value and with Poppler otherwise.
+    :param pdf_bytes: Bytes representing the PDF file
+    :param size: Size of the resulting image(s), uses (width, height) standard. Only respected by Poppler
+    :param dpi:  Image quality in DPI/dots-per-inch (default 200)
+    :return: numpy array
+    """
+    use_pdfium = os.environ.get("USE_DD_PDFIUM", "False") in ENV_VARS_TRUE
+    if not use_pdfium:
+        return pdf_to_np_array_poppler(pdf_bytes, size, dpi)
+    if size is not None:
+        # pdfium renders by dpi only, so the caller is told that the requested size is dropped
+        msg = f"pdf_to_np_array_pdfmium does not support the size parameter. Will use dpi = {dpi} instead."
+        logger.warning(LoggingRecord(msg))
+    return pdf_to_np_array_pdfmium(pdf_bytes, dpi)
+def split_pdf(
+    pdf_path: PathLikeOrStr, output_dir: PathLikeOrStr, file_type: Literal["image", "pdf"], dpi: int = 200
+) -> None:
+    """
+    Split a pdf into single pages. The pages are saved as single pdf/png files in a subfolder of the output directory.
+    The subfolder is named after the stem of the pdf file, the page files are named `<stem>_<page index>.pdf` resp.
+    `<stem>_<page index>.png`.
+    :param pdf_path: Path to the pdf file
+    :param output_dir: Path to the output directory
+    :param file_type: Type of the output file. Either "image" or "pdf" (the spelling ".pdf" is accepted as well)
+    :param dpi: Image quality in DPI/dots-per-inch (default 200). Only used if file_type is "image"
+    """
+    pdf_path = Path(pdf_path)
+    filename = pdf_path.stem
+    file_dir = Path(output_dir) / filename
+    file_dir.mkdir(parents=True, exist_ok=True)
+    # the comparison used to be against ".pdf", which never matched the documented value "pdf", so that pages were
+    # always saved as images
+    save_as_pdf = str(file_type).lstrip(".") == "pdf"
+    with open(pdf_path, "rb") as file:
+        pdf = PdfReader(file)
+        for i, page in enumerate(pdf.pages):
+            writer = PdfWriter()
+            writer.add_page(page)
+            try:
+                if save_as_pdf:
+                    with open(file_dir / f"{filename}_{i}.pdf", "wb") as out:
+                        writer.write(out)
+                else:
+                    with BytesIO() as buffer:
+                        writer.write(buffer)
+                        np_image = pdf_to_np_array(buffer.getvalue(), dpi=dpi)
+                    viz_handler.write_image(file_dir / f"{filename}_{i}.png", np_image)
+            finally:
+                # close the writer even if writing or rendering of a page fails
+                writer.close()

{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepdoctection
-Version: 0.34
+Version: 0.35
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: catalogue ==2.0.10
-Requires-Dist: huggingface-hub >=0.12.0
+Requires-Dist: huggingface-hub <0.26,>=0.12.0
 Requires-Dist: importlib-metadata >=5.0.0
 Requires-Dist: jsonlines ==3.1.0
 Requires-Dist: lazy-imports ==0.3.1
@@ -27,6 +27,7 @@ Requires-Dist: numpy <2.0,>=1.21
 Requires-Dist: packaging >=20.0
 Requires-Dist: Pillow >=10.0.0
 Requires-Dist: pypdf >=3.16.0
+Requires-Dist: pypdfium2 >=4.30.0
 Requires-Dist: pyyaml >=6.0.1
 Requires-Dist: pyzmq >=16
 Requires-Dist: scipy >=1.13.1
@@ -63,7 +64,7 @@ Requires-Dist: mkdocstrings-python ; extra == 'docs'
 Requires-Dist: griffe ==0.25.0 ; extra == 'docs'
 Provides-Extra: pt
 Requires-Dist: catalogue ==2.0.10 ; extra == 'pt'
-Requires-Dist: huggingface-hub >=0.12.0 ; extra == 'pt'
+Requires-Dist: huggingface-hub <0.26,>=0.12.0 ; extra == 'pt'
 Requires-Dist: importlib-metadata >=5.0.0 ; extra == 'pt'
 Requires-Dist: jsonlines ==3.1.0 ; extra == 'pt'
 Requires-Dist: lazy-imports ==0.3.1 ; extra == 'pt'
@@ -73,6 +74,7 @@ Requires-Dist: numpy <2.0,>=1.21 ; extra == 'pt'
 Requires-Dist: packaging >=20.0 ; extra == 'pt'
 Requires-Dist: Pillow >=10.0.0 ; extra == 'pt'
 Requires-Dist: pypdf >=3.16.0 ; extra == 'pt'
+Requires-Dist: pypdfium2 >=4.30.0 ; extra == 'pt'
 Requires-Dist: pyyaml >=6.0.1 ; extra == 'pt'
 Requires-Dist: pyzmq >=16 ; extra == 'pt'
 Requires-Dist: scipy >=1.13.1 ; extra == 'pt'
@@ -95,7 +97,7 @@ Requires-Dist: pytest ==8.0.2 ; extra == 'test'
 Requires-Dist: pytest-cov ; extra == 'test'
 Provides-Extra: tf
 Requires-Dist: catalogue ==2.0.10 ; extra == 'tf'
-Requires-Dist: huggingface-hub >=0.12.0 ; extra == 'tf'
+Requires-Dist: huggingface-hub <0.26,>=0.12.0 ; extra == 'tf'
 Requires-Dist: importlib-metadata >=5.0.0 ; extra == 'tf'
 Requires-Dist: jsonlines ==3.1.0 ; extra == 'tf'
 Requires-Dist: lazy-imports ==0.3.1 ; extra == 'tf'
@@ -105,6 +107,7 @@ Requires-Dist: numpy <2.0,>=1.21 ; extra == 'tf'
 Requires-Dist: packaging >=20.0 ; extra == 'tf'
 Requires-Dist: Pillow >=10.0.0 ; extra == 'tf'
 Requires-Dist: pypdf >=3.16.0 ; extra == 'tf'
+Requires-Dist: pypdfium2 >=4.30.0 ; extra == 'tf'
 Requires-Dist: pyyaml >=6.0.1 ; extra == 'tf'
 Requires-Dist: pyzmq >=16 ; extra == 'tf'
 Requires-Dist: scipy >=1.13.1 ; extra == 'tf'
@@ -172,9 +175,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
  - Document layout analysis and table recognition now runs with
    [**Torchscript**](https://pytorch.org/docs/stable/jit.html) (CPU) as well and [**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2) is not required
    anymore for basic inference.
- - [**new**] More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
+ - More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
    (not contained in the built-in Analyzer).
- - [**new**] Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
+ - Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
    [**transformers**](https://github.com/huggingface/transformers).
    We have added a model wrapper for token classification with LiLT and added some LiLT models to the model catalog
    that seem to look promising, especially if you want to train a model on non-english data. The training script for
@@ -263,7 +266,7 @@ documentation.
 ## Requirements
-![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection.png)
+![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_081124.png)
 Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
 separately.
@@ -272,13 +275,16 @@ separately.
 - Python >= 3.9
 - 1.13 <= PyTorch  **or** 2.11 <= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
 In general, if you want to train or fine-tune models, a GPU is required.
-- **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
-images.
 - With respect to the Deep Learning framework, you must decide between [Tensorflow](https://www.tensorflow.org/install?hl=en)
   and [PyTorch](https://pytorch.org/get-started/locally/).
 - [Tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine will be used through a Python wrapper. The core
   engine has to be installed separately.
+- For release `v.0.34.0` and below **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF
+  documents into images. Starting with release `v.0.35.0` this dependency is optional.
 The following overview shows the availability of the models in conjunction with the DL framework.
 | Task                                          | PyTorch | Torchscript    |  Tensorflow  |
@@ -396,8 +402,8 @@ to develop this framework.
 ## Problems
 We try hard to eliminate bugs. We also know that the code is not free of issues. We welcome all issues relevant to this
-repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 4
-to 6 weeks.
+repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 10
+to 12 weeks.
 ## If you like **deep**doctection ...

{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,11 @@
-deepdoctection/__init__.py,sha256=lgfD5PlxwSqTwMnKBtcpzKH9emJ4UtyWaWrpM9Pn0Ng,12596
+deepdoctection/__init__.py,sha256=RZpawNRTJPKNPFuONawVOsYWdr-rI8PPNXZhlPtOKtc,12580
 deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deepdoctection/analyzer/__init__.py,sha256=g86MeZz_BIQ2-b8kDIss7osPUNrFhT-Z3Eu7Wm02pFI,706
-deepdoctection/analyzer/dd.py,sha256=j3G6PFmXe9XBTwtu8-g9D3yAx7obaNzfZ2yl7rEOUqg,20234
+deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
+deepdoctection/analyzer/_config.py,sha256=0cWtaI2e3jHNhufHZAqMje0YTTDAogKAHVl4VpYojAo,4874
+deepdoctection/analyzer/dd.py,sha256=DUOhOtwipHw5nabYqn3WGR9aZcgP0ma_bi_tjf9xscw,5973
+deepdoctection/analyzer/factory.py,sha256=T9jxtVLNFhocbsfWIGLPfFrEv21zQJzM6VdFt0yxMyg,23849
 deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
-deepdoctection/configs/conf_dd_one.yaml,sha256=d4ZTMQ1oTIYMFctQAaQBKK6iQP4LsViUDrPvsnaLumo,2220
+deepdoctection/configs/conf_dd_one.yaml,sha256=orP-oeqtWbz5S9FJZJKxy1UqMwOYjL9g0DOX-wbamqU,2239
 deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
 deepdoctection/dataflow/__init__.py,sha256=CWRHMpmJaPk4xY_oIIFubCt-z11SguWrMWxHZ7rdrvY,845
 deepdoctection/dataflow/base.py,sha256=z4DCComSj5wStEPjtk0093cNNGfUMiDqx8dqz36nS_o,6221
@@ -14,11 +16,11 @@ deepdoctection/dataflow/parallel_map.py,sha256=8FhxJBWV-kjJrJ27jQtP3yYF6Ev6rz98w
 deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP2QYSsgs,4555
 deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
 deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
-deepdoctection/datapoint/annotation.py,sha256=3hDwNf3bm7qi0xnvfKn459hxZe4BdiLPiFt03hJBbUQ,22517
+deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
 deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
-deepdoctection/datapoint/convert.py,sha256=9L3YS89nGPAV8dqPZ-KOLVxvatj_zax2yP5RD-fuZCU,6718
-deepdoctection/datapoint/image.py,sha256=WyGcVYNrC-sv7bxODunEttxcQCFhplpWkWLLVQ266C0,32766
-deepdoctection/datapoint/view.py,sha256=Hdz67F8UtIkQjFW6U2mKeQ1WIdaXq4dOx95ymyQFLiU,42137
+deepdoctection/datapoint/convert.py,sha256=Gw2IjNiEotPu1yuMZqrIYB0mCAwafKt-VgMnrHj6S7U,6808
+deepdoctection/datapoint/image.py,sha256=EvZlVwJjMAcL1z8RNPBvZ8fwdJvkGuGpcFxCP1y26Go,33045
+deepdoctection/datapoint/view.py,sha256=7qSX4DQw9OPQQSKfSjV8e5i6jLyu6hOMceSKJAob2N8,42154
 deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
 deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
 deepdoctection/datasets/base.py,sha256=DT4i-d74sIEiUNC6UspIHNJuHSK0t1dBv7qwadg4rLw,22341
@@ -57,8 +59,8 @@ deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0
 deepdoctection/extern/hflayoutlm.py,sha256=KfoWx9_Rpa1Y2L51HLrYvenfWaTB4SVTmVJH00Cqb-s,56510
 deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
 deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
-deepdoctection/extern/pdftext.py,sha256=9EvDstMBeOeCFXM21wKaj5iTOUJSt8_50RfGdMcMjIA,4048
-deepdoctection/extern/tessocr.py,sha256=GCTcVHm6oOXS2Xq76j-xY9etRDDJA5qfqWJ5AJ-Kn8k,17400
+deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
+deepdoctection/extern/tessocr.py,sha256=tG7etMvZ-jHFdq-jJAHYMJii3ujDjMfAFYUsjBp3nKI,17444
 deepdoctection/extern/texocr.py,sha256=yMt5ZzKtsjd7ogrcNXba7zccGGGF9LXK194EtER6YNQ,5804
 deepdoctection/extern/tpdetect.py,sha256=yAk1duQdoX-_pHLHgvhU7OOSiDy863q6XUMpjpYR734,8477
 deepdoctection/extern/pt/__init__.py,sha256=3Cu0ZHjbYsJomru7-RQXEHihEQLegZrmLetlHiqS58I,742
@@ -124,23 +126,23 @@ deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ6
 deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
 deepdoctection/utils/context.py,sha256=VSnJnTtRGuq3w-0-syTf9DXOhR7WsPvWLLWTxKIBYec,4186
 deepdoctection/utils/develop.py,sha256=4HyTarkFbJwctL-Hgu1TU_LSJppHvaroDbcyHsxhIA8,3444
-deepdoctection/utils/env_info.py,sha256=Fm6A4XfJsYQmW5TzPmwn7_jh9qx5jqYlt00k9NK0yR8,18007
+deepdoctection/utils/env_info.py,sha256=TnCA-LOTj4WIHd9yvn1AaoPWsLmPgc42l-BJmGV6zmM,19147
 deepdoctection/utils/error.py,sha256=_3q9VepKfEhsM3H033_Fu0hwBzMSjsWALsjyJbGAZr8,2367
-deepdoctection/utils/file_utils.py,sha256=koYsfHtl0-nh8T9nUb215Rc1X-WDvk2gEjyw-YJVZ34,19019
+deepdoctection/utils/file_utils.py,sha256=IRElrcND0YEiU1QELw5hfXeNA39uE2_nyzh9-X7YcxI,19477
 deepdoctection/utils/fs.py,sha256=C4ktrzjoVtX9kgycv5YrEigDI9byi65b6_D0aKsGM4Y,10161
 deepdoctection/utils/identifier.py,sha256=QkNaGGqPynHwDPnd3_m8iur4Cv64rcQa7qolCE7Qphk,2159
 deepdoctection/utils/logger.py,sha256=J0OVKiXP_2A82MWbbJoOeMEJ-75aZu5npgaS_yI6mVA,10003
-deepdoctection/utils/metacfg.py,sha256=AGAE-KOymOLsarpUBBYawpVSXImvJyUeOD4LD2W_7Yo,5196
+deepdoctection/utils/metacfg.py,sha256=hD76KQ_RnD_5B02qLI2Zxf3WfnsnXhEI_KUTKpw91RI,5711
 deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
-deepdoctection/utils/pdf_utils.py,sha256=H5BdLXvDlvTEfb-3zcRjy207PeqEnaymkG122R7UA4o,8635
+deepdoctection/utils/pdf_utils.py,sha256=OAQjE9xHVNcDsFqAvX47Lu-mgmoMpVXqIf5pOK8AwxY,11595
 deepdoctection/utils/settings.py,sha256=k6OyuWbj-IPeaO9zT9RZ-5Yad1wNhWGYqGLZdtgXAZY,12464
 deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
 deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F2GPU,8502
 deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
 deepdoctection/utils/utils.py,sha256=ANzyIX6AY1yc-4gcn6yxksV84sPrJDaUurUNVatAFu8,5168
 deepdoctection/utils/viz.py,sha256=Xm6pKlhM29UWBBGZHlWFl9XYFDAqaYDdwHXwe26Hvqo,25728
-deepdoctection-0.34.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
-deepdoctection-0.34.dist-info/METADATA,sha256=YJ5XJnf7zMlDmr6f7vqvFNL11hy-ZEz8VbdYgii0AQo,19169
-deepdoctection-0.34.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-deepdoctection-0.34.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
-deepdoctection-0.34.dist-info/RECORD,,
+deepdoctection-0.35.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
+deepdoctection-0.35.dist-info/METADATA,sha256=B6pPQjRYWcqd1p-3ul3PhflYOcKq2ZpP5D-i8kr7qgk,19403
+deepdoctection-0.35.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+deepdoctection-0.35.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
+deepdoctection-0.35.dist-info/RECORD,,

{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/LICENSE RENAMED Viewed

File without changes

{deepdoctection-0.34.dist-info → deepdoctection-0.35.dist-info}/top_level.txt RENAMED Viewed

File without changes

deepdoctection 0.34__py3-none-any.whl → 0.35__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.34py3-none-any.whl → 0.35py3-none-any.whl