PyPI - docling - Versions diffs - 2.2.0__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

docling-2.2.0-py3-none-any.whl → docling-2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

docling/backend/asciidoc_backend.py +0 -4
docling/backend/html_backend.py +53 -56
docling/backend/md_backend.py +59 -6
docling/backend/msword_backend.py +9 -15
docling/datamodel/base_models.py +1 -1
docling/datamodel/document.py +3 -1
docling/datamodel/settings.py +15 -1
docling/document_converter.py +12 -8
docling/models/base_model.py +4 -1
docling/models/base_ocr_model.py +21 -4
docling/models/ds_glm_model.py +27 -11
docling/models/easyocr_model.py +49 -39
docling/models/layout_model.py +87 -61
docling/models/page_assemble_model.py +102 -100
docling/models/page_preprocessing_model.py +25 -7
docling/models/table_structure_model.py +125 -90
docling/models/tesseract_ocr_cli_model.py +62 -52
docling/models/tesseract_ocr_model.py +57 -45
docling/pipeline/base_pipeline.py +68 -69
docling/pipeline/simple_pipeline.py +8 -11
docling/pipeline/standard_pdf_pipeline.py +59 -56
docling/utils/profiling.py +62 -0
{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/METADATA +5 -4
docling-2.3.0.dist-info/RECORD +45 -0
docling-2.2.0.dist-info/RECORD +0 -44
{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/LICENSE +0 -0
{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/WHEEL +0 -0
{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/entry_points.txt +0 -0

docling/pipeline/base_pipeline.py CHANGED Viewed

@@ -19,6 +19,7 @@ from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BaseEnrichmentModel
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import chunkify
 _log = logging.getLogger(__name__)
@@ -35,13 +36,16 @@ class BasePipeline(ABC):
         _log.info(f"Processing document {in_doc.file.name}")
         try:
-            # These steps are building and assembling the structure of the
-            # output DoclingDocument
-            conv_res = self._build_document(in_doc, conv_res)
-            conv_res = self._assemble_document(in_doc, conv_res)
-            # From this stage, all operations should rely only on conv_res.output
-            conv_res = self._enrich_document(in_doc, conv_res)
-            conv_res.status = self._determine_status(in_doc, conv_res)
+            with TimeRecorder(
+                conv_res, "pipeline_total", scope=ProfilingScope.DOCUMENT
+            ):
+                # These steps are building and assembling the structure of the
+                # output DoclingDocument
+                conv_res = self._build_document(conv_res)
+                conv_res = self._assemble_document(conv_res)
+                # From this stage, all operations should rely only on conv_res.output
+                conv_res = self._enrich_document(conv_res)
+                conv_res.status = self._determine_status(conv_res)
         except Exception as e:
             conv_res.status = ConversionStatus.FAILURE
             if raises_on_error:
@@ -50,19 +54,13 @@ class BasePipeline(ABC):
         return conv_res
     @abstractmethod
-    def _build_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
         pass
-    def _assemble_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
         return conv_res
-    def _enrich_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult:
         def _filter_elements(
             doc: DoclingDocument, model: BaseEnrichmentModel
@@ -71,24 +69,23 @@ class BasePipeline(ABC):
                 if model.is_processable(doc=doc, element=element):
                     yield element
-        for model in self.enrichment_pipe:
-            for element_batch in chunkify(
-                _filter_elements(conv_res.document, model),
-                settings.perf.elements_batch_size,
-            ):
-                # TODO: currently we assume the element itself is modified, because
-                # we don't have an interface to save the element back to the document
-                for element in model(
-                    doc=conv_res.document, element_batch=element_batch
-                ):  # Must exhaust!
-                    pass
+        with TimeRecorder(conv_res, "doc_enrich", scope=ProfilingScope.DOCUMENT):
+            for model in self.enrichment_pipe:
+                for element_batch in chunkify(
+                    _filter_elements(conv_res.document, model),
+                    settings.perf.elements_batch_size,
+                ):
+                    # TODO: currently we assume the element itself is modified, because
+                    # we don't have an interface to save the element back to the document
+                    for element in model(
+                        doc=conv_res.document, element_batch=element_batch
+                    ):  # Must exhaust!
+                        pass
         return conv_res
     @abstractmethod
-    def _determine_status(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionStatus:
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
         pass
     @classmethod
@@ -110,66 +107,68 @@ class BasePipeline(ABC):
 class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
-    def _apply_on_pages(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def _apply_on_pages(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
         for model in self.build_pipe:
-            page_batch = model(page_batch)
+            page_batch = model(conv_res, page_batch)
         yield from page_batch
-    def _build_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
-        if not isinstance(in_doc._backend, PdfDocumentBackend):
+        if not isinstance(conv_res.input._backend, PdfDocumentBackend):
             raise RuntimeError(
-                f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a PDF backend. "
+                f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. "
                 f"Can not convert this with a PDF pipeline. "
                 f"Please check your format configuration on DocumentConverter."
             )
             # conv_res.status = ConversionStatus.FAILURE
             # return conv_res
-        for i in range(0, in_doc.page_count):
-            conv_res.pages.append(Page(page_no=i))
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
-        try:
-            # Iterate batches of pages (page_batch_size) in the doc
-            for page_batch in chunkify(conv_res.pages, settings.perf.page_batch_size):
-                start_pb_time = time.time()
+            for i in range(0, conv_res.input.page_count):
+                conv_res.pages.append(Page(page_no=i))
-                # 1. Initialise the page resources
-                init_pages = map(
-                    functools.partial(self.initialize_page, in_doc), page_batch
-                )
+            try:
+                # Iterate batches of pages (page_batch_size) in the doc
+                for page_batch in chunkify(
+                    conv_res.pages, settings.perf.page_batch_size
+                ):
+                    start_pb_time = time.time()
-                # 2. Run pipeline stages
-                pipeline_pages = self._apply_on_pages(init_pages)
+                    # 1. Initialise the page resources
+                    init_pages = map(
+                        functools.partial(self.initialize_page, conv_res), page_batch
+                    )
-                for p in pipeline_pages:  # Must exhaust!
-                    pass
+                    # 2. Run pipeline stages
+                    pipeline_pages = self._apply_on_pages(conv_res, init_pages)
-                end_pb_time = time.time() - start_pb_time
-                _log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
+                    for p in pipeline_pages:  # Must exhaust!
+                        pass
-        except Exception as e:
-            conv_res.status = ConversionStatus.FAILURE
-            trace = "\n".join(traceback.format_exception(e))
-            _log.warning(
-                f"Encountered an error during conversion of document {in_doc.document_hash}:\n"
-                f"{trace}"
-            )
-            raise e
+                    end_pb_time = time.time() - start_pb_time
+                    _log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
+            except Exception as e:
+                conv_res.status = ConversionStatus.FAILURE
+                trace = "\n".join(traceback.format_exception(e))
+                _log.warning(
+                    f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
+                    f"{trace}"
+                )
+                raise e
-        finally:
-            # Always unload the PDF backend, even in case of failure
-            if in_doc._backend:
-                in_doc._backend.unload()
+            finally:
+                # Always unload the PDF backend, even in case of failure
+                if conv_res.input._backend:
+                    conv_res.input._backend.unload()
         return conv_res
-    def _determine_status(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionStatus:
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
         status = ConversionStatus.SUCCESS
         for page in conv_res.pages:
             if page._backend is None or not page._backend.is_valid():
@@ -186,5 +185,5 @@ class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
     # Initialise and load resources for a page
     @abstractmethod
-    def initialize_page(self, doc: InputDocument, page: Page) -> Page:
+    def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
         pass

docling/pipeline/simple_pipeline.py CHANGED Viewed

@@ -5,9 +5,10 @@ from docling.backend.abstract_backend import (
     DeclarativeDocumentBackend,
 )
 from docling.datamodel.base_models import ConversionStatus
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.pipeline.base_pipeline import BasePipeline
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 _log = logging.getLogger(__name__)
@@ -22,13 +23,11 @@ class SimplePipeline(BasePipeline):
     def __init__(self, pipeline_options: PipelineOptions):
         super().__init__(pipeline_options)
-    def _build_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
-        if not isinstance(in_doc._backend, DeclarativeDocumentBackend):
+        if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
             raise RuntimeError(
-                f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a declarative backend. "
+                f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
                 f"Can not convert this with simple pipeline. "
                 f"Please check your format configuration on DocumentConverter."
             )
@@ -38,13 +37,11 @@ class SimplePipeline(BasePipeline):
         # Instead of running a page-level pipeline to build up the document structure,
         # the backend is expected to be of type DeclarativeDocumentBackend, which can output
         # a DoclingDocument straight.
-        conv_res.document = in_doc._backend.convert()
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
+            conv_res.document = conv_res.input._backend.convert()
         return conv_res
-    def _determine_status(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionStatus:
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
         # This is called only if the previous steps didn't raise.
         # Since we don't have anything else to evaluate, we can
         # safely return SUCCESS.

docling/pipeline/standard_pdf_pipeline.py CHANGED Viewed

@@ -7,7 +7,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import AssembledUnit, Page
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
     PdfPipelineOptions,
@@ -27,6 +27,7 @@ from docling.models.table_structure_model import TableStructureModel
 from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
 from docling.models.tesseract_ocr_model import TesseractOcrModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 _log = logging.getLogger(__name__)
@@ -119,73 +120,75 @@ class StandardPdfPipeline(PaginatedPipeline):
             )
         return None
-    def initialize_page(self, doc: InputDocument, page: Page) -> Page:
-        page._backend = doc._backend.load_page(page.page_no)  # type: ignore
-        if page._backend is not None and page._backend.is_valid():
-            page.size = page._backend.get_size()
+    def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
+        with TimeRecorder(conv_res, "page_init"):
+            page._backend = conv_res.input._backend.load_page(page.page_no)  # type: ignore
+            if page._backend is not None and page._backend.is_valid():
+                page.size = page._backend.get_size()
         return page
-    def _assemble_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
         all_elements = []
         all_headers = []
         all_body = []
-        for p in conv_res.pages:
-            if p.assembled is not None:
-                for el in p.assembled.body:
-                    all_body.append(el)
-                for el in p.assembled.headers:
-                    all_headers.append(el)
-                for el in p.assembled.elements:
-                    all_elements.append(el)
-        conv_res.assembled = AssembledUnit(
-            elements=all_elements, headers=all_headers, body=all_body
-        )
-        conv_res.document = self.glm_model(conv_res)
+        with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
+            for p in conv_res.pages:
+                if p.assembled is not None:
+                    for el in p.assembled.body:
+                        all_body.append(el)
+                    for el in p.assembled.headers:
+                        all_headers.append(el)
+                    for el in p.assembled.elements:
+                        all_elements.append(el)
+            conv_res.assembled = AssembledUnit(
+                elements=all_elements, headers=all_headers, body=all_body
+            )
-        # Generate page images in the output
-        if self.pipeline_options.generate_page_images:
-            for page in conv_res.pages:
-                assert page.image is not None
-                page_no = page.page_no + 1
-                conv_res.document.pages[page_no].image = ImageRef.from_pil(
-                    page.image, dpi=int(72 * self.pipeline_options.images_scale)
-                )
+            conv_res.document = self.glm_model(conv_res)
-        # Generate images of the requested element types
-        if (
-            self.pipeline_options.generate_picture_images
-            or self.pipeline_options.generate_table_images
-        ):
-            scale = self.pipeline_options.images_scale
-            for element, _level in conv_res.document.iterate_items():
-                if not isinstance(element, DocItem) or len(element.prov) == 0:
-                    continue
-                if (
-                    isinstance(element, PictureItem)
-                    and self.pipeline_options.generate_picture_images
-                ) or (
-                    isinstance(element, TableItem)
-                    and self.pipeline_options.generate_table_images
-                ):
-                    page_ix = element.prov[0].page_no - 1
-                    page = conv_res.pages[page_ix]
-                    assert page.size is not None
+            # Generate page images in the output
+            if self.pipeline_options.generate_page_images:
+                for page in conv_res.pages:
                     assert page.image is not None
-                    crop_bbox = (
-                        element.prov[0]
-                        .bbox.scaled(scale=scale)
-                        .to_top_left_origin(page_height=page.size.height * scale)
+                    page_no = page.page_no + 1
+                    conv_res.document.pages[page_no].image = ImageRef.from_pil(
+                        page.image, dpi=int(72 * self.pipeline_options.images_scale)
                     )
-                    cropped_im = page.image.crop(crop_bbox.as_tuple())
-                    element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
+            # Generate images of the requested element types
+            if (
+                self.pipeline_options.generate_picture_images
+                or self.pipeline_options.generate_table_images
+            ):
+                scale = self.pipeline_options.images_scale
+                for element, _level in conv_res.document.iterate_items():
+                    if not isinstance(element, DocItem) or len(element.prov) == 0:
+                        continue
+                    if (
+                        isinstance(element, PictureItem)
+                        and self.pipeline_options.generate_picture_images
+                    ) or (
+                        isinstance(element, TableItem)
+                        and self.pipeline_options.generate_table_images
+                    ):
+                        page_ix = element.prov[0].page_no - 1
+                        page = conv_res.pages[page_ix]
+                        assert page.size is not None
+                        assert page.image is not None
+                        crop_bbox = (
+                            element.prov[0]
+                            .bbox.scaled(scale=scale)
+                            .to_top_left_origin(page_height=page.size.height * scale)
+                        )
+                        cropped_im = page.image.crop(crop_bbox.as_tuple())
+                        element.image = ImageRef.from_pil(
+                            cropped_im, dpi=int(72 * scale)
+                        )
         return conv_res

docling/utils/profiling.py ADDED Viewed

@@ -0,0 +1,62 @@
+import time
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, List
+import numpy as np
+from pydantic import BaseModel
+from docling.datamodel.settings import settings
+if TYPE_CHECKING:
+    from docling.datamodel.document import ConversionResult
+class ProfilingScope(str, Enum):
+    PAGE = "page"
+    DOCUMENT = "document"
+class ProfilingItem(BaseModel):
+    scope: ProfilingScope
+    count: int = 0
+    times: List[float] = []
+    start_timestamps: List[datetime] = []
+    def avg(self) -> float:
+        return np.average(self.times)  # type: ignore
+    def std(self) -> float:
+        return np.std(self.times)  # type: ignore
+    def mean(self) -> float:
+        return np.mean(self.times)  # type: ignore
+    def percentile(self, perc: float) -> float:
+        return np.percentile(self.times, perc)  # type: ignore
+class TimeRecorder:
+    def __init__(
+        self,
+        conv_res: "ConversionResult",
+        key: str,
+        scope: ProfilingScope = ProfilingScope.PAGE,
+    ):
+        if settings.debug.profile_pipeline_timings:
+            if key not in conv_res.timings.keys():
+                conv_res.timings[key] = ProfilingItem(scope=scope)
+            self.conv_res = conv_res
+            self.key = key
+    def __enter__(self):
+        if settings.debug.profile_pipeline_timings:
+            self.start = time.monotonic()
+            self.conv_res.timings[self.key].start_timestamps.append(datetime.utcnow())
+        return self
+    def __exit__(self, *args):
+        if settings.debug.profile_pipeline_timings:
+            elapsed = time.monotonic() - self.start
+            self.conv_res.timings[self.key].times.append(elapsed)
+            self.conv_res.timings[self.key].count += 1

{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.2.0
+Version: 2.3.0
 Summary: Docling PDF conversion package
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -23,7 +23,7 @@ Provides-Extra: tesserocr
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=0.26.1,<0.27.0)
-Requires-Dist: docling-core (>=2.1.0,<3.0.0)
+Requires-Dist: docling-core (>=2.2.3,<3.0.0)
 Requires-Dist: docling-ibm-models (>=2.0.1,<3.0.0)
 Requires-Dist: docling-parse (>=2.0.0,<3.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
@@ -73,8 +73,9 @@ Docling parses documents and exports them to the desired format with ease and sp
 ## Features
-* 🗂️ Multi-format support for input (PDF, DOCX etc.) & output (Markdown, JSON etc.)
-* 📑 Advanced PDF document understanding incl. page layout, reading order & table structures
+* 🗂️ Reads popular document formats (PDF, DOCX, PPTX, Images, HTML, AsciiDoc, Markdown) and exports to Markdown and JSON
+* 📑 Advanced PDF document understanding including page layout, reading order & table structures
+* 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
 * 📝 Metadata extraction, including title, authors, references & language
 * 🤖 Seamless LlamaIndex 🦙 & LangChain 🦜🔗 integration for powerful RAG / QA applications
 * 🔍 OCR support for scanned PDFs

docling-2.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,45 @@
+docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq_G2RdU,1678
+docling/backend/asciidoc_backend.py,sha256=kXZxOLk_LvLFVZwnJVVwjmvc3QWZ0iiG7VnwjgtC3hI,14051
+docling/backend/docling_parse_backend.py,sha256=TaIMli9vePd3fz9L6S4t75JPYZDpgYBLRGfWjbc9Hbk,7632
+docling/backend/docling_parse_v2_backend.py,sha256=QlVU8NgqKvVCa99E8oDa2Xvy__kq30C-myGY3o9Qoq4,8588
+docling/backend/html_backend.py,sha256=p3WlYta1f3e4osmvVR12KIUYLJimveTX8UwEkyPt7_g,15161
+docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
+docling/backend/mspowerpoint_backend.py,sha256=J472AIH_IXvGg3D0FDmXhue1At_VSBD6n15c64Kxttw,15446
+docling/backend/msword_backend.py,sha256=FAUdP74QxGKo2xMZQ4WQGYwtpIBCTJ_FG17PBpRwhxI,17230
+docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
+docling/backend/pypdfium2_backend.py,sha256=MJX6fQqwK3r967fyAAs-RA_YIkeQvhgsLkQAgaBTgaE,8995
+docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/cli/main.py,sha256=NRVGz0z-3EBwYNMJGVnLtDBcfOeutaUyYdkM0ymRnGA,8008
+docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/datamodel/base_models.py,sha256=fmkS6iTxGZCTtNCo2zsgMmBC11Ogf2Ht-mNIlZ9GP-o,5375
+docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
+docling/datamodel/pipeline_options.py,sha256=WNjluKC-Ww63ifkGMHwws8zIDHnOS1z5Hw7_j3S0qao,2446
+docling/datamodel/settings.py,sha256=2-sYEnKLV_giGygUlBtiBd4CJYN5T9-3BdL6NpWkUYw,1155
+docling/document_converter.py,sha256=Y0Tngh-seNSty7Ov71DDAJzbBgruoEdwYPunVn7DT00,10413
+docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
+docling/models/base_ocr_model.py,sha256=Ti0glL-_DVRfmP3MpywYVmkNf5RP6qhRg_UKzJuV1Dc,5663
+docling/models/ds_glm_model.py,sha256=2OpWW8MMzCIshrtP36gDSRPYOCjv1ex34FqxD2nYjP4,11986
+docling/models/easyocr_model.py,sha256=23hWq484qVS3nkch6nRRWowfQamN-McFZgfbHfp5Vuo,3818
+docling/models/layout_model.py,sha256=ZvbTSyxvXB5yLHNEti0Wv3trz0vwGuHySI5TCdApb0U,14011
+docling/models/page_assemble_model.py,sha256=kSGNiRKhmzkpFH7xCiT3rulMsgJmUXFa6Th_eB-cLEk,7103
+docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
+docling/models/table_structure_model.py,sha256=-ANSQpiN2avt3B9sbi7dHcoULUJbMBalAR5xxlrM7To,8421
+docling/models/tesseract_ocr_cli_model.py,sha256=ZflwQcD7YjhPqEB8bbgNgP14OBD4NNEJefUS8Lbr5X0,6511
+docling/models/tesseract_ocr_model.py,sha256=AccCgaYNzGryiJnkwR4sv2FeOdlSgO3uspdQOmo1sNY,5569
+docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/pipeline/base_pipeline.py,sha256=IF1XWYgUGbdB4-teLkmM4Hvg_UNEfPrGuhExMRTUsk8,7168
+docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
+docling/pipeline/standard_pdf_pipeline.py,sha256=h59eA0CLMYuuJoH-0SyCRkYEregNs6i0pa46Ioqf8kU,7947
+docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
+docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
+docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
+docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
+docling-2.3.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.3.0.dist-info/METADATA,sha256=e3LTQgbktuUHzQlI4qXDhIDMGOX0duC1EJWws6j6_y8,6373
+docling-2.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling-2.3.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
+docling-2.3.0.dist-info/RECORD,,

docling-2.2.0.dist-info/RECORD DELETED Viewed

@@ -1,44 +0,0 @@
-docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq_G2RdU,1678
-docling/backend/asciidoc_backend.py,sha256=WW0eIanPIObcg5ci9YcnqFxwipmqRFsRY8zjZDdKvJA,14116
-docling/backend/docling_parse_backend.py,sha256=TaIMli9vePd3fz9L6S4t75JPYZDpgYBLRGfWjbc9Hbk,7632
-docling/backend/docling_parse_v2_backend.py,sha256=QlVU8NgqKvVCa99E8oDa2Xvy__kq30C-myGY3o9Qoq4,8588
-docling/backend/html_backend.py,sha256=wfh5PWEwoqsCXxFCQbFBdJvEtlqZhXgqfPfTYETWHfE,14974
-docling/backend/md_backend.py,sha256=osYiNLnep9UgLq8mUH9bmwG3kP9RXxt69I8LlyeJN6g,11505
-docling/backend/mspowerpoint_backend.py,sha256=J472AIH_IXvGg3D0FDmXhue1At_VSBD6n15c64Kxttw,15446
-docling/backend/msword_backend.py,sha256=6bY0ebOaeSbpskUJY5t5pOf4a2VclWzeHeSo-vzsaO0,17470
-docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
-docling/backend/pypdfium2_backend.py,sha256=MJX6fQqwK3r967fyAAs-RA_YIkeQvhgsLkQAgaBTgaE,8995
-docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=NRVGz0z-3EBwYNMJGVnLtDBcfOeutaUyYdkM0ymRnGA,8008
-docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/datamodel/base_models.py,sha256=Mx0xR6YmRP8thu8CjOxjbGHLUJctqIvFwRZQ-8tQowY,5380
-docling/datamodel/document.py,sha256=mkPXDms9jtPFY1pfBSicNaVRZwbbfzYFUj0dJDbMgG8,20612
-docling/datamodel/pipeline_options.py,sha256=WNjluKC-Ww63ifkGMHwws8zIDHnOS1z5Hw7_j3S0qao,2446
-docling/datamodel/settings.py,sha256=KBFVeQviR1hoCFjA1ZwuLuQ6EAAYR7saIa6EUYiOkHI,767
-docling/document_converter.py,sha256=T-Y2pWwbCIofW209XJ3wlc5TiGeQqMbDqgzcVWyZ_0Y,10227
-docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/models/base_model.py,sha256=wSBGAIAbLqrqP_SMtkzXMuyFvvzjVU6iCqgSNnGIR4Y,603
-docling/models/base_ocr_model.py,sha256=SYelQRValiUo6M_p_9-J7CqNIOFO-EkK58j90SMsKQY,5028
-docling/models/ds_glm_model.py,sha256=vJLngchZonqFzGWbUr2izFSXk9DloPDhAfN2c3nkzNU,11254
-docling/models/easyocr_model.py,sha256=YfvdodjZ20WuOfouQXJmDyQL78QDOqWYsWSs2zSxWFc,3327
-docling/models/layout_model.py,sha256=zd2ULW3U6v9OJl4TnjWFEY6Q2O-lBfrIqtvrnDzF7HU,12596
-docling/models/page_assemble_model.py,sha256=LOKHho-r-RpeIVh8CpJ9tid_QIp5um3ukcrucZsyUlY,6645
-docling/models/page_preprocessing_model.py,sha256=cfhUIlGAGaX1RxILi69ZEV9Kmhhd3Y0XaSlQnGo18o4,1964
-docling/models/table_structure_model.py,sha256=YWSZKOz56gvicjTzVgSE-8Z_hI3NcRD5EN0yOUoM-_g,6979
-docling/models/tesseract_ocr_cli_model.py,sha256=fKc05V73ibMvAeuA4PForhYNtunpT5rR0k_xHZsew-E,5980
-docling/models/tesseract_ocr_model.py,sha256=v6td0vq8NogePuRTJRZhKF0DtZXITj70r9rKJKO5u9k,4984
-docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/pipeline/base_pipeline.py,sha256=7DTzVvM_jVHCxyY-BuuGRhmUsD_sgX4DD00oBFJWdB8,6723
-docling/pipeline/simple_pipeline.py,sha256=pxce0-3He5Lqa-xXT-7h173XVOSMZiMHl6HOfAJmQ7o,2162
-docling/pipeline/standard_pdf_pipeline.py,sha256=AVNSxGc6kPmBPDLWDc9eI8fryc25eOtiIVrOyVhZMZM,7527
-docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
-docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
-docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
-docling-2.2.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.2.0.dist-info/METADATA,sha256=TkaywA2l2ImdMc9WpUYWUQy3n50zG9Y9eC7ziElBlU0,6205
-docling-2.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling-2.2.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
-docling-2.2.0.dist-info/RECORD,,

{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.2.0.dist-info → docling-2.3.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

docling 2.2.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

docling-2.2.0-py3-none-any.whl → docling-2.3.0-py3-none-any.whl