PyPI - docling - Versions diffs - 2.25.0__tar.gz → 2.25.2__tar.gz - Mend

docling 2.25.0__tar.gz → 2.25.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

{docling-2.25.0 → docling-2.25.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.25.0
+Version: 2.25.2
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -185,7 +185,7 @@ For individual model usage, please refer to the model licenses found in the orig
 Docling has been brought to you by IBM.
-[supported_formats]: https://ds4sd.github.io/docling/supported_formats/
+[supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
 [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
 [integrations]: https://ds4sd.github.io/docling/integrations/

{docling-2.25.0 → docling-2.25.2}/README.md RENAMED Viewed

@@ -123,6 +123,6 @@ For individual model usage, please refer to the model licenses found in the orig
 Docling has been brought to you by IBM.
-[supported_formats]: https://ds4sd.github.io/docling/supported_formats/
+[supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
 [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
 [integrations]: https://ds4sd.github.io/docling/integrations/

{docling-2.25.0 → docling-2.25.2}/docling/backend/docling_parse_v2_backend.py RENAMED Viewed

@@ -12,6 +12,7 @@ from pypdfium2 import PdfPage
 from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
 from docling.datamodel.base_models import Cell, Size
+from docling.utils.locks import pypdfium2_lock
 if TYPE_CHECKING:
     from docling.datamodel.document import InputDocument
@@ -182,20 +183,24 @@ class DoclingParseV2PageBackend(PdfPageBackend):
             padbox.r = page_size.width - padbox.r
             padbox.t = page_size.height - padbox.t
-        image = (
-            self._ppage.render(
-                scale=scale * 1.5,
-                rotation=0,  # no additional rotation
-                crop=padbox.as_tuple(),
-            )
-            .to_pil()
-            .resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
-        )  # We resize the image from 1.5x the given scale to make it sharper.
+        with pypdfium2_lock:
+            image = (
+                self._ppage.render(
+                    scale=scale * 1.5,
+                    rotation=0,  # no additional rotation
+                    crop=padbox.as_tuple(),
+                )
+                .to_pil()
+                .resize(
+                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
+                )
+            )  # We resize the image from 1.5x the given scale to make it sharper.
         return image
     def get_size(self) -> Size:
-        return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
+        with pypdfium2_lock:
+            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
     def unload(self):
         self._ppage = None
@@ -206,23 +211,24 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
     def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
         super().__init__(in_doc, path_or_stream)
-        self._pdoc = pdfium.PdfDocument(self.path_or_stream)
-        self.parser = pdf_parser_v2("fatal")
+        with pypdfium2_lock:
+            self._pdoc = pdfium.PdfDocument(self.path_or_stream)
+            self.parser = pdf_parser_v2("fatal")
-        success = False
-        if isinstance(self.path_or_stream, BytesIO):
-            success = self.parser.load_document_from_bytesio(
-                self.document_hash, self.path_or_stream
-            )
-        elif isinstance(self.path_or_stream, Path):
-            success = self.parser.load_document(
-                self.document_hash, str(self.path_or_stream)
-            )
+            success = False
+            if isinstance(self.path_or_stream, BytesIO):
+                success = self.parser.load_document_from_bytesio(
+                    self.document_hash, self.path_or_stream
+                )
+            elif isinstance(self.path_or_stream, Path):
+                success = self.parser.load_document(
+                    self.document_hash, str(self.path_or_stream)
+                )
-        if not success:
-            raise RuntimeError(
-                f"docling-parse v2 could not load document {self.document_hash}."
-            )
+            if not success:
+                raise RuntimeError(
+                    f"docling-parse v2 could not load document {self.document_hash}."
+                )
     def page_count(self) -> int:
         # return len(self._pdoc)  # To be replaced with docling-parse API
@@ -236,9 +242,10 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
         return len_2
     def load_page(self, page_no: int) -> DoclingParseV2PageBackend:
-        return DoclingParseV2PageBackend(
-            self.parser, self.document_hash, page_no, self._pdoc[page_no]
-        )
+        with pypdfium2_lock:
+            return DoclingParseV2PageBackend(
+                self.parser, self.document_hash, page_no, self._pdoc[page_no]
+            )
     def is_valid(self) -> bool:
         return self.page_count() > 0
@@ -246,5 +253,6 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
     def unload(self):
         super().unload()
         self.parser.unload_document(self.document_hash)
-        self._pdoc.close()
-        self._pdoc = None
+        with pypdfium2_lock:
+            self._pdoc.close()
+            self._pdoc = None

{docling-2.25.0 → docling-2.25.2}/docling/backend/html_backend.py RENAMED Viewed

@@ -15,6 +15,7 @@ from docling_core.types.doc import (
     TableCell,
     TableData,
 )
+from docling_core.types.doc.document import ContentLayer
 from typing_extensions import override
 from docling.backend.abstract_backend import DeclarativeDocumentBackend
@@ -66,7 +67,8 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                     self.soup = BeautifulSoup(html_content, "html.parser")
         except Exception as e:
             raise RuntimeError(
-                f"Could not initialize HTML backend for file with hash {self.document_hash}."
+                "Could not initialize HTML backend for file with "
+                f"hash {self.document_hash}."
             ) from e
     @override
@@ -109,14 +111,21 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
             # TODO: remove style to avoid losing text from tags like i, b, span, ...
             for br in content("br"):
                 br.replace_with(NavigableString("\n"))
+            headers = content.find(["h1", "h2", "h3", "h4", "h5", "h6"])
+            self.content_layer = (
+                ContentLayer.BODY if headers is None else ContentLayer.FURNITURE
+            )
             self.walk(content, doc)
         else:
             raise RuntimeError(
-                f"Cannot convert doc with {self.document_hash} because the backend failed to init."
+                f"Cannot convert doc with {self.document_hash} because the backend "
+                "failed to init."
             )
         return doc
     def walk(self, tag: Tag, doc: DoclingDocument) -> None:
         # Iterate over elements in the body of the document
         text: str = ""
         for element in tag.children:
@@ -143,8 +152,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                     if text and tag.name in ["div"]:
                         doc.add_text(
                             parent=self.parents[self.level],
-                            label=DocItemLabel.PARAGRAPH,
+                            label=DocItemLabel.TEXT,
                             text=text,
+                            content_layer=self.content_layer,
                         )
                     text = ""
@@ -166,7 +176,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         elif tag.name == "figure":
             self.handle_figure(tag, doc)
         elif tag.name == "img":
-            self.handle_image(doc)
+            self.handle_image(tag, doc)
         else:
             self.walk(tag, doc)
@@ -197,12 +207,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         text = element.text.strip()
         if hlevel == 1:
+            self.content_layer = ContentLayer.BODY
             for key in self.parents.keys():
                 self.parents[key] = None
             self.level = 1
             self.parents[self.level] = doc.add_text(
-                parent=self.parents[0], label=DocItemLabel.TITLE, text=text
+                parent=self.parents[0],
+                label=DocItemLabel.TITLE,
+                text=text,
+                content_layer=self.content_layer,
             )
         else:
             if hlevel > self.level:
@@ -213,6 +228,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                         name=f"header-{i}",
                         label=GroupLabel.SECTION,
                         parent=self.parents[i - 1],
+                        content_layer=self.content_layer,
                     )
                 self.level = hlevel
@@ -228,6 +244,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                 parent=self.parents[hlevel - 1],
                 text=text,
                 level=hlevel,
+                content_layer=self.content_layer,
             )
     def handle_code(self, element: Tag, doc: DoclingDocument) -> None:
@@ -236,16 +253,24 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
             return
         text = element.text.strip()
         if text:
-            doc.add_code(parent=self.parents[self.level], text=text)
+            doc.add_code(
+                parent=self.parents[self.level],
+                text=text,
+                content_layer=self.content_layer,
+            )
     def handle_paragraph(self, element: Tag, doc: DoclingDocument) -> None:
         """Handles paragraph tags (p)."""
         if element.text is None:
             return
         text = element.text.strip()
-        label = DocItemLabel.PARAGRAPH
         if text:
-            doc.add_text(parent=self.parents[self.level], label=label, text=text)
+            doc.add_text(
+                parent=self.parents[self.level],
+                label=DocItemLabel.TEXT,
+                text=text,
+                content_layer=self.content_layer,
+            )
     def handle_list(self, element: Tag, doc: DoclingDocument) -> None:
         """Handles list tags (ul, ol) and their list items."""
@@ -253,14 +278,24 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         if element.name == "ul":
             # create a list group
             self.parents[self.level + 1] = doc.add_group(
-                parent=self.parents[self.level], name="list", label=GroupLabel.LIST
+                parent=self.parents[self.level],
+                name="list",
+                label=GroupLabel.LIST,
+                content_layer=self.content_layer,
             )
         elif element.name == "ol":
+            start_attr = element.get("start")
+            start: int = (
+                int(start_attr)
+                if isinstance(start_attr, str) and start_attr.isnumeric()
+                else 1
+            )
             # create a list group
             self.parents[self.level + 1] = doc.add_group(
                 parent=self.parents[self.level],
-                name="ordered list",
+                name="ordered list" + (f" start {start}" if start != 1 else ""),
                 label=GroupLabel.ORDERED_LIST,
+                content_layer=self.content_layer,
             )
         self.level += 1
@@ -270,15 +305,23 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         self.level -= 1
     def handle_list_item(self, element: Tag, doc: DoclingDocument) -> None:
-        """Handles listitem tags (li)."""
+        """Handles list item tags (li)."""
         nested_list = element.find(["ul", "ol"])
         parent = self.parents[self.level]
         if parent is None:
-            _log.warning(f"list-item has no parent in DoclingDocument: {element}")
+            _log.debug(f"list-item has no parent in DoclingDocument: {element}")
             return
         parent_label: str = parent.label
         index_in_list = len(parent.children) + 1
+        if (
+            parent_label == GroupLabel.ORDERED_LIST
+            and isinstance(parent, GroupItem)
+            and parent.name
+        ):
+            start_in_list: str = parent.name.split(" ")[-1]
+            start: int = int(start_in_list) if start_in_list.isnumeric() else 1
+            index_in_list += start - 1
         if nested_list:
             # Text in list item can be hidden within hierarchy, hence
@@ -301,6 +344,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                     enumerated=enumerated,
                     marker=marker,
                     parent=parent,
+                    content_layer=self.content_layer,
                 )
                 self.level += 1
@@ -322,15 +366,16 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                 enumerated=enumerated,
                 marker=marker,
                 parent=parent,
+                content_layer=self.content_layer,
             )
         else:
-            _log.warning(f"list-item has no text: {element}")
+            _log.debug(f"list-item has no text: {element}")
     @staticmethod
     def parse_table_data(element: Tag) -> Optional[TableData]:
         nested_tables = element.find("table")
         if nested_tables is not None:
-            _log.warning("Skipping nested table.")
+            _log.debug("Skipping nested table.")
             return None
         # Count the number of rows (number of <tr> elements)
@@ -425,7 +470,11 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         table_data = HTMLDocumentBackend.parse_table_data(element)
         if table_data is not None:
-            doc.add_table(data=table_data, parent=self.parents[self.level])
+            doc.add_table(
+                data=table_data,
+                parent=self.parents[self.level],
+                content_layer=self.content_layer,
+            )
     def get_list_text(self, list_element: Tag, level: int = 0) -> list[str]:
         """Recursively extract text from <ul> or <ol> with proper indentation."""
@@ -465,20 +514,33 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         contains_captions = element.find(["figcaption"])
         if not isinstance(contains_captions, Tag):
-            doc.add_picture(parent=self.parents[self.level], caption=None)
+            doc.add_picture(
+                parent=self.parents[self.level],
+                caption=None,
+                content_layer=self.content_layer,
+            )
         else:
             texts = []
             for item in contains_captions:
                 texts.append(item.text)
             fig_caption = doc.add_text(
-                label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
+                label=DocItemLabel.CAPTION,
+                text=("".join(texts)).strip(),
+                content_layer=self.content_layer,
             )
             doc.add_picture(
                 parent=self.parents[self.level],
                 caption=fig_caption,
+                content_layer=self.content_layer,
             )
-    def handle_image(self, doc: DoclingDocument) -> None:
+    def handle_image(self, element: Tag, doc: DoclingDocument) -> None:
         """Handles image tags (img)."""
-        doc.add_picture(parent=self.parents[self.level], caption=None)
+        _log.debug(f"ignoring <img> tags at the moment: {element}")
+        doc.add_picture(
+            parent=self.parents[self.level],
+            caption=None,
+            content_layer=self.content_layer,
+        )

{docling-2.25.0 → docling-2.25.2}/docling/backend/pypdfium2_backend.py RENAMED Viewed

@@ -13,6 +13,7 @@ from pypdfium2._helpers.misc import PdfiumError
 from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
 from docling.datamodel.base_models import Cell
+from docling.utils.locks import pypdfium2_lock
 if TYPE_CHECKING:
     from docling.datamodel.document import InputDocument
@@ -24,6 +25,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
     def __init__(
         self, pdfium_doc: pdfium.PdfDocument, document_hash: str, page_no: int
     ):
+        # Note: lock applied by the caller
         self.valid = True  # No better way to tell from pypdfium.
         try:
             self._ppage: pdfium.PdfPage = pdfium_doc[page_no]
@@ -40,51 +42,57 @@ class PyPdfiumPageBackend(PdfPageBackend):
     def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
         AREA_THRESHOLD = 0  # 32 * 32
-        for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
-            pos = obj.get_pos()
-            cropbox = BoundingBox.from_tuple(
-                pos, origin=CoordOrigin.BOTTOMLEFT
-            ).to_top_left_origin(page_height=self.get_size().height)
+        page_size = self.get_size()
+        with pypdfium2_lock:
+            for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
+                pos = obj.get_pos()
+                cropbox = BoundingBox.from_tuple(
+                    pos, origin=CoordOrigin.BOTTOMLEFT
+                ).to_top_left_origin(page_height=page_size.height)
-            if cropbox.area() > AREA_THRESHOLD:
-                cropbox = cropbox.scaled(scale=scale)
+                if cropbox.area() > AREA_THRESHOLD:
+                    cropbox = cropbox.scaled(scale=scale)
-                yield cropbox
+                    yield cropbox
     def get_text_in_rect(self, bbox: BoundingBox) -> str:
-        if not self.text_page:
-            self.text_page = self._ppage.get_textpage()
+        with pypdfium2_lock:
+            if not self.text_page:
+                self.text_page = self._ppage.get_textpage()
         if bbox.coord_origin != CoordOrigin.BOTTOMLEFT:
             bbox = bbox.to_bottom_left_origin(self.get_size().height)
-        text_piece = self.text_page.get_text_bounded(*bbox.as_tuple())
+        with pypdfium2_lock:
+            text_piece = self.text_page.get_text_bounded(*bbox.as_tuple())
         return text_piece
     def get_text_cells(self) -> Iterable[Cell]:
-        if not self.text_page:
-            self.text_page = self._ppage.get_textpage()
+        with pypdfium2_lock:
+            if not self.text_page:
+                self.text_page = self._ppage.get_textpage()
         cells = []
         cell_counter = 0
         page_size = self.get_size()
-        for i in range(self.text_page.count_rects()):
-            rect = self.text_page.get_rect(i)
-            text_piece = self.text_page.get_text_bounded(*rect)
-            x0, y0, x1, y1 = rect
-            cells.append(
-                Cell(
-                    id=cell_counter,
-                    text=text_piece,
-                    bbox=BoundingBox(
-                        l=x0, b=y0, r=x1, t=y1, coord_origin=CoordOrigin.BOTTOMLEFT
-                    ).to_top_left_origin(page_size.height),
+        with pypdfium2_lock:
+            for i in range(self.text_page.count_rects()):
+                rect = self.text_page.get_rect(i)
+                text_piece = self.text_page.get_text_bounded(*rect)
+                x0, y0, x1, y1 = rect
+                cells.append(
+                    Cell(
+                        id=cell_counter,
+                        text=text_piece,
+                        bbox=BoundingBox(
+                            l=x0, b=y0, r=x1, t=y1, coord_origin=CoordOrigin.BOTTOMLEFT
+                        ).to_top_left_origin(page_size.height),
+                    )
                 )
-            )
-            cell_counter += 1
+                cell_counter += 1
         # PyPdfium2 produces very fragmented cells, with sub-word level boundaries, in many PDFs.
         # The cell merging code below is to clean this up.
@@ -214,20 +222,24 @@ class PyPdfiumPageBackend(PdfPageBackend):
             padbox.r = page_size.width - padbox.r
             padbox.t = page_size.height - padbox.t
-        image = (
-            self._ppage.render(
-                scale=scale * 1.5,
-                rotation=0,  # no additional rotation
-                crop=padbox.as_tuple(),
-            )
-            .to_pil()
-            .resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
-        )  # We resize the image from 1.5x the given scale to make it sharper.
+        with pypdfium2_lock:
+            image = (
+                self._ppage.render(
+                    scale=scale * 1.5,
+                    rotation=0,  # no additional rotation
+                    crop=padbox.as_tuple(),
+                )
+                .to_pil()
+                .resize(
+                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
+                )
+            )  # We resize the image from 1.5x the given scale to make it sharper.
         return image
     def get_size(self) -> Size:
-        return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
+        with pypdfium2_lock:
+            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
     def unload(self):
         self._ppage = None
@@ -239,22 +251,26 @@ class PyPdfiumDocumentBackend(PdfDocumentBackend):
         super().__init__(in_doc, path_or_stream)
         try:
-            self._pdoc = pdfium.PdfDocument(self.path_or_stream)
+            with pypdfium2_lock:
+                self._pdoc = pdfium.PdfDocument(self.path_or_stream)
         except PdfiumError as e:
             raise RuntimeError(
                 f"pypdfium could not load document with hash {self.document_hash}"
             ) from e
     def page_count(self) -> int:
-        return len(self._pdoc)
+        with pypdfium2_lock:
+            return len(self._pdoc)
     def load_page(self, page_no: int) -> PyPdfiumPageBackend:
-        return PyPdfiumPageBackend(self._pdoc, self.document_hash, page_no)
+        with pypdfium2_lock:
+            return PyPdfiumPageBackend(self._pdoc, self.document_hash, page_no)
     def is_valid(self) -> bool:
         return self.page_count() > 0
     def unload(self):
         super().unload()
-        self._pdoc.close()
-        self._pdoc = None
+        with pypdfium2_lock:
+            self._pdoc.close()
+            self._pdoc = None

{docling-2.25.0 → docling-2.25.2}/docling/utils/layout_postprocessor.py RENAMED Viewed

@@ -203,6 +203,7 @@ class LayoutPostprocessor:
         """Initialize processor with cells and spatial indices."""
         self.cells = cells
         self.page_size = page_size
+        self.all_clusters = clusters
         self.regular_clusters = [
             c for c in clusters if c.label not in self.SPECIAL_TYPES
         ]
@@ -267,7 +268,7 @@ class LayoutPostprocessor:
         # Handle orphaned cells
         unassigned = self._find_unassigned_cells(clusters)
         if unassigned:
-            next_id = max((c.id for c in clusters), default=0) + 1
+            next_id = max((c.id for c in self.all_clusters), default=0) + 1
             orphan_clusters = []
             for i, cell in enumerate(unassigned):
                 conf = 1.0

docling-2.25.2/docling/utils/locks.py ADDED Viewed

@@ -0,0 +1,3 @@
+import threading
+pypdfium2_lock = threading.Lock()

{docling-2.25.0 → docling-2.25.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "2.25.0"  # DO NOT EDIT, updated automatically
+version = "2.25.2"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Panos Vagenas <pva@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"

{docling-2.25.0 → docling-2.25.2}/LICENSE RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/abstract_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/asciidoc_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/csv_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/docling_parse_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/json/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/json/docling_json_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/md_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/msexcel_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/mspowerpoint_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/msword_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/pdf_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/xml/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/xml/jats_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/backend/xml/uspto_backend.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/chunking/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/cli/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/cli/main.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/cli/models.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/cli/tools.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/datamodel/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/datamodel/base_models.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/datamodel/document.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/datamodel/pipeline_options.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/datamodel/settings.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/document_converter.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/exceptions.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/base_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/base_ocr_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/code_formula_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/document_picture_classifier.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/easyocr_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/hf_vlm_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/layout_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/ocr_mac_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/page_assemble_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/page_preprocessing_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/picture_description_api_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/picture_description_base_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/picture_description_vlm_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/rapid_ocr_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/readingorder_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/table_structure_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/tesseract_ocr_cli_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/models/tesseract_ocr_model.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/pipeline/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/pipeline/base_pipeline.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/pipeline/simple_pipeline.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/pipeline/standard_pdf_pipeline.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/pipeline/vlm_pipeline.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/py.typed RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/__init__.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/accelerator_utils.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/export.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/glm_utils.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/model_downloader.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/ocr_utils.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/profiling.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/utils.py RENAMED Viewed

File without changes

{docling-2.25.0 → docling-2.25.2}/docling/utils/visualization.py RENAMED Viewed

File without changes

docling 2.25.0__tar.gz → 2.25.2__tar.gz

docling 2.25.0__tar.gz → 2.25.2__tar.gz