PyPI - docling - Versions diffs - 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl - Mend

docling 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

docling/backend/abstract_backend.py +4 -0
docling/backend/docling_parse_backend.py +23 -3
docling/backend/pypdfium2_backend.py +15 -1
docling/datamodel/base_models.py +15 -7
docling/document_converter.py +0 -2
docling/models/base_ocr_model.py +124 -0
docling/models/easyocr_model.py +39 -46
docling/models/table_structure_model.py +0 -1
docling/pipeline/base_model_pipeline.py +0 -1
docling/pipeline/standard_model_pipeline.py +1 -3
{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/METADATA +5 -4
docling-1.6.1.dist-info/RECORD +27 -0
docling-1.5.0.dist-info/RECORD +0 -26
{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/LICENSE +0 -0
{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/WHEEL +0 -0

docling/backend/abstract_backend.py CHANGED Viewed

@@ -18,6 +18,10 @@ class PdfPageBackend(ABC):
     def get_text_cells(self) -> Iterable["Cell"]:
         pass
+    @abstractmethod
+    def get_bitmap_rects(self, scale: int = 1) -> Iterable["BoundingBox"]:
+        pass
     @abstractmethod
     def get_page_image(
         self, scale: int = 1, cropbox: Optional["BoundingBox"] = None

docling/backend/docling_parse_backend.py CHANGED Viewed

@@ -3,7 +3,7 @@ import random
 import time
 from io import BytesIO
 from pathlib import Path
-from typing import Iterable, List, Optional, Union
+from typing import Iterable, Optional, Union
 import pypdfium2 as pdfium
 from docling_parse.docling_parse import pdf_parser
@@ -43,7 +43,7 @@ class DoclingParsePageBackend(PdfPageBackend):
                 r=x1 * scale * page_size.width / parser_width,
                 t=y1 * scale * page_size.height / parser_height,
                 coord_origin=CoordOrigin.BOTTOMLEFT,
-            ).to_top_left_origin(page_size.height * scale)
+            ).to_top_left_origin(page_height=page_size.height * scale)
             overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
@@ -66,6 +66,12 @@ class DoclingParsePageBackend(PdfPageBackend):
         for i in range(len(self._dpage["cells"])):
             rect = self._dpage["cells"][i]["box"]["device"]
             x0, y0, x1, y1 = rect
+            if x1 < x0:
+                x0, x1 = x1, x0
+            if y1 < y0:
+                y0, y1 = y1, y0
             text_piece = self._dpage["cells"][i]["content"]["rnormalized"]
             cells.append(
                 Cell(
@@ -108,6 +114,20 @@ class DoclingParsePageBackend(PdfPageBackend):
         return cells
+    def get_bitmap_rects(self, scale: int = 1) -> Iterable[BoundingBox]:
+        AREA_THRESHOLD = 32 * 32
+        for i in range(len(self._dpage["images"])):
+            bitmap = self._dpage["images"][i]
+            cropbox = BoundingBox.from_tuple(
+                bitmap["box"], origin=CoordOrigin.BOTTOMLEFT
+            ).to_top_left_origin(self.get_size().height)
+            if cropbox.area() > AREA_THRESHOLD:
+                cropbox = cropbox.scaled(scale=scale)
+                yield cropbox
     def get_page_image(
         self, scale: int = 1, cropbox: Optional[BoundingBox] = None
     ) -> Image.Image:
@@ -173,7 +193,7 @@ class DoclingParseDocumentBackend(PdfDocumentBackend):
     def page_count(self) -> int:
         return len(self._parser_doc["pages"])
-    def load_page(self, page_no: int) -> PdfPage:
+    def load_page(self, page_no: int) -> DoclingParsePageBackend:
         return DoclingParsePageBackend(
             self._pdoc[page_no], self._parser_doc["pages"][page_no]
         )

docling/backend/pypdfium2_backend.py CHANGED Viewed

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Iterable, List, Optional, Union
 import pypdfium2 as pdfium
+import pypdfium2.raw as pdfium_c
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfPage
@@ -17,6 +18,19 @@ class PyPdfiumPageBackend(PdfPageBackend):
         self._ppage = page_obj
         self.text_page = None
+    def get_bitmap_rects(self, scale: int = 1) -> Iterable[BoundingBox]:
+        AREA_THRESHOLD = 32 * 32
+        for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
+            pos = obj.get_pos()
+            cropbox = BoundingBox.from_tuple(
+                pos, origin=CoordOrigin.BOTTOMLEFT
+            ).to_top_left_origin(page_height=self.get_size().height)
+            if cropbox.area() > AREA_THRESHOLD:
+                cropbox = cropbox.scaled(scale=scale)
+                yield cropbox
     def get_text_in_rect(self, bbox: BoundingBox) -> str:
         if not self.text_page:
             self.text_page = self._ppage.get_textpage()
@@ -208,7 +222,7 @@ class PyPdfiumDocumentBackend(PdfDocumentBackend):
     def page_count(self) -> int:
         return len(self._pdoc)
-    def load_page(self, page_no: int) -> PdfPage:
+    def load_page(self, page_no: int) -> PyPdfiumPageBackend:
         return PyPdfiumPageBackend(self._pdoc[page_no])
     def is_valid(self) -> bool:

docling/datamodel/base_models.py CHANGED Viewed

@@ -68,13 +68,21 @@ class BoundingBox(BaseModel):
     @classmethod
     def from_tuple(cls, coord: Tuple[float], origin: CoordOrigin):
         if origin == CoordOrigin.TOPLEFT:
-            return BoundingBox(
-                l=coord[0], t=coord[1], r=coord[2], b=coord[3], coord_origin=origin
-            )
+            l, t, r, b = coord[0], coord[1], coord[2], coord[3]
+            if r < l:
+                l, r = r, l
+            if b < t:
+                b, t = t, b
+            return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
         elif origin == CoordOrigin.BOTTOMLEFT:
-            return BoundingBox(
-                l=coord[0], b=coord[1], r=coord[2], t=coord[3], coord_origin=origin
-            )
+            l, b, r, t = coord[0], coord[1], coord[2], coord[3]
+            if r < l:
+                l, r = r, l
+            if b > t:
+                b, t = t, b
+            return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
     def area(self) -> float:
         return (self.r - self.l) * (self.b - self.t)
@@ -280,7 +288,7 @@ class TableStructureOptions(BaseModel):
 class PipelineOptions(BaseModel):
     do_table_structure: bool = True  # True: perform table structure extraction
-    do_ocr: bool = False  # True: perform OCR, replace programmatic PDF text
+    do_ocr: bool = True  # True: perform OCR, replace programmatic PDF text
     table_structure_options: TableStructureOptions = TableStructureOptions()

docling/document_converter.py CHANGED Viewed

@@ -35,8 +35,6 @@ _log = logging.getLogger(__name__)
 class DocumentConverter:
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
-    _table_model_path = "model_artifacts/tableformer"
     _default_download_filename = "file.pdf"
     def __init__(

docling/models/base_ocr_model.py ADDED Viewed

@@ -0,0 +1,124 @@
+import copy
+import logging
+from abc import abstractmethod
+from typing import Iterable, List, Tuple
+import numpy
+import numpy as np
+from PIL import Image, ImageDraw
+from rtree import index
+from scipy.ndimage import find_objects, label
+from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
+_log = logging.getLogger(__name__)
+class BaseOcrModel:
+    def __init__(self, config):
+        self.config = config
+        self.enabled = config["enabled"]
+    # Computes the optimum amount and coordinates of rectangles to OCR on a given page
+    def get_ocr_rects(self, page: Page) -> Tuple[bool, List[BoundingBox]]:
+        BITMAP_COVERAGE_TRESHOLD = 0.75
+        def find_ocr_rects(size, bitmap_rects):
+            image = Image.new(
+                "1", (round(size.width), round(size.height))
+            )  # '1' mode is binary
+            # Draw all bitmap rects into a binary image
+            draw = ImageDraw.Draw(image)
+            for rect in bitmap_rects:
+                x0, y0, x1, y1 = rect.as_tuple()
+                x0, y0, x1, y1 = round(x0), round(y0), round(x1), round(y1)
+                draw.rectangle([(x0, y0), (x1, y1)], fill=1)
+            np_image = np.array(image)
+            # Find the connected components
+            labeled_image, num_features = label(
+                np_image > 0
+            )  # Label black (0 value) regions
+            # Find enclosing bounding boxes for each connected component.
+            slices = find_objects(labeled_image)
+            bounding_boxes = [
+                BoundingBox(
+                    l=slc[1].start,
+                    t=slc[0].start,
+                    r=slc[1].stop - 1,
+                    b=slc[0].stop - 1,
+                    coord_origin=CoordOrigin.TOPLEFT,
+                )
+                for slc in slices
+            ]
+            # Compute area fraction on page covered by bitmaps
+            area_frac = np.sum(np_image > 0) / (size.width * size.height)
+            return (area_frac, bounding_boxes)  # fraction covered  # boxes
+        bitmap_rects = page._backend.get_bitmap_rects()
+        coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)
+        # return full-page rectangle if sufficiently covered with bitmaps
+        if coverage > BITMAP_COVERAGE_TRESHOLD:
+            return [
+                BoundingBox(
+                    l=0,
+                    t=0,
+                    r=page.size.width,
+                    b=page.size.height,
+                    coord_origin=CoordOrigin.TOPLEFT,
+                )
+            ]
+        # return individual rectangles if the bitmap coverage is smaller
+        elif coverage < BITMAP_COVERAGE_TRESHOLD:
+            return ocr_rects
+    # Filters OCR cells by dropping any OCR cell that intersects with an existing programmatic cell.
+    def filter_ocr_cells(self, ocr_cells, programmatic_cells):
+        # Create R-tree index for programmatic cells
+        p = index.Property()
+        p.dimension = 2
+        idx = index.Index(properties=p)
+        for i, cell in enumerate(programmatic_cells):
+            idx.insert(i, cell.bbox.as_tuple())
+        def is_overlapping_with_existing_cells(ocr_cell):
+            # Query the R-tree to get overlapping rectangles
+            possible_matches_index = list(idx.intersection(ocr_cell.bbox.as_tuple()))
+            return (
+                len(possible_matches_index) > 0
+            )  # this is a weak criterion but it works.
+        filtered_ocr_cells = [
+            rect for rect in ocr_cells if not is_overlapping_with_existing_cells(rect)
+        ]
+        return filtered_ocr_cells
+    def draw_ocr_rects_and_cells(self, page, ocr_rects):
+        image = copy.deepcopy(page.image)
+        draw = ImageDraw.Draw(image, "RGBA")
+        # Draw OCR rectangles as yellow filled rect
+        for rect in ocr_rects:
+            x0, y0, x1, y1 = rect.as_tuple()
+            shade_color = (255, 255, 0, 40)  # transparent yellow
+            draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
+        # Draw OCR and programmatic cells
+        for tc in page.cells:
+            x0, y0, x1, y1 = tc.bbox.as_tuple()
+            color = "red"
+            if isinstance(tc, OcrCell):
+                color = "magenta"
+            draw.rectangle([(x0, y0), (x1, y1)], outline=color)
+        image.show()
+    @abstractmethod
+    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+        pass

docling/models/easyocr_model.py CHANGED Viewed

@@ -1,20 +1,18 @@
-import copy
 import logging
-import random
 from typing import Iterable
 import numpy
-from PIL import ImageDraw
 from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
+from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
-class EasyOcrModel:
+class EasyOcrModel(BaseOcrModel):
     def __init__(self, config):
-        self.config = config
-        self.enabled = config["enabled"]
+        super().__init__(config)
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
         if self.enabled:
@@ -29,49 +27,44 @@ class EasyOcrModel:
             return
         for page in page_batch:
-            # rects = page._fpage.
-            high_res_image = page.get_image(scale=self.scale)
-            im = numpy.array(high_res_image)
-            result = self.reader.readtext(im)
-            del high_res_image
-            del im
-            cells = [
-                OcrCell(
-                    id=ix,
-                    text=line[1],
-                    confidence=line[2],
-                    bbox=BoundingBox.from_tuple(
-                        coord=(
-                            line[0][0][0] / self.scale,
-                            line[0][0][1] / self.scale,
-                            line[0][2][0] / self.scale,
-                            line[0][2][1] / self.scale,
-                        ),
-                        origin=CoordOrigin.TOPLEFT,
-                    ),
+            ocr_rects = self.get_ocr_rects(page)
+            all_ocr_cells = []
+            for ocr_rect in ocr_rects:
+                high_res_image = page._backend.get_page_image(
+                    scale=self.scale, cropbox=ocr_rect
                 )
-                for ix, line in enumerate(result)
-            ]
+                im = numpy.array(high_res_image)
+                result = self.reader.readtext(im)
+                del high_res_image
+                del im
+                cells = [
+                    OcrCell(
+                        id=ix,
+                        text=line[1],
+                        confidence=line[2],
+                        bbox=BoundingBox.from_tuple(
+                            coord=(
+                                (line[0][0][0] / self.scale) + ocr_rect.l,
+                                (line[0][0][1] / self.scale) + ocr_rect.t,
+                                (line[0][2][0] / self.scale) + ocr_rect.l,
+                                (line[0][2][1] / self.scale) + ocr_rect.t,
+                            ),
+                            origin=CoordOrigin.TOPLEFT,
+                        ),
+                    )
+                    for ix, line in enumerate(result)
+                ]
+                all_ocr_cells.extend(cells)
-            page.cells = cells  # For now, just overwrites all digital cells.
+            ## Remove OCR cells which overlap with programmatic cells.
+            filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
-            # DEBUG code:
-            def draw_clusters_and_cells():
-                image = copy.deepcopy(page.image)
-                draw = ImageDraw.Draw(image)
-                cell_color = (
-                    random.randint(30, 140),
-                    random.randint(30, 140),
-                    random.randint(30, 140),
-                )
-                for tc in cells:
-                    x0, y0, x1, y1 = tc.bbox.as_tuple()
-                    draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
-                image.show()
+            page.cells.extend(filtered_ocr_cells)
-            # draw_clusters_and_cells()
+            # DEBUG code:
+            # self.draw_ocr_rects_and_cells(page, ocr_rects)
             yield page

docling/models/table_structure_model.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import copy
-import random
 from typing import Iterable, List
 import numpy

docling/pipeline/base_model_pipeline.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from abc import abstractmethod
 from pathlib import Path
 from typing import Iterable

docling/pipeline/standard_model_pipeline.py CHANGED Viewed

@@ -1,10 +1,8 @@
 from pathlib import Path
-from typing import Iterable
-from docling.datamodel.base_models import Page, PipelineOptions
+from docling.datamodel.base_models import PipelineOptions
 from docling.models.easyocr_model import EasyOcrModel
 from docling.models.layout_model import LayoutModel
-from docling.models.page_assemble_model import PageAssembleModel
 from docling.models.table_structure_model import TableStructureModel
 from docling.pipeline.base_model_pipeline import BaseModelPipeline

{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 1.5.0
+Version: 1.6.1
 Summary: Docling PDF conversion package
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -19,20 +19,21 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Provides-Extra: easyocr
 Provides-Extra: ocr
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=0.19.0,<1)
 Requires-Dist: docling-core (>=1.1.2,<2.0.0)
-Requires-Dist: docling-ibm-models (>=1.1.1,<2.0.0)
+Requires-Dist: docling-ibm-models (>=1.1.2,<2.0.0)
 Requires-Dist: docling-parse (>=0.2.0,<0.3.0)
-Requires-Dist: easyocr (>=1.7,<2.0) ; extra == "easyocr" or extra == "ocr"
+Requires-Dist: easyocr (>=1.7,<2.0) ; extra == "ocr"
 Requires-Dist: filetype (>=1.2.0,<2.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
 Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
 Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
+Requires-Dist: rtree (>=1.3.0,<2.0.0)
+Requires-Dist: scipy (>=1.14.1,<2.0.0)
 Project-URL: Repository, https://github.com/DS4SD/docling
 Description-Content-Type: text/markdown

docling-1.6.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,27 @@
+docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/backend/abstract_backend.py,sha256=ZfEHaBPGM1cmqrhaEoU3MHhnHU11NhOnhtFEIbVMYDo,1221
+docling/backend/docling_parse_backend.py,sha256=TN7Ln3Lkc8k0v6HzxA2iUGc8f2iqMw0I-3eryLQkpdw,6924
+docling/backend/pypdfium2_backend.py,sha256=xUiIYgd7i22YDx4-W2hfPUaQFszW0gcT6pavG5qZ8LE,8062
+docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/datamodel/base_models.py,sha256=5VHit5h7OleKnbhvy-sWDxQLizEdNrGUBrypyzwHyAE,8604
+docling/datamodel/document.py,sha256=Dgi9pSwXCgIoR26MKiRDiVMyMaFKdvGSKq2Fm5Lef9M,13173
+docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
+docling/document_converter.py,sha256=UFSELvUSWsr8s0VByu4lNuzu7bn7zZauJTL3FTSLSBg,10371
+docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/models/base_ocr_model.py,sha256=Ipl82a3AV2OsgMQSMEMpnWJ6MXcmyIQzmp52PmTaB0g,4465
+docling/models/ds_glm_model.py,sha256=wmb--2JKFQby-kvidw6PyM8wURPXYPQ_Z_eKKCBAdYQ,3192
+docling/models/easyocr_model.py,sha256=ABIqALvtNNrDQ47fXaZ0lDFhOwKsYGUUlAPnIsFZgZA,2232
+docling/models/layout_model.py,sha256=ZFmaLXlRWUfsT1pJCiYVxhQFrBBsiz6Aw0m9GM3UvVM,11249
+docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
+docling/models/table_structure_model.py,sha256=5jzTlpM-GdCSq4l0vD1W6aSPTJXeTcXEnNuPxnw-DlA,5437
+docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/pipeline/base_model_pipeline.py,sha256=AC5NTR0xLy5JIZqsTINkKEHeCPqpyvJpuE_bcnZhyvI,529
+docling/pipeline/standard_model_pipeline.py,sha256=UTjyaEXvz9htYZz-IMTkn11cZwNjgvo_Fl2dfBVnRQs,1442
+docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
+docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
+docling-1.6.1.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
+docling-1.6.1.dist-info/METADATA,sha256=5ML-S0PmaQqA1SMYhaZrNIL3RzU6FcwfAnzXprKf6Oc,7266
+docling-1.6.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling-1.6.1.dist-info/RECORD,,

docling-1.5.0.dist-info/RECORD DELETED Viewed

@@ -1,26 +0,0 @@
-docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/backend/abstract_backend.py,sha256=swwmXzNueZSHqEOvw4j-IFhP2OUJhBeB--gV7NtzKgo,1112
-docling/backend/docling_parse_backend.py,sha256=-bIjYJ-80R2SArAEw_lAyzgW5_BFEoX83n1oBMmUGF4,6284
-docling/backend/pypdfium2_backend.py,sha256=3Qeeal8z6DunUe4S10Z2TXrdeucanCpa8evt6SQtpKQ,7496
-docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/datamodel/base_models.py,sha256=uOq0zjUS60aIkROREiypp3Jn1yqQTlWEf34jXTT43ls,8391
-docling/datamodel/document.py,sha256=Dgi9pSwXCgIoR26MKiRDiVMyMaFKdvGSKq2Fm5Lef9M,13173
-docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
-docling/document_converter.py,sha256=r9z48VjL_hkq-rbAgyZ135njzUGBJ5AnhEH6-1zfyCA,10490
-docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/models/ds_glm_model.py,sha256=wmb--2JKFQby-kvidw6PyM8wURPXYPQ_Z_eKKCBAdYQ,3192
-docling/models/easyocr_model.py,sha256=Y-RWolIFE3By6gk8dnb2qFy7Cr9qcHs6eo65fWPT0Nc,2276
-docling/models/layout_model.py,sha256=ZFmaLXlRWUfsT1pJCiYVxhQFrBBsiz6Aw0m9GM3UvVM,11249
-docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
-docling/models/table_structure_model.py,sha256=lKsodvfZaGwxOHp-CbRW5nzCKZYMwf770h0Ka6Bdbgw,5451
-docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/pipeline/base_model_pipeline.py,sha256=ozHdJak0yQAxQf7pQN_C480vI35A2e5KL5Qq1xSkq5c,560
-docling/pipeline/standard_model_pipeline.py,sha256=UTwodKUKrisLoVcntbNUBDhjzRyFvpdUvyVw-gNmBlM,1541
-docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
-docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
-docling-1.5.0.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
-docling-1.5.0.dist-info/METADATA,sha256=jWcjsrdfYcpeYFCRQ1h5C1b8MyaKsJWyUhGheXQEGvY,7235
-docling-1.5.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling-1.5.0.dist-info/RECORD,,

{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling-1.5.0.dist-info → docling-1.6.1.dist-info}/WHEEL RENAMED Viewed

File without changes

docling 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

docling 1.5.0__py3-none-any.whl → 1.6.1__py3-none-any.whl