PyPI - docling - Versions diffs - 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl - Mend

docling 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

docling/backend/xml/__init__.py +0 -0
docling/backend/xml/uspto_backend.py +1888 -0
docling/cli/main.py +8 -0
docling/datamodel/base_models.py +18 -4
docling/datamodel/document.py +77 -13
docling/datamodel/pipeline_options.py +68 -4
docling/datamodel/settings.py +1 -0
docling/document_converter.py +11 -2
docling/models/ds_glm_model.py +34 -4
docling/models/easyocr_model.py +37 -3
docling/models/layout_model.py +144 -280
docling/models/page_assemble_model.py +11 -1
docling/models/rapid_ocr_model.py +24 -45
docling/models/table_structure_model.py +49 -33
docling/pipeline/base_pipeline.py +3 -1
docling/pipeline/standard_pdf_pipeline.py +7 -3
docling/utils/accelerator_utils.py +42 -0
docling/utils/glm_utils.py +11 -3
docling/utils/layout_postprocessor.py +666 -0
{docling-2.11.0.dist-info → docling-2.13.0.dist-info}/METADATA +3 -3
{docling-2.11.0.dist-info → docling-2.13.0.dist-info}/RECORD +24 -21
docling/utils/layout_utils.py +0 -812
{docling-2.11.0.dist-info → docling-2.13.0.dist-info}/LICENSE +0 -0
{docling-2.11.0.dist-info → docling-2.13.0.dist-info}/WHEEL +0 -0
{docling-2.11.0.dist-info → docling-2.13.0.dist-info}/entry_points.txt +0 -0

docling/models/table_structure_model.py CHANGED Viewed

@@ -9,15 +9,25 @@ from PIL import ImageDraw
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    TableFormerMode,
+    TableStructureOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 class TableStructureModel(BasePageModel):
     def __init__(
-        self, enabled: bool, artifacts_path: Path, options: TableStructureOptions
+        self,
+        enabled: bool,
+        artifacts_path: Path,
+        options: TableStructureOptions,
+        accelerator_options: AcceleratorOptions,
     ):
         self.options = options
         self.do_cell_matching = self.options.do_cell_matching
@@ -26,16 +36,26 @@ class TableStructureModel(BasePageModel):
         self.enabled = enabled
         if self.enabled:
             if self.mode == TableFormerMode.ACCURATE:
-                artifacts_path = artifacts_path / "fat"
+                artifacts_path = artifacts_path / "accurate"
+            else:
+                artifacts_path = artifacts_path / "fast"
             # Third Party
             import docling_ibm_models.tableformer.common as c
+            device = decide_device(accelerator_options.device)
+            # Disable MPS here, until we know why it makes things slower.
+            if device == AcceleratorDevice.MPS.value:
+                device = AcceleratorDevice.CPU.value
             self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
             self.tm_config["model"]["save_dir"] = artifacts_path
             self.tm_model_type = self.tm_config["model"]["type"]
-            self.tf_predictor = TFPredictor(self.tm_config)
+            self.tf_predictor = TFPredictor(
+                self.tm_config, device, accelerator_options.num_threads
+            )
             self.scale = 2.0  # Scale up table input images to 144 dpi
     def draw_table_and_cells(
@@ -56,6 +76,10 @@ class TableStructureModel(BasePageModel):
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
             draw.rectangle([(x0, y0), (x1, y1)], outline="red")
+            for cell in table_element.cluster.cells:
+                x0, y0, x1, y1 = cell.bbox.as_tuple()
+                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
             for tc in table_element.table_cells:
                 if tc.bbox is not None:
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
@@ -69,7 +93,6 @@ class TableStructureModel(BasePageModel):
                         text=f"{tc.start_row_offset_idx}, {tc.start_col_offset_idx}",
                         fill="black",
                     )
         if show:
             image.show()
         else:
@@ -115,47 +138,40 @@ class TableStructureModel(BasePageModel):
                             ],
                         )
                         for cluster in page.predictions.layout.clusters
-                        if cluster.label == DocItemLabel.TABLE
+                        if cluster.label
+                        in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
                     ]
                     if not len(in_tables):
                         yield page
                         continue
-                    tokens = []
-                    for c in page.cells:
-                        for cluster, _ in in_tables:
-                            if c.bbox.area() > 0:
-                                if (
-                                    c.bbox.intersection_area_with(cluster.bbox)
-                                    / c.bbox.area()
-                                    > 0.2
-                                ):
-                                    # Only allow non empty stings (spaces) into the cells of a table
-                                    if len(c.text.strip()) > 0:
-                                        new_cell = copy.deepcopy(c)
-                                        new_cell.bbox = new_cell.bbox.scaled(
-                                            scale=self.scale
-                                        )
-                                        tokens.append(new_cell.model_dump())
                     page_input = {
-                        "tokens": tokens,
                         "width": page.size.width * self.scale,
                         "height": page.size.height * self.scale,
+                        "image": numpy.asarray(page.get_image(scale=self.scale)),
                     }
-                    page_input["image"] = numpy.asarray(
-                        page.get_image(scale=self.scale)
-                    )
                     table_clusters, table_bboxes = zip(*in_tables)
                     if len(table_bboxes):
-                        tf_output = self.tf_predictor.multi_table_predict(
-                            page_input, table_bboxes, do_matching=self.do_cell_matching
-                        )
+                        for table_cluster, tbl_box in in_tables:
+                            tokens = []
+                            for c in table_cluster.cells:
+                                # Only allow non empty stings (spaces) into the cells of a table
+                                if len(c.text.strip()) > 0:
+                                    new_cell = copy.deepcopy(c)
+                                    new_cell.bbox = new_cell.bbox.scaled(
+                                        scale=self.scale
+                                    )
+                                    tokens.append(new_cell.model_dump())
+                            page_input["tokens"] = tokens
-                        for table_cluster, table_out in zip(table_clusters, tf_output):
+                            tf_output = self.tf_predictor.multi_table_predict(
+                                page_input, [tbl_box], do_matching=self.do_cell_matching
+                            )
+                            table_out = tf_output[0]
                             table_cells = []
                             for element in table_out["tf_responses"]:
@@ -188,7 +204,7 @@ class TableStructureModel(BasePageModel):
                                 id=table_cluster.id,
                                 page_no=page.page_no,
                                 cluster=table_cluster,
-                                label=DocItemLabel.TABLE,
+                                label=table_cluster.label,
                             )
                             page.predictions.tablestructure.table_map[

docling/pipeline/base_pipeline.py CHANGED Viewed

@@ -168,7 +168,9 @@ class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
             except Exception as e:
                 conv_res.status = ConversionStatus.FAILURE
-                trace = "\n".join(traceback.format_exception(e))
+                trace = "\n".join(
+                    traceback.format_exception(type(e), e, e.__traceback__)
+                )
                 _log.warning(
                     f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
                     f"{trace}"

docling/pipeline/standard_pdf_pipeline.py CHANGED Viewed

@@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)
 class StandardPdfPipeline(PaginatedPipeline):
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
+    _layout_model_path = "model_artifacts/layout"
     _table_model_path = "model_artifacts/tableformer"
     def __init__(self, pipeline_options: PdfPipelineOptions):
@@ -75,7 +75,8 @@ class StandardPdfPipeline(PaginatedPipeline):
             # Layout model
             LayoutModel(
                 artifacts_path=self.artifacts_path
-                / StandardPdfPipeline._layout_model_path
+                / StandardPdfPipeline._layout_model_path,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Table structure model
             TableStructureModel(
@@ -83,6 +84,7 @@ class StandardPdfPipeline(PaginatedPipeline):
                 artifacts_path=self.artifacts_path
                 / StandardPdfPipeline._table_model_path,
                 options=pipeline_options.table_structure_options,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Page assemble
             PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
@@ -104,7 +106,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             repo_id="ds4sd/docling-models",
             force_download=force,
             local_dir=local_dir,
-            revision="v2.0.1",
+            revision="v2.1.0",
         )
         return Path(download_path)
@@ -114,6 +116,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return EasyOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
             return TesseractOcrCliModel(
@@ -129,6 +132,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return RapidOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
             if "darwin" != sys.platform:

docling/utils/accelerator_utils.py ADDED Viewed

@@ -0,0 +1,42 @@
+import logging
+import torch
+from docling.datamodel.pipeline_options import AcceleratorDevice
+_log = logging.getLogger(__name__)
+def decide_device(accelerator_device: AcceleratorDevice) -> str:
+    r"""
+    Resolve the device based on the acceleration options and the available devices in the system
+    Rules:
+    1. AUTO: Check for the best available device on the system.
+    2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
+    """
+    cuda_index = 0
+    device = "cpu"
+    has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
+    has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    if accelerator_device == AcceleratorDevice.AUTO:
+        if has_cuda:
+            device = f"cuda:{cuda_index}"
+        elif has_mps:
+            device = "mps"
+    else:
+        if accelerator_device == AcceleratorDevice.CUDA:
+            if has_cuda:
+                device = f"cuda:{cuda_index}"
+            else:
+                _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
+        elif accelerator_device == AcceleratorDevice.MPS:
+            if has_mps:
+                device = "mps"
+            else:
+                _log.warning("MPS is not available in the system. Fall back to 'CPU'")
+    _log.info("Accelerator device: '%s'", device)
+    return device

docling/utils/glm_utils.py CHANGED Viewed

@@ -169,6 +169,8 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
             current_list = None
             text = ""
             caption_refs = []
+            item_label = DocItemLabel(pelem["name"])
             for caption in obj["captions"]:
                 text += caption["text"]
@@ -254,12 +256,18 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
                 ),
             )
-            tbl = doc.add_table(data=tbl_data, prov=prov)
+            tbl = doc.add_table(data=tbl_data, prov=prov, label=item_label)
             tbl.captions.extend(caption_refs)
-        elif ptype in ["form", "key_value_region"]:
+        elif ptype in [DocItemLabel.FORM.value, DocItemLabel.KEY_VALUE_REGION.value]:
             label = DocItemLabel(ptype)
-            container_el = doc.add_group(label=GroupLabel.UNSPECIFIED, name=label)
+            group_label = GroupLabel.UNSPECIFIED
+            if label == DocItemLabel.FORM:
+                group_label = GroupLabel.FORM_AREA
+            elif label == DocItemLabel.KEY_VALUE_REGION:
+                group_label = GroupLabel.KEY_VALUE_AREA
+            container_el = doc.add_group(label=group_label)
             _add_child_elements(container_el, doc, obj, pelem)

docling 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl

docling 2.11.0__py3-none-any.whl → 2.13.0__py3-none-any.whl