PyPI - docling-core - Versions diffs - 2.36.0__py3-none-any.whl → 2.38.0__py3-none-any.whl - Mend

docling-core 2.36.0__py3-none-any.whl → 2.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling-core might be problematic. Click here for more details.

Files changed (13) hide show

docling_core/transforms/chunker/hybrid_chunker.py CHANGED Viewed

@@ -234,10 +234,13 @@ class HybridChunker(BaseChunker):
             if available_length <= 0:
                 warnings.warn(
                     "Headers and captions for this chunk are longer than the total "
-                    "amount of size for the chunk, chunk will be ignored: "
-                    f"{doc_chunk.text=}"
+                    "available size for the chunk, so they will be ignored: "
+                    f"{doc_chunk.text=}, {doc_chunk.meta=}"
                 )
-                return []
+                new_chunk = DocChunk(**doc_chunk.export_json_dict())
+                new_chunk.meta.captions = None
+                new_chunk.meta.headings = None
+                return self._split_using_plain_text(doc_chunk=new_chunk)
             text = doc_chunk.text
             segments = sem_chunker.chunk(text)
             chunks = [DocChunk(text=s, meta=doc_chunk.meta) for s in segments]

docling_core/transforms/serializer/html.py CHANGED Viewed

@@ -340,7 +340,7 @@ class HTMLTableSerializer(BaseTableSerializer):
                     content = html.escape(cell.text.strip())
                     celltag = "td"
-                    if cell.column_header:
+                    if cell.column_header or cell.row_header or cell.row_section:
                         celltag = "th"
                     opening_tag = f"{celltag}"

docling_core/transforms/visualizer/layout_visualizer.py CHANGED Viewed

@@ -163,8 +163,8 @@ class LayoutVisualizer(BaseVisualizer):
                 else:
                     raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
-                if prev_page_nr is None or page_nr > prev_page_nr:  # new page begins
-                    # complete previous drawing
+                if prev_page_nr is None or page_nr != prev_page_nr:  # changing page
+                    # dump previous drawing
                     if prev_page_nr is not None and prev_image and clusters:
                         self._draw_clusters(
                             image=prev_image,

docling_core/transforms/visualizer/reading_order_visualizer.py CHANGED Viewed

@@ -1,10 +1,11 @@
 """Define classes for reading order visualization."""
 from copy import deepcopy
-from typing import Optional
+from typing import Optional, Union
-from PIL import ImageDraw
+from PIL import ImageDraw, ImageFont
 from PIL.Image import Image
+from PIL.ImageFont import FreeTypeFont
 from pydantic import BaseModel
 from typing_extensions import override
@@ -12,6 +13,11 @@ from docling_core.transforms.visualizer.base import BaseVisualizer
 from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocument
+class _NumberDrawingData(BaseModel):
+    xy: tuple[float, float]
+    text: str
 class ReadingOrderVisualizer(BaseVisualizer):
     """Reading order visualizer."""
@@ -19,6 +25,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
         """Layout visualization parameters."""
         show_label: bool = True
+        show_branch_numbering: bool = False
         content_layers: set[ContentLayer] = {
             cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
         }
@@ -76,10 +83,17 @@ class ReadingOrderVisualizer(BaseVisualizer):
         images: Optional[dict[Optional[int], Image]] = None,
     ):
         """Draw the reading order."""
-        # draw = ImageDraw.Draw(image)
+        font: Union[ImageFont.ImageFont, FreeTypeFont]
+        try:
+            font = ImageFont.truetype("arial.ttf", 12)
+        except OSError:
+            # Fallback to default font if arial is not available
+            font = ImageFont.load_default()
         x0, y0 = None, None
+        number_data_to_draw: dict[Optional[int], list[_NumberDrawingData]] = {}
         my_images: dict[Optional[int], Image] = images or {}
         prev_page = None
+        i = 0
         for elem, _ in doc.iterate_items(
             included_content_layers=self.params.content_layers,
         ):
@@ -92,7 +106,10 @@ class ReadingOrderVisualizer(BaseVisualizer):
                 page_no = prov.page_no
                 image = my_images.get(page_no)
-                if image is None or prev_page is None or page_no > prev_page:
+                if page_no not in number_data_to_draw:
+                    number_data_to_draw[page_no] = []
+                if image is None or prev_page is None or page_no != prev_page:
                     # new page begins
                     prev_page = page_no
                     x0 = y0 = None
@@ -109,7 +126,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
                         else:
                             image = deepcopy(pil_img)
                             my_images[page_no] = image
-                draw = ImageDraw.Draw(image)
+                draw = ImageDraw.Draw(image, "RGBA")
                 tlo_bbox = prov.bbox.to_top_left_origin(
                     page_height=doc.pages[prov.page_no].size.height
@@ -124,9 +141,20 @@ class ReadingOrderVisualizer(BaseVisualizer):
                     ro_bbox.b, ro_bbox.t = ro_bbox.t, ro_bbox.b
                 if x0 is None and y0 is None:
+                    # is_root= True
                     x0 = (ro_bbox.l + ro_bbox.r) / 2.0
                     y0 = (ro_bbox.b + ro_bbox.t) / 2.0
+                    number_data_to_draw[page_no].append(
+                        _NumberDrawingData(
+                            xy=(x0, y0),
+                            text=f"{i}",
+                        )
+                    )
+                    i += 1
                 else:
+                    # is_root = False
                     assert x0 is not None
                     assert y0 is not None
@@ -139,7 +167,40 @@ class ReadingOrderVisualizer(BaseVisualizer):
                         line_width=2,
                         color="red",
                     )
                     x0, y0 = x1, y1
+        if self.params.show_branch_numbering:
+            # post-drawing the numbers to ensure they are rendered on top-layer
+            for page in number_data_to_draw:
+                if (image := my_images.get(page)) is None:
+                    continue
+                draw = ImageDraw.Draw(image, "RGBA")
+                for num_item in number_data_to_draw[page]:
+                    text_bbox = draw.textbbox(num_item.xy, num_item.text, font)
+                    text_bg_padding = 5
+                    draw.ellipse(
+                        [
+                            (
+                                text_bbox[0] - text_bg_padding,
+                                text_bbox[1] - text_bg_padding,
+                            ),
+                            (
+                                text_bbox[2] + text_bg_padding,
+                                text_bbox[3] + text_bg_padding,
+                            ),
+                        ],
+                        fill="orange",
+                    )
+                    draw.text(
+                        num_item.xy,
+                        text=num_item.text,
+                        fill="black",
+                        font=font,
+                    )
         return my_images
     @override

docling_core/transforms/visualizer/table_visualizer.py CHANGED Viewed

@@ -23,8 +23,23 @@ class TableVisualizer(BaseVisualizer):
         # show_Label: bool = False
         show_cells: bool = True
-        # show_rows: bool = False
-        # show_cols: bool = False
+        show_rows: bool = False
+        show_cols: bool = False
+        cell_color: tuple[int, int, int, int] = (256, 0, 0, 32)
+        cell_outline: tuple[int, int, int, int] = (256, 0, 0, 128)
+        row_color: tuple[int, int, int, int] = (256, 0, 0, 32)
+        row_outline: tuple[int, int, int, int] = (256, 0, 0, 128)
+        row_header_color: tuple[int, int, int, int] = (0, 256, 0, 32)
+        row_header_outline: tuple[int, int, int, int] = (0, 256, 0, 128)
+        col_color: tuple[int, int, int, int] = (0, 256, 0, 32)
+        col_outline: tuple[int, int, int, int] = (0, 256, 0, 128)
+        col_header_color: tuple[int, int, int, int] = (0, 0, 256, 32)
+        col_header_outline: tuple[int, int, int, int] = (0, 0, 256, 128)
     base_visualizer: Optional[BaseVisualizer] = None
     params: Params = Params()
@@ -45,7 +60,21 @@ class TableVisualizer(BaseVisualizer):
                 tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height)
-                cell_color = (256, 0, 0, 32)  # Transparent black for cells
+                cell_color = self.params.cell_color  # Transparent black for cells
+                cell_outline = self.params.cell_outline
+                if cell.column_header:
+                    cell_color = (
+                        self.params.col_header_color
+                    )  # Transparent black for cells
+                    cell_outline = self.params.col_header_outline
+                if cell.row_header:
+                    cell_color = (
+                        self.params.row_header_color
+                    )  # Transparent black for cells
+                    cell_outline = self.params.row_header_outline
+                if cell.row_section:
+                    cell_color = self.params.row_header_color
+                    cell_outline = self.params.row_header_outline
                 cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
                 cx0 *= scale_x
@@ -55,10 +84,68 @@ class TableVisualizer(BaseVisualizer):
                 draw.rectangle(
                     [(cx0, cy0), (cx1, cy1)],
-                    outline=(256, 0, 0, 128),
+                    outline=cell_outline,
                     fill=cell_color,
                 )
+    def _draw_table_rows(
+        self,
+        table: TableItem,
+        page_image: Image,
+        page_height: float,
+        scale_x: float,
+        scale_y: float,
+    ):
+        """Draw individual table cells."""
+        draw = ImageDraw.Draw(page_image, "RGBA")
+        rows = table.data.get_row_bounding_boxes()
+        for rid, bbox in rows.items():
+            tl_bbox = bbox.to_top_left_origin(page_height=page_height)
+            cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
+            cx0 *= scale_x
+            cx1 *= scale_x
+            cy0 *= scale_y
+            cy1 *= scale_y
+            draw.rectangle(
+                [(cx0, cy0), (cx1, cy1)],
+                outline=self.params.row_outline,
+                fill=self.params.row_color,
+            )
+    def _draw_table_cols(
+        self,
+        table: TableItem,
+        page_image: Image,
+        page_height: float,
+        scale_x: float,
+        scale_y: float,
+    ):
+        """Draw individual table cells."""
+        draw = ImageDraw.Draw(page_image, "RGBA")
+        cols = table.data.get_column_bounding_boxes()
+        for cid, bbox in cols.items():
+            tl_bbox = bbox.to_top_left_origin(page_height=page_height)
+            cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
+            cx0 *= scale_x
+            cx1 *= scale_x
+            cy0 *= scale_y
+            cy1 *= scale_y
+            draw.rectangle(
+                [(cx0, cy0), (cx1, cy1)],
+                outline=self.params.col_outline,
+                fill=self.params.col_color,
+            )
     def _draw_doc_tables(
         self,
         doc: DoclingDocument,
@@ -108,6 +195,24 @@ class TableVisualizer(BaseVisualizer):
                             scale_y=image.height / doc.pages[page_nr].size.height,
                         )
+                    if self.params.show_rows:
+                        self._draw_table_rows(
+                            table=elem,
+                            page_height=doc.pages[page_nr].size.height,
+                            page_image=image,
+                            scale_x=image.width / doc.pages[page_nr].size.width,
+                            scale_y=image.height / doc.pages[page_nr].size.height,
+                        )
+                    if self.params.show_cols:
+                        self._draw_table_cols(
+                            table=elem,
+                            page_height=doc.pages[page_nr].size.height,
+                            page_image=image,
+                            scale_x=image.width / doc.pages[page_nr].size.width,
+                            scale_y=image.height / doc.pages[page_nr].size.height,
+                        )
                 else:
                     raise RuntimeError(f"Cannot visualize page-image for {page_nr}")

docling_core/types/doc/__init__.py CHANGED Viewed

@@ -7,26 +7,78 @@
 from .base import BoundingBox, CoordOrigin, ImageRefMode, Size
 from .document import (
+    BaseAnnotation,
+    ChartBar,
+    ChartLine,
+    ChartPoint,
+    ChartSlice,
+    ChartStackedBar,
     CodeItem,
+    ContentLayer,
+    DescriptionAnnotation,
     DocItem,
     DoclingDocument,
+    DocTagsDocument,
+    DocTagsPage,
     DocumentOrigin,
     FloatingItem,
+    Formatting,
+    FormItem,
+    FormulaItem,
+    GraphCell,
+    GraphData,
+    GraphLink,
     GroupItem,
     ImageRef,
+    InlineGroup,
     KeyValueItem,
+    ListItem,
+    MiscAnnotation,
     NodeItem,
+    OrderedList,
     PageItem,
+    PictureBarChartData,
+    PictureChartData,
     PictureClassificationClass,
     PictureClassificationData,
     PictureDataType,
     PictureItem,
+    PictureLineChartData,
+    PictureMoleculeData,
+    PicturePieChartData,
+    PictureScatterChartData,
+    PictureStackedBarChartData,
+    PictureTabularChartData,
     ProvenanceItem,
     RefItem,
+    Script,
     SectionHeaderItem,
     TableCell,
     TableData,
     TableItem,
     TextItem,
+    TitleItem,
+    UnorderedList,
 )
-from .labels import DocItemLabel, GroupLabel, TableCellLabel
+from .labels import (
+    CodeLanguageLabel,
+    DocItemLabel,
+    GraphCellLabel,
+    GraphLinkLabel,
+    GroupLabel,
+    PictureClassificationLabel,
+    TableCellLabel,
+)
+from .page import (
+    BoundingRectangle,
+    ColorMixin,
+    ColorRGBA,
+    Coord2D,
+    OrderedElement,
+    PdfCellRenderingMode,
+    PdfPageBoundaryType,
+    TextCell,
+    TextCellUnit,
+    TextDirection,
+)
+from .tokens import DocumentToken, TableToken

docling_core/types/doc/document.py CHANGED Viewed

@@ -38,7 +38,7 @@ from typing_extensions import Annotated, Self, deprecated
 from docling_core.search.package import VERSION_PATTERN
 from docling_core.types.base import _JSON_POINTER_REGEX
 from docling_core.types.doc import BoundingBox, Size
-from docling_core.types.doc.base import ImageRefMode
+from docling_core.types.doc.base import CoordOrigin, ImageRefMode
 from docling_core.types.doc.labels import (
     CodeLanguageLabel,
     DocItemLabel,
@@ -372,6 +372,119 @@ class TableData(BaseModel):  # TBD
         return table_data
+    def get_row_bounding_boxes(self) -> dict[int, BoundingBox]:
+        """Get the minimal bounding box for each row in the table.
+        Returns:
+        List[Optional[BoundingBox]]: A list where each element is the minimal
+        bounding box that encompasses all cells in that row, or None if no
+        cells in the row have bounding boxes.
+        """
+        coords = []
+        for cell in self.table_cells:
+            if cell.bbox is not None:
+                coords.append(cell.bbox.coord_origin)
+        if len(set(coords)) > 1:
+            raise ValueError(
+                "All bounding boxes must have the same \
+                CoordOrigin to compute their union."
+            )
+        row_bboxes: dict[int, BoundingBox] = {}
+        for row_idx in range(self.num_rows):
+            row_cells_with_bbox: dict[int, list[BoundingBox]] = {}
+            # Collect all cells in this row that have bounding boxes
+            for cell in self.table_cells:
+                if (
+                    cell.bbox is not None
+                    and cell.start_row_offset_idx <= row_idx < cell.end_row_offset_idx
+                ):
+                    row_span = cell.end_row_offset_idx - cell.start_row_offset_idx
+                    if row_span in row_cells_with_bbox:
+                        row_cells_with_bbox[row_span].append(cell.bbox)
+                    else:
+                        row_cells_with_bbox[row_span] = [cell.bbox]
+            # Calculate the enclosing bounding box for this row
+            if len(row_cells_with_bbox) > 0:
+                min_row_span = min(row_cells_with_bbox.keys())
+                row_bbox: BoundingBox = BoundingBox.enclosing_bbox(
+                    row_cells_with_bbox[min_row_span]
+                )
+                for rspan, bboxs in row_cells_with_bbox.items():
+                    for bbox in bboxs:
+                        row_bbox.l = min(row_bbox.l, bbox.l)
+                        row_bbox.r = max(row_bbox.r, bbox.r)
+                row_bboxes[row_idx] = row_bbox
+        return row_bboxes
+    def get_column_bounding_boxes(self) -> dict[int, BoundingBox]:
+        """Get the minimal bounding box for each column in the table.
+        Returns:
+            List[Optional[BoundingBox]]: A list where each element is the minimal
+            bounding box that encompasses all cells in that column, or None if no
+            cells in the column have bounding boxes.
+        """
+        coords = []
+        for cell in self.table_cells:
+            if cell.bbox is not None:
+                coords.append(cell.bbox.coord_origin)
+        if len(set(coords)) > 1:
+            raise ValueError(
+                "All bounding boxes must have the same \
+                CoordOrigin to compute their union."
+            )
+        col_bboxes: dict[int, BoundingBox] = {}
+        for col_idx in range(self.num_cols):
+            col_cells_with_bbox: dict[int, list[BoundingBox]] = {}
+            # Collect all cells in this row that have bounding boxes
+            for cell in self.table_cells:
+                if (
+                    cell.bbox is not None
+                    and cell.start_col_offset_idx <= col_idx < cell.end_col_offset_idx
+                ):
+                    col_span = cell.end_col_offset_idx - cell.start_col_offset_idx
+                    if col_span in col_cells_with_bbox:
+                        col_cells_with_bbox[col_span].append(cell.bbox)
+                    else:
+                        col_cells_with_bbox[col_span] = [cell.bbox]
+            # Calculate the enclosing bounding box for this row
+            if len(col_cells_with_bbox) > 0:
+                min_col_span = min(col_cells_with_bbox.keys())
+                col_bbox: BoundingBox = BoundingBox.enclosing_bbox(
+                    col_cells_with_bbox[min_col_span]
+                )
+                for rspan, bboxs in col_cells_with_bbox.items():
+                    for bbox in bboxs:
+                        if bbox.coord_origin == CoordOrigin.TOPLEFT:
+                            col_bbox.b = max(col_bbox.b, bbox.b)
+                            col_bbox.t = min(col_bbox.t, bbox.t)
+                        elif bbox.coord_origin == CoordOrigin.BOTTOMLEFT:
+                            col_bbox.b = min(col_bbox.b, bbox.b)
+                            col_bbox.t = max(col_bbox.t, bbox.t)
+                col_bboxes[col_idx] = col_bbox
+        return col_bboxes
 class PictureTabularChartData(PictureChartData):
     """Base class for picture chart data.
@@ -4056,6 +4169,7 @@ class DoclingDocument(BaseModel):
         add_table_cell_location: bool = False,
         add_table_cell_text: bool = True,
         minified: bool = False,
+        pages: Optional[set[int]] = None,
     ) -> str:
         r"""Exports the document content to a DocumentToken format.
@@ -4074,6 +4188,7 @@ class DoclingDocument(BaseModel):
         :param # table specific flagsadd_table_cell_location: bool
         :param add_table_cell_text: bool:  (Default value = True)
         :param minified: bool:  (Default value = False)
+        :param pages: set[int]: (Default value = None)
         :returns: The content of the document formatted as a DocTags string.
         :rtype: str
         """
@@ -4098,6 +4213,7 @@ class DoclingDocument(BaseModel):
                 add_page_break=add_page_index,
                 add_table_cell_location=add_table_cell_location,
                 add_table_cell_text=add_table_cell_text,
+                pages=pages,
                 mode=(
                     DocTagsParams.Mode.MINIFIED
                     if minified
@@ -4237,7 +4353,9 @@ class DoclingDocument(BaseModel):
         return pitem
     def get_visualization(
-        self, show_label: bool = True
+        self,
+        show_label: bool = True,
+        show_branch_numbering: bool = False,
     ) -> dict[Optional[int], PILImage.Image]:
         """Get visualization of the document as images by page."""
         from docling_core.transforms.visualizer.layout_visualizer import (
@@ -4253,6 +4371,9 @@ class DoclingDocument(BaseModel):
                     show_label=show_label,
                 ),
             ),
+            params=ReadingOrderVisualizer.Params(
+                show_branch_numbering=show_branch_numbering,
+            ),
         )
         images = visualizer.get_visualization(doc=self)
@@ -4343,3 +4464,67 @@ class DoclingDocument(BaseModel):
                     hyperlink=li.hyperlink,
                 )
         return self
+    def _normalize_references(self) -> None:
+        """Normalize ref numbering by ordering node items as per iterate_items()."""
+        new_body = GroupItem(**self.body.model_dump(exclude={"children"}))
+        item_lists: dict[str, list[NodeItem]] = {
+            "groups": [],
+            "texts": [],
+            "pictures": [],
+            "tables": [],
+            "key_value_items": [],
+            "form_items": [],
+        }
+        orig_ref_to_new_ref: dict[str, str] = {}
+        # collect items in traversal order
+        for item, _ in self.iterate_items(
+            with_groups=True,
+            traverse_pictures=True,
+            included_content_layers={c for c in ContentLayer},
+        ):
+            key = item.self_ref.split("/")[1]
+            is_body = key == "body"
+            new_cref = "#/body" if is_body else f"#/{key}/{len(item_lists[key])}"
+            # register cref mapping:
+            orig_ref_to_new_ref[item.self_ref] = new_cref
+            if not is_body:
+                new_item = copy.deepcopy(item)
+                new_item.children = []
+                # put item in the right list
+                item_lists[key].append(new_item)
+                # update item's self reference
+                new_item.self_ref = new_cref
+                if item.parent:
+                    # set item's parent
+                    new_parent_cref = orig_ref_to_new_ref[item.parent.cref]
+                    new_item.parent = RefItem(cref=new_parent_cref)
+                    # add item to parent's children
+                    path_components = new_parent_cref.split("/")
+                    num_components = len(path_components)
+                    parent_node: NodeItem
+                    if num_components == 3:
+                        _, parent_key, parent_index_str = path_components
+                        parent_index = int(parent_index_str)
+                        parent_node = item_lists[parent_key][parent_index]
+                    elif num_components == 2 and path_components[1] == "body":
+                        parent_node = new_body
+                    else:
+                        raise RuntimeError(f"Unsupported ref format: {new_parent_cref}")
+                    parent_node.children.append(RefItem(cref=new_cref))
+        # update document
+        self.groups = item_lists["groups"]  # type: ignore
+        self.texts = item_lists["texts"]  # type: ignore
+        self.pictures = item_lists["pictures"]  # type: ignore
+        self.tables = item_lists["tables"]  # type: ignore
+        self.key_value_items = item_lists["key_value_items"]  # type: ignore
+        self.form_items = item_lists["form_items"]  # type: ignore
+        self.body = new_body

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling-core
-Version: 2.36.0
+Version: 2.38.0
 Summary: A python library to define and validate data types in Docling.
 Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/RECORD RENAMED Viewed

@@ -20,7 +20,7 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
 docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
 docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
 docling_core/transforms/chunker/hierarchical_chunker.py,sha256=7Fpwwsn2BoiR12KGPrn8fU1uuhqBLp85MRLMF0aIsL8,8281
-docling_core/transforms/chunker/hybrid_chunker.py,sha256=i4Yskms48XRUAVhec8pTGDP1dbrTEgc1pNh5fNXqfKQ,12317
+docling_core/transforms/chunker/hybrid_chunker.py,sha256=xjkz8hy3tXXzkJzf7QMFOEq_v8V7Jcs9tCY0Mxjge74,12548
 docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
 docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
 docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
@@ -29,19 +29,19 @@ docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3
 docling_core/transforms/serializer/base.py,sha256=ZFIiZeplL-QbBs9EDUb1awqxapQ23PsApVetJtAs7Vs,6891
 docling_core/transforms/serializer/common.py,sha256=WP-qO-woidrKyvZ56m0vlKMysoLrMzzZtHSCIwsl3ek,19119
 docling_core/transforms/serializer/doctags.py,sha256=PuAExlP-2HxcDSP_R_phtYQU0yKBW94RrPgb85IUxck,19905
-docling_core/transforms/serializer/html.py,sha256=KiywrroYBS3yk07gQizlmk3oqkXg_NpFwE0VF31_Z-I,37112
+docling_core/transforms/serializer/html.py,sha256=SZgQa0QnknEoRwMFLdgmVsLQqLF2rQl3D7XyEZzUHCE,37151
 docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
 docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
 docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
 docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
-docling_core/transforms/visualizer/layout_visualizer.py,sha256=hpq7OnyBgGxt3iW3_aNy9KH_0kmKdgoiJIFPcA2SSHU,8040
-docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=yBra_W33bb16BxrTqP-ABu5NfRplTEJgu3dKdew3zKA,5601
-docling_core/transforms/visualizer/table_visualizer.py,sha256=XlLMSROyRW2UtAjKTltcESSs_rdQNKjO3QvO7ET7uc0,4275
+docling_core/transforms/visualizer/layout_visualizer.py,sha256=zHzQTWcy-z1J2BcsjvakLkrp8pgStgnxhDl8YqIAotY,8035
+docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao39X3Dut0934NAjU3I4v3JN5VzzdjmoGRY,7776
+docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
 docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
 docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
-docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
+docling_core/types/doc/__init__.py,sha256=pchsIq-9FH_kCTyuyDdB8L4yV77pmnxPwT7399xrqxI,1626
 docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
-docling_core/types/doc/document.py,sha256=elFR5J7O9FUWXiweNK2W7S-cPvAakdzkMls0Uh4ViU8,149361
+docling_core/types/doc/document.py,sha256=JPh-9MqfOxThP5njvXZAY8sxQyhiPJLjDsSJviggItc,156829
 docling_core/types/doc/labels.py,sha256=JiciRK7_DOkebsrfQ6PVCvS__TsKgWn1ANk84BeB14k,7359
 docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
 docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
 docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
 docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
 docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
-docling_core-2.36.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
-docling_core-2.36.0.dist-info/METADATA,sha256=8CnZkQHylNT1mgEEs_lIB18f2NL96R3kFAl-rBYVR0U,6453
-docling_core-2.36.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-docling_core-2.36.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
-docling_core-2.36.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
-docling_core-2.36.0.dist-info/RECORD,,
+docling_core-2.38.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
+docling_core-2.38.0.dist-info/METADATA,sha256=llcycAVzvc09CX0igt4VIGrGWT8UuMjnWN5rrQoEJ6s,6453
+docling_core-2.38.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+docling_core-2.38.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
+docling_core-2.38.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
+docling_core-2.38.0.dist-info/RECORD,,

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{docling_core-2.36.0.dist-info → docling_core-2.38.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

docling-core 2.36.0__py3-none-any.whl → 2.38.0__py3-none-any.whl

Potentially problematic release.

docling-core 2.36.0__py3-none-any.whl → 2.38.0__py3-none-any.whl