PyPI - docling-core - Versions diffs - 2.46.0__py3-none-any.whl → 2.48.0__py3-none-any.whl - Mend

docling-core 2.46.0__py3-none-any.whl → 2.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling-core might be problematic. Click here for more details.

Files changed (12) hide show

docling_core/transforms/chunker/hierarchical_chunker.py CHANGED Viewed

@@ -145,7 +145,7 @@ class TripletTableSerializer(BaseTableSerializer):
             parts.append(cap_res)
         if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
-            table_df = item.export_to_dataframe()
+            table_df = item.export_to_dataframe(doc)
             if table_df.shape[0] >= 1 and table_df.shape[1] >= 2:
                 # copy header as first row and shift all rows by one

docling_core/transforms/serializer/common.py CHANGED Viewed

@@ -394,6 +394,7 @@ class DocSerializer(BaseModel, BaseDocSerializer):
                 item=item,
                 doc_serializer=self,
                 doc=self.doc,
+                visited=my_visited,
                 **my_kwargs,
             )
         return part

docling_core/transforms/serializer/doctags.py CHANGED Viewed

@@ -32,6 +32,7 @@ from docling_core.types.doc.document import (
     DoclingDocument,
     FloatingItem,
     FormItem,
+    GroupItem,
     InlineGroup,
     KeyValueItem,
     ListGroup,
@@ -42,6 +43,7 @@ from docling_core.types.doc.document import (
     PictureMoleculeData,
     PictureTabularChartData,
     ProvenanceItem,
+    SectionHeaderItem,
     TableItem,
     TextItem,
 )
@@ -94,11 +96,11 @@ class DocTagsTextSerializer(BaseModel, BaseTextSerializer):
         item: TextItem,
         doc_serializer: BaseDocSerializer,
         doc: DoclingDocument,
+        visited: Optional[set[str]] = None,
         **kwargs: Any,
     ) -> SerializationResult:
         """Serializes the passed item."""
-        from docling_core.types.doc.document import SectionHeaderItem
+        my_visited = visited if visited is not None else set()
         params = DocTagsParams(**kwargs)
         wrap_tag: Optional[str] = DocumentToken.create_token_name_from_doc_item_label(
             label=item.label,
@@ -116,12 +118,21 @@ class DocTagsTextSerializer(BaseModel, BaseTextSerializer):
                 parts.append(location)
         if params.add_content:
-            text_part = item.text
-            text_part = doc_serializer.post_process(
-                text=text_part,
-                formatting=item.formatting,
-                hyperlink=item.hyperlink,
-            )
+            if (
+                item.text == ""
+                and len(item.children) == 1
+                and isinstance(
+                    (child_group := item.children[0].resolve(doc)), InlineGroup
+                )
+            ):
+                ser_res = doc_serializer.serialize(item=child_group, visited=my_visited)
+                text_part = ser_res.text
+            else:
+                text_part = doc_serializer.post_process(
+                    text=item.text,
+                    formatting=item.formatting,
+                    hyperlink=item.hyperlink,
+                )
             if isinstance(item, CodeItem):
                 language_token = DocumentToken.get_code_language_token(
@@ -506,7 +517,12 @@ class DocTagsFallbackSerializer(BaseFallbackSerializer):
         **kwargs: Any,
     ) -> SerializationResult:
         """Serializes the passed item."""
-        return create_ser_result()
+        if isinstance(item, GroupItem):
+            parts = doc_serializer.get_parts(item=item, **kwargs)
+            text_res = "\n".join([p.text for p in parts if p.text])
+            return create_ser_result(text=text_res, span_source=parts)
+        else:
+            return create_ser_result()
 class DocTagsAnnotationSerializer(BaseAnnotationSerializer):

docling_core/transforms/serializer/html.py CHANGED Viewed

@@ -55,6 +55,7 @@ from docling_core.types.doc.document import (
     FormItem,
     FormulaItem,
     GraphData,
+    GroupItem,
     ImageRef,
     InlineGroup,
     KeyValueItem,
@@ -139,21 +140,34 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
         res_parts: list[SerializationResult] = []
         post_processed = False
-        # Prepare the HTML based on item type
-        if isinstance(item, TitleItem):
-            text_inner = self._prepare_content(item.text)
-            text = get_html_tag_with_text_direction(html_tag="h1", text=text_inner)
+        has_inline_repr = (
+            item.text == ""
+            and len(item.children) == 1
+            and isinstance((child_group := item.children[0].resolve(doc)), InlineGroup)
+        )
+        if has_inline_repr:
+            text = doc_serializer.serialize(item=child_group, visited=my_visited).text
+            post_processed = True
+        else:
+            text = item.text
+            if not isinstance(item, (CodeItem, FormulaItem)):
+                text = html.escape(text, quote=False)
+                text = text.replace("\n", "<br>")
-        elif isinstance(item, SectionHeaderItem):
-            section_level = min(item.level + 1, 6)
-            text_inner = self._prepare_content(item.text)
+        # Prepare the HTML based on item type
+        if isinstance(item, (TitleItem, SectionHeaderItem)):
+            section_level = (
+                min(item.level + 1, 6) if isinstance(item, SectionHeaderItem) else 1
+            )
             text = get_html_tag_with_text_direction(
-                html_tag=f"h{section_level}", text=text_inner
+                html_tag=f"h{section_level}", text=text
             )
         elif isinstance(item, FormulaItem):
             text = self._process_formula(
                 item=item,
+                text=text,
+                orig=item.orig,
                 doc=doc,
                 image_mode=params.image_mode,
                 formula_to_mathml=params.formula_to_mathml,
@@ -161,19 +175,26 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
             )
         elif isinstance(item, CodeItem):
-            text = self._process_code(item=item, is_inline_scope=is_inline_scope)
+            text = (
+                f"<code>{text}</code>"
+                if is_inline_scope
+                else f"<pre><code>{text}</code></pre>"
+            )
         elif isinstance(item, ListItem):
             # List items are handled by list serializer
             text_parts: list[str] = []
-            if item_text := self._prepare_content(item.text):
-                item_text = doc_serializer.post_process(
-                    text=item_text,
-                    formatting=item.formatting,
-                    hyperlink=item.hyperlink,
-                )
-                post_processed = True
-                text_parts.append(item_text)
+            if text:
+                if has_inline_repr:
+                    text = f"\n{text}\n"
+                else:
+                    text = doc_serializer.post_process(
+                        text=text,
+                        formatting=item.formatting,
+                        hyperlink=item.hyperlink,
+                    )
+                    post_processed = True
+                text_parts.append(text)
             nested_parts = [
                 r.text
                 for r in doc_serializer.get_parts(
@@ -184,29 +205,26 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
                 )
             ]
             text_parts.extend(nested_parts)
-            text_inner = "\n".join(text_parts)
+            text = "\n".join(text_parts)
             if nested_parts:
-                text_inner = f"\n{text_inner}\n"
+                text = f"\n{text}\n"
             text = (
                 get_html_tag_with_text_direction(
                     html_tag="li",
-                    text=text_inner,
+                    text=text,
                     attrs=(
                         {"style": f"list-style-type: '{item.marker} ';"}
                         if params.show_original_list_item_marker and item.marker
                         else {}
                     ),
                 )
-                if text_inner
+                if text
                 else ""
             )
-        elif is_inline_scope:
-            text = self._prepare_content(item.text)
-        else:
+        elif not is_inline_scope:
             # Regular text item
-            text_inner = self._prepare_content(item.text)
-            text = get_html_tag_with_text_direction(html_tag="p", text=text_inner)
+            text = get_html_tag_with_text_direction(html_tag="p", text=text)
         # Apply formatting and hyperlinks
         if not post_processed:
@@ -227,66 +245,44 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
         return create_ser_result(text=text, span_source=res_parts)
-    def _prepare_content(
-        self, text: str, do_escape_html=True, do_replace_newline=True
-    ) -> str:
-        """Prepare text content for HTML inclusion."""
-        if do_escape_html:
-            text = html.escape(text, quote=False)
-        if do_replace_newline:
-            text = text.replace("\n", "<br>")
-        return text
-    def _process_code(
-        self,
-        item: CodeItem,
-        is_inline_scope: bool,
-    ) -> str:
-        code_text = self._prepare_content(
-            item.text, do_escape_html=False, do_replace_newline=False
-        )
-        if is_inline_scope:
-            text = f"<code>{code_text}</code>"
-        else:
-            text = f"<pre><code>{code_text}</code></pre>"
-        return text
     def _process_formula(
         self,
-        item: FormulaItem,
+        *,
+        item: DocItem,
+        text: str,
+        orig: str,
         doc: DoclingDocument,
         image_mode: ImageRefMode,
         formula_to_mathml: bool,
         is_inline_scope: bool,
     ) -> str:
         """Process a formula item to HTML/MathML."""
-        math_formula = self._prepare_content(
-            item.text, do_escape_html=False, do_replace_newline=False
-        )
         # If formula is empty, try to use an image fallback
-        if item.text == "" and item.orig != "":
-            img_fallback = self._get_formula_image_fallback(item, doc)
-            if (
-                image_mode == ImageRefMode.EMBEDDED
-                and len(item.prov) > 0
-                and img_fallback
-            ):
-                return img_fallback
+        if (
+            text == ""
+            and orig != ""
+            and len(item.prov) > 0
+            and image_mode == ImageRefMode.EMBEDDED
+            and (
+                img_fallback := self._get_formula_image_fallback(
+                    item=item, orig=orig, doc=doc
+                )
+            )
+        ):
+            return img_fallback
         # Try to generate MathML
-        if formula_to_mathml and math_formula:
+        elif formula_to_mathml and text:
             try:
                 # Set display mode based on context
                 display_mode = "inline" if is_inline_scope else "block"
                 mathml_element = latex2mathml.converter.convert_to_element(
-                    math_formula, display=display_mode
+                    text, display=display_mode
                 )
                 annotation = SubElement(
                     mathml_element, "annotation", dict(encoding="TeX")
                 )
-                annotation.text = math_formula
+                annotation.text = text
                 mathml = unescape(tostring(mathml_element, encoding="unicode"))
                 # Don't wrap in div for inline formulas
@@ -296,40 +292,40 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
                     return f"<div>{mathml}</div>"
             except Exception:
-                img_fallback = self._get_formula_image_fallback(item, doc)
+                img_fallback = self._get_formula_image_fallback(
+                    item=item, orig=orig, doc=doc
+                )
                 if (
                     image_mode == ImageRefMode.EMBEDDED
                     and len(item.prov) > 0
                     and img_fallback
                 ):
                     return img_fallback
-                elif math_formula:
-                    return f"<pre>{math_formula}</pre>"
+                elif text:
+                    return f"<pre>{text}</pre>"
                 else:
                     return "<pre>Formula not decoded</pre>"
         _logger.warning("Could not parse formula with MathML")
         # Fallback options if we got here
-        if math_formula and is_inline_scope:
-            return f"<code>{math_formula}</code>"
-        elif math_formula and (not is_inline_scope):
-            f"<pre>{math_formula}</pre>"
+        if text and is_inline_scope:
+            return f"<code>{text}</code>"
+        elif text and (not is_inline_scope):
+            f"<pre>{text}</pre>"
         elif is_inline_scope:
             return '<span class="formula-not-decoded">Formula not decoded</span>'
         return '<div class="formula-not-decoded">Formula not decoded</div>'
     def _get_formula_image_fallback(
-        self, item: TextItem, doc: DoclingDocument
+        self, *, item: DocItem, orig: str, doc: DoclingDocument
     ) -> Optional[str]:
         """Try to get an image fallback for a formula."""
         item_image = item.get_image(doc=doc)
         if item_image is not None:
             img_ref = ImageRef.from_pil(item_image, dpi=72)
-            return (
-                "<figure>" f'<img src="{img_ref.uri}" alt="{item.orig}" />' "</figure>"
-            )
+            return "<figure>" f'<img src="{img_ref.uri}" alt="{orig}" />' "</figure>"
         return None
@@ -792,21 +788,30 @@ class HTMLFallbackSerializer(BaseFallbackSerializer):
     """HTML-specific fallback serializer."""
     @override
-    def serialize(self, *, item: NodeItem, **kwargs: Any) -> SerializationResult:
+    def serialize(
+        self,
+        *,
+        item: NodeItem,
+        doc_serializer: "BaseDocSerializer",
+        doc: DoclingDocument,
+        **kwargs: Any,
+    ) -> SerializationResult:
         """Fallback serializer for items not handled by other serializers."""
-        if isinstance(item, DocItem):
+        if isinstance(item, GroupItem):
+            parts = doc_serializer.get_parts(item=item, **kwargs)
+            text_res = "\n".join([p.text for p in parts if p.text])
+            return create_ser_result(text=text_res, span_source=parts)
+        else:
             return create_ser_result(
                 text=f"<!-- Unhandled item type: {item.__class__.__name__} -->",
-                span_source=item,
+                span_source=item if isinstance(item, DocItem) else [],
             )
-        else:
-            # For group items, we don't generate any markup
-            return create_ser_result()
 class HTMLAnnotationSerializer(BaseModel, BaseAnnotationSerializer):
     """HTML-specific annotation serializer."""
+    @override
     def serialize(
         self,
         *,

docling_core/transforms/serializer/markdown.py CHANGED Viewed

@@ -45,6 +45,7 @@ from docling_core.types.doc.document import (
     Formatting,
     FormItem,
     FormulaItem,
+    GroupItem,
     ImageRef,
     InlineGroup,
     KeyValueItem,
@@ -124,26 +125,24 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
         my_visited = visited if visited is not None else set()
         params = MarkdownParams(**kwargs)
         res_parts: list[SerializationResult] = []
-        text = item.text
         escape_html = True
         escape_underscores = True
-        processing_pending = True
-        if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
-            # case where processing/formatting should be applied first (in inner scope)
+        has_inline_repr = (
+            item.text == ""
+            and len(item.children) == 1
+            and isinstance((child_group := item.children[0].resolve(doc)), InlineGroup)
+        )
+        if has_inline_repr:
+            text = doc_serializer.serialize(item=child_group, visited=my_visited).text
             processing_pending = False
-            if (
-                text == ""
-                and len(item.children) == 1
-                and isinstance(
-                    (child_group := item.children[0].resolve(doc)), InlineGroup
-                )
-            ):
-                # case of inline within heading / list item
-                ser_res = doc_serializer.serialize(item=child_group)
-                text = ser_res.text
-                for span in ser_res.spans:
-                    my_visited.add(span.item.self_ref)
-            else:
+        else:
+            text = item.text
+            processing_pending = True
+        if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
+            if not has_inline_repr:
+                # case where processing/formatting should be applied first (in inner scope)
                 text = doc_serializer.post_process(
                     text=text,
                     escape_html=escape_html,
@@ -151,6 +150,7 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
                     formatting=item.formatting,
                     hyperlink=item.hyperlink,
                 )
+                processing_pending = False
             if isinstance(item, ListItem):
                 pieces: list[str] = []
@@ -600,13 +600,15 @@ class MarkdownFallbackSerializer(BaseFallbackSerializer):
         **kwargs: Any,
     ) -> SerializationResult:
         """Serializes the passed item."""
-        if isinstance(item, DocItem):
+        if isinstance(item, GroupItem):
+            parts = doc_serializer.get_parts(item=item, **kwargs)
+            text_res = "\n\n".join([p.text for p in parts if p.text])
+            return create_ser_result(text=text_res, span_source=parts)
+        else:
             return create_ser_result(
                 text="<!-- missing-text -->",
-                span_source=item,
+                span_source=item if isinstance(item, DocItem) else [],
             )
-        else:
-            return create_ser_result()
 class MarkdownDocSerializer(DocSerializer):

docling_core/types/doc/document.py CHANGED Viewed

@@ -60,7 +60,7 @@ _logger = logging.getLogger(__name__)
 Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
 LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
-CURRENT_VERSION: Final = "1.6.0"
+CURRENT_VERSION: Final = "1.7.0"
 DEFAULT_EXPORT_LABELS = {
     DocItemLabel.TITLE,
@@ -310,6 +310,7 @@ class TableCell(BaseModel):
     column_header: bool = False
     row_header: bool = False
     row_section: bool = False
+    fillable: bool = False
     @model_validator(mode="before")
     @classmethod
@@ -4045,7 +4046,7 @@ class DoclingDocument(BaseModel):
             root=root,
             with_groups=with_groups,
             traverse_pictures=traverse_pictures,
-            page_no=page_no,
+            page_nrs={page_no} if page_no is not None else None,
             included_content_layers=included_content_layers,
         ):
             yield item, len(stack)
@@ -4055,7 +4056,7 @@ class DoclingDocument(BaseModel):
         root: Optional[NodeItem] = None,
         with_groups: bool = False,
         traverse_pictures: bool = False,
-        page_no: Optional[int] = None,
+        page_nrs: Optional[set[int]] = None,
         included_content_layers: Optional[set[ContentLayer]] = None,
         _stack: Optional[list[int]] = None,
     ) -> typing.Iterable[Tuple[NodeItem, list[int]]]:  # tuple of node and level
@@ -4078,8 +4079,8 @@ class DoclingDocument(BaseModel):
             and (
                 not isinstance(root, DocItem)
                 or (
-                    page_no is None
-                    or any(prov.page_no == page_no for prov in root.prov)
+                    page_nrs is None
+                    or any(prov.page_no in page_nrs for prov in root.prov)
                 )
             )
             and root.content_layer in my_layers
@@ -4113,7 +4114,7 @@ class DoclingDocument(BaseModel):
                     child,
                     with_groups=with_groups,
                     traverse_pictures=traverse_pictures,
-                    page_no=page_no,
+                    page_nrs=page_nrs,
                     _stack=my_stack,
                     included_content_layers=my_layers,
                 )
@@ -5603,7 +5604,9 @@ class DoclingDocument(BaseModel):
         def get_item_list(self, key: str) -> list[NodeItem]:
             return getattr(self, key)
-        def index(self, doc: "DoclingDocument") -> None:
+        def index(
+            self, doc: "DoclingDocument", page_nrs: Optional[set[int]] = None
+        ) -> None:
             orig_ref_to_new_ref: dict[str, str] = {}
             page_delta = self._max_page - min(doc.pages.keys()) + 1 if doc.pages else 0
@@ -5614,10 +5617,11 @@ class DoclingDocument(BaseModel):
             self._names.append(doc.name)
             # collect items in traversal order
-            for item, _ in doc.iterate_items(
+            for item, _ in doc._iterate_items_with_stack(
                 with_groups=True,
                 traverse_pictures=True,
                 included_content_layers={c for c in ContentLayer},
+                page_nrs=page_nrs,
             ):
                 key = item.self_ref.split("/")[1]
                 is_body = key == "body"
@@ -5686,12 +5690,13 @@ class DoclingDocument(BaseModel):
             # update pages
             new_max_page = None
             for page_nr in doc.pages:
-                new_page = copy.deepcopy(doc.pages[page_nr])
-                new_page_nr = page_nr + page_delta
-                new_page.page_no = new_page_nr
-                self.pages[new_page_nr] = new_page
-                if new_max_page is None or new_page_nr > new_max_page:
-                    new_max_page = new_page_nr
+                if page_nrs is None or page_nr in page_nrs:
+                    new_page = copy.deepcopy(doc.pages[page_nr])
+                    new_page_nr = page_nr + page_delta
+                    new_page.page_no = new_page_nr
+                    self.pages[new_page_nr] = new_page
+                    if new_max_page is None or new_page_nr > new_max_page:
+                        new_max_page = new_page_nr
             if new_max_page is not None:
                 self._max_page = new_max_page
@@ -5715,6 +5720,14 @@ class DoclingDocument(BaseModel):
         doc_index.index(doc=self)
         self._update_from_index(doc_index)
+    def filter(self, page_nrs: Optional[set[int]] = None) -> "DoclingDocument":
+        """Create a new document based on the provided filter parameters."""
+        doc_index = DoclingDocument._DocIndex()
+        doc_index.index(doc=self, page_nrs=page_nrs)
+        res_doc = DoclingDocument(name=self.name)
+        res_doc._update_from_index(doc_index)
+        return res_doc
     @classmethod
     def concatenate(cls, docs: Sequence["DoclingDocument"]) -> "DoclingDocument":
         """Concatenate multiple documents into a single document."""

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling-core
-Version: 2.46.0
+Version: 2.48.0
 Summary: A python library to define and validate data types in Docling.
 Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/RECORD RENAMED Viewed

@@ -19,7 +19,7 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
 docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
 docling_core/transforms/chunker/__init__.py,sha256=Qg5RhC-2QqdXKEfjzNGJaVi0NqBCL3xAhKWJGOlrE3M,375
 docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
-docling_core/transforms/chunker/hierarchical_chunker.py,sha256=uDf-qGiIT_4JUEg9NOdzvDqAPOTqycKJ-jEpDkV3jJU,8243
+docling_core/transforms/chunker/hierarchical_chunker.py,sha256=qc-gnuxji-2lrlZCRr34VubBciBTE4ClZ3QplgNpUx8,8246
 docling_core/transforms/chunker/hybrid_chunker.py,sha256=xjkz8hy3tXXzkJzf7QMFOEq_v8V7Jcs9tCY0Mxjge74,12548
 docling_core/transforms/chunker/page_chunker.py,sha256=gLUlqA_klK-rkuPVYuJKi3ZuTIGdd2HD7ces72AiZ2U,2018
 docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
@@ -28,11 +28,11 @@ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZ
 docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
 docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
 docling_core/transforms/serializer/base.py,sha256=TI8Epj7gyxdTet9j-Rs4o5U09gfACfAIVoirlschviM,7266
-docling_core/transforms/serializer/common.py,sha256=RwfdzZ9FRSHQjKM0vskg1CVqar0Z_ms38arSlLAgITc,19150
-docling_core/transforms/serializer/doctags.py,sha256=VXPjAZPhBur7LaEeuqH9k31TgZWSN32lK8z8rJXzFwY,19935
-docling_core/transforms/serializer/html.py,sha256=GRfRaqFIb4FXRMplB4Agl4fSNa5jsHV7P4tBtFMro9I,38453
+docling_core/transforms/serializer/common.py,sha256=vfJhu0b4vAcIres85PX774RQSTKu9RueBOWMO95FQyc,19186
+docling_core/transforms/serializer/doctags.py,sha256=9_aV_ffTOTtQKZQTKz_I3kRTQ_GXHCePKwXnR-rnggA,20644
+docling_core/transforms/serializer/html.py,sha256=h0yiDgTNIeOS-rJaMRfinUFgrZygd3MjheM7pjLw5F0,38380
 docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
-docling_core/transforms/serializer/markdown.py,sha256=hilGM1yWpbbRTjuEjfBRrhavspD5vFF_6SDvlKx8BrM,24230
+docling_core/transforms/serializer/markdown.py,sha256=9Sy7xWSegX0zdQb9vPzEUFucyGQUA4TcQxMfE70SJsk,24354
 docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
 docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
 docling_core/transforms/visualizer/key_value_visualizer.py,sha256=fp7nFLy4flOSiavdRgg5y1Mu7WVLIDGh1zEHsq8kgVM,8979
@@ -43,7 +43,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
 docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
 docling_core/types/doc/__init__.py,sha256=Vsl3oJV3_BLpS7rIwvahhcWOwmEBvj7ZbQzQCCl-IQk,1678
 docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
-docling_core/types/doc/document.py,sha256=Ab-JOc6fkzocXP3PcxPRXJPjLOhOTYo_0571vSr6VXo,202093
+docling_core/types/doc/document.py,sha256=sZsLV6GfFF8TzTgD6C47a9YrurLZFhwqt8I9PZmYkJY,202734
 docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
 docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
 docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -76,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
 docling_core/utils/legacy.py,sha256=G7ed8fkBpIO8hG3DKEY83cHsrKJHyvDst_1jSdgBXMI,24406
 docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
 docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
-docling_core-2.46.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
-docling_core-2.46.0.dist-info/METADATA,sha256=txMHh-7y8N3RiJ_M_HbrsvzRyGPJVXv8UcA6_DpAfok,6453
-docling_core-2.46.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-docling_core-2.46.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
-docling_core-2.46.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
-docling_core-2.46.0.dist-info/RECORD,,
+docling_core-2.48.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
+docling_core-2.48.0.dist-info/METADATA,sha256=WybgSJP5TG0mMu5sA2bN0pVKCoZxKCf4KR70MGK3904,6453
+docling_core-2.48.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+docling_core-2.48.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
+docling_core-2.48.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
+docling_core-2.48.0.dist-info/RECORD,,

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{docling_core-2.46.0.dist-info → docling_core-2.48.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

docling-core 2.46.0__py3-none-any.whl → 2.48.0__py3-none-any.whl

Potentially problematic release.

docling-core 2.46.0__py3-none-any.whl → 2.48.0__py3-none-any.whl