PyPI - docling-core - Versions diffs - 1.7.2__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

docling-core 1.7.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling-core might be problematic. Click here for more details.

Files changed (36)

docling_core/transforms/chunker/__init__.py +2 -8
docling_core/transforms/chunker/base.py +27 -40
docling_core/transforms/chunker/hierarchical_chunker.py +144 -312
docling_core/types/__init__.py +12 -8
docling_core/types/doc/__init__.py +25 -0
docling_core/types/doc/base.py +136 -451
docling_core/types/doc/document.py +1288 -559
docling_core/types/{experimental → doc}/labels.py +4 -1
docling_core/types/legacy_doc/__init__.py +6 -0
docling_core/types/legacy_doc/base.py +485 -0
docling_core/types/{doc → legacy_doc}/doc_ann.py +1 -1
docling_core/types/{doc → legacy_doc}/doc_ocr.py +1 -1
docling_core/types/{doc → legacy_doc}/doc_raw.py +1 -1
docling_core/types/legacy_doc/document.py +715 -0
docling_core/types/rec/subject.py +1 -1
docling_core/utils/generate_docs.py +82 -0
docling_core/utils/{ds_generate_jsonschema.py → generate_jsonschema.py} +4 -4
docling_core/utils/validators.py +3 -3
{docling_core-1.7.2.dist-info → docling_core-2.0.0.dist-info}/METADATA +10 -10
{docling_core-1.7.2.dist-info → docling_core-2.0.0.dist-info}/RECORD +24 -31
docling_core-2.0.0.dist-info/entry_points.txt +5 -0
docling_core/transforms/id_generator/__init__.py +0 -12
docling_core/transforms/id_generator/base.py +0 -30
docling_core/transforms/id_generator/doc_hash_id_generator.py +0 -27
docling_core/transforms/id_generator/uuid_generator.py +0 -34
docling_core/transforms/metadata_extractor/__init__.py +0 -13
docling_core/transforms/metadata_extractor/base.py +0 -59
docling_core/transforms/metadata_extractor/simple_metadata_extractor.py +0 -59
docling_core/types/experimental/__init__.py +0 -30
docling_core/types/experimental/base.py +0 -167
docling_core/types/experimental/document.py +0 -1192
docling_core/utils/ds_generate_docs.py +0 -144
docling_core-1.7.2.dist-info/entry_points.txt +0 -5
docling_core/types/{doc → legacy_doc}/tokens.py +0 -0
{docling_core-1.7.2.dist-info → docling_core-2.0.0.dist-info}/LICENSE +0 -0
{docling_core-1.7.2.dist-info → docling_core-2.0.0.dist-info}/WHEEL +0 -0

docling_core/transforms/chunker/__init__.py CHANGED Viewed

@@ -5,11 +5,5 @@
 """Define the chunker types."""
-from docling_core.transforms.chunker.base import (  # noqa
-    BaseChunker,
-    Chunk,
-    ChunkWithMetadata,
-)
-from docling_core.transforms.chunker.hierarchical_chunker import (  # noqa
-    HierarchicalChunker,
-)
+from docling_core.transforms.chunker.base import BaseChunk, BaseChunker, BaseMeta
+from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker

docling_core/transforms/chunker/base.py CHANGED Viewed

@@ -4,71 +4,58 @@
 #
 """Define base classes for chunking."""
-import re
 from abc import ABC, abstractmethod
-from typing import Final, Iterator, Optional
+from typing import Any, ClassVar, Iterator
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel
-from docling_core.types import BoundingBox, Document
-from docling_core.types.base import _JSON_POINTER_REGEX
+from docling_core.types.doc import DoclingDocument as DLDocument
-# (subset of) JSONPath format, e.g. "$.main-text[84]" (for migration purposes)
-_DEPRECATED_JSON_PATH_PATTERN: Final = re.compile(r"^\$\.([\w-]+)\[(\d+)\]$")
+class BaseMeta(BaseModel):
+    """Metadata base class."""
-def _create_path(pos: int, path_prefix: str = "main-text") -> str:
-    return f"#/{path_prefix}/{pos}"
+    excluded_embed: ClassVar[list[str]] = []
+    excluded_llm: ClassVar[list[str]] = []
+    def export_json_dict(self) -> dict[str, Any]:
+        """Helper method for exporting non-None keys to JSON mode.
-class Chunk(BaseModel):
-    """Data model for Chunk."""
+        Returns:
+            dict[str, Any]: The exported dictionary.
+        """
+        return self.model_dump(mode="json", by_alias=True, exclude_none=True)
-    path: str = Field(pattern=_JSON_POINTER_REGEX)
-    text: str
-    heading: Optional[str] = None
-    @field_validator("path", mode="before")
-    @classmethod
-    def _json_pointer_from_json_path(cls, path: str):
-        if (match := _DEPRECATED_JSON_PATH_PATTERN.match(path)) is not None:
-            groups = match.groups()
-            if len(groups) == 2 and groups[0] is not None and groups[1] is not None:
-                return _create_path(
-                    pos=int(groups[1]),
-                    path_prefix=groups[0],
-                )
-        return path
+class BaseChunk(BaseModel):
+    """Chunk base class."""
+    text: str
+    meta: BaseMeta
-class ChunkWithMetadata(Chunk):
-    """Data model for Chunk including metadata."""
+    def export_json_dict(self) -> dict[str, Any]:
+        """Helper method for exporting non-None keys to JSON mode.
-    page: Optional[int] = None
-    bbox: Optional[BoundingBox] = None
+        Returns:
+            dict[str, Any]: The exported dictionary.
+        """
+        return self.model_dump(mode="json", by_alias=True, exclude_none=True)
 class BaseChunker(BaseModel, ABC):
-    """Base class for Chunker."""
+    """Chunker base class."""
     @abstractmethod
-    def chunk(self, dl_doc: Document, **kwargs) -> Iterator[Chunk]:
+    def chunk(self, dl_doc: DLDocument, **kwargs) -> Iterator[BaseChunk]:
         """Chunk the provided document.
         Args:
-            dl_doc (Document): document to chunk
+            dl_doc (DLDocument): document to chunk
         Raises:
             NotImplementedError: in this abstract implementation
         Yields:
-            Iterator[Chunk]: iterator over extracted chunks
+            Iterator[BaseChunk]: iterator over extracted chunks
         """
         raise NotImplementedError()
-    @classmethod
-    def _create_path(cls, pos: int, path_prefix: str = "main-text") -> str:
-        return _create_path(
-            pos=pos,
-            path_prefix=path_prefix,
-        )

docling_core/transforms/chunker/hierarchical_chunker.py CHANGED Viewed

@@ -8,347 +8,179 @@
 from __future__ import annotations
 import logging
-from enum import Enum
-from typing import Any, Iterator, Optional, Union
-import pandas as pd
-from pydantic import BaseModel, Field, PositiveInt
-from docling_core.transforms.chunker import BaseChunker, Chunk, ChunkWithMetadata
-from docling_core.types import BaseText
-from docling_core.types import Document as DLDocument
-from docling_core.types import Ref, Table
+from typing import Any, ClassVar, Iterator, Optional
+from pandas import DataFrame
+from pydantic import Field
+from docling_core.transforms.chunker import BaseChunk, BaseChunker, BaseMeta
+from docling_core.types.doc import DoclingDocument as DLDocument
+from docling_core.types.doc.document import (
+    DocItem,
+    LevelNumber,
+    ListItem,
+    SectionHeaderItem,
+    TableItem,
+    TextItem,
+)
+from docling_core.types.doc.labels import DocItemLabel
+_KEY_DOC_ITEMS = "doc_items"
+_KEY_HEADINGS = "headings"
+_KEY_CAPTIONS = "captions"
 _logger = logging.getLogger(__name__)
-class HierarchicalChunker(BaseChunker):
-    """Chunker implementation leveraging the document layout."""
+class DocMeta(BaseMeta):
+    """Data model for Hierarchical Chunker metadata."""
-    heading_as_metadata: bool = Field(
-        default=False,
-        description="Whether heading should be in metadata (instead of text)",
+    doc_items: list[DocItem] = Field(
+        alias=_KEY_DOC_ITEMS,
+        min_length=1,
     )
-    include_metadata: bool = Field(
-        default=True,
-        description="Whether to include extras in the metadata",
+    headings: Optional[list[str]] = Field(
+        default=None,
+        alias=_KEY_HEADINGS,
+        min_length=1,
     )
-    min_chunk_len: PositiveInt = Field(
-        default=64, description="Minimum chunk text length to consider (in chars)"
+    captions: Optional[list[str]] = Field(
+        default=None,
+        alias=_KEY_CAPTIONS,
+        min_length=1,
     )
-    class _NodeType(str, Enum):
-        PARAGRAPH = "paragraph"
-        SUBTITLE_LEVEL_1 = "subtitle-level-1"
-        TABLE = "table"
-        CAPTION = "caption"
-    class _NodeName(str, Enum):
-        TITLE = "title"
-        REFERENCE = "reference"
-        LIST_ITEM = "list-item"
-        SUBTITLE_LEVEL_1 = "subtitle-level-1"
-    _allowed_types: list[str] = [
-        _NodeType.PARAGRAPH,
-        _NodeType.SUBTITLE_LEVEL_1,
-        _NodeType.TABLE,
-        _NodeType.CAPTION,
-    ]
-    _disallowed_names_by_type: dict[str, list[str]] = {
-        _NodeType.PARAGRAPH: [
-            _NodeName.REFERENCE,
-        ],
-    }
-    @classmethod
-    def _norm(cls, text: Optional[str]) -> Optional[str]:
-        return text.lower() if text is not None else None
-    @classmethod
-    def _convert_table_to_dataframe(cls, table: Table) -> Optional[pd.DataFrame]:
-        if table.data:
-            table_content = [[cell.text for cell in row] for row in table.data]
-            return pd.DataFrame(table_content)
-        else:
-            return None
-    @classmethod
-    def _triplet_serialize(cls, table) -> Optional[str]:
-        output_text: Optional[str] = None
-        table_df = cls._convert_table_to_dataframe(table)
-        if table_df is not None and table_df.shape[0] > 1 and table_df.shape[1] > 1:
-            rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
-            cols = [item.strip() for item in table_df.iloc[0, :].to_list()]
-            nrows = table_df.shape[0]
-            ncols = table_df.shape[1]
-            texts = [
-                f"{rows[i]}, {cols[j]} = {str(table_df.iloc[i, j]).strip()}"
-                for i in range(1, nrows)
-                for j in range(1, ncols)
-            ]
-            output_text = ". ".join(texts)
-        return output_text
-    class _MainTextItemNode(BaseModel):
-        parent: Optional[int] = None
-        children: list[int] = []
-    class _TitleInfo(BaseModel):
-        text: str
-        path_in_doc: str
-    class _GlobalContext(BaseModel):
-        title: Optional[_HC._TitleInfo] = None
-    class _DocContext(BaseModel):
-        dmap: dict[int, _HC._MainTextItemNode]  # main text element context
-        glob: _HC._GlobalContext  # global context
-        @classmethod
-        def from_doc(cls, doc: DLDocument) -> _HC._DocContext:
-            dmap: dict[int, _HC._MainTextItemNode] = {}
-            glob: _HC._GlobalContext = _HC._GlobalContext()
-            if doc.description.title:
-                glob.title = _HC._TitleInfo(
-                    text=doc.description.title,
-                    path_in_doc="description.title",
-                )
-            parent = None
-            if doc.main_text:
-                idx = 0
-                while idx < len(doc.main_text):
-                    item = doc.main_text[idx]
-                    if (
-                        not glob.title
-                        and isinstance(item, BaseText)
-                        and _HC._norm(item.name) == _HC._NodeName.TITLE
-                    ):
-                        glob.title = _HC._TitleInfo(
-                            text=item.text,
-                            path_in_doc=_HC._create_path(idx),
-                        )
-                    # start of a subtitle-level-1 parent
-                    if (
-                        isinstance(item, BaseText)
-                        and _HC._norm(item.obj_type) == _HC._NodeType.SUBTITLE_LEVEL_1
-                    ):
-                        dmap[idx] = _HC._MainTextItemNode(parent=None)
-                        parent = idx
-                        if not glob.title:
-                            glob.title = _HC._TitleInfo(
-                                text=item.text,
-                                path_in_doc=_HC._create_path(idx),
-                            )
-                    # start of a list parent
-                    elif (
-                        isinstance(item, BaseText)
-                        and _HC._norm(item.name) != _HC._NodeName.LIST_ITEM
-                        and idx + 1 < len(doc.main_text)
-                        and _HC._norm(doc.main_text[idx + 1].name)
-                        == _HC._NodeName.LIST_ITEM
-                    ):
-                        if parent is not None:
-                            dmap[parent].children.append(idx)
-                        dmap[idx] = _HC._MainTextItemNode(parent=parent)
+    excluded_embed: ClassVar[list[str]] = [_KEY_DOC_ITEMS]
+    excluded_llm: ClassVar[list[str]] = [_KEY_DOC_ITEMS]
-                        # have all children register locally
-                        li = idx + 1
-                        while (
-                            li < len(doc.main_text)
-                            and _HC._norm(doc.main_text[li].name)
-                            == _HC._NodeName.LIST_ITEM
-                        ):
-                            dmap[idx].children.append(li)
-                            dmap[li] = _HC._MainTextItemNode(parent=idx)
-                            li += 1
-                        idx = li
-                        continue
-                    # normal case
-                    else:
-                        if parent is not None:
-                            dmap[parent].children.append(idx)
-                        dmap[idx] = _HC._MainTextItemNode(parent=parent)
+class DocChunk(BaseChunk):
+    """Data model for Hierarchical Chunker chunks."""
-                    idx += 1
-            else:
-                pass
-            return cls(
-                dmap=dmap,
-                glob=glob,
-            )
+    meta: DocMeta
-    class _TextEntry(BaseModel):
-        text: str
-        path: str
-    def _build_chunk_impl(
-        self, doc: DLDocument, doc_map: _DocContext, idx: int, rec: bool = False
-    ) -> tuple[list[_TextEntry], Optional[str]]:
-        if doc.main_text:
-            item = doc.main_text[idx]
-            item_type = _HC._norm(item.obj_type)
-            item_name = _HC._norm(item.name)
-            if (
-                item_type not in self._allowed_types
-                or item_name in self._disallowed_names_by_type.get(item_type, [])
-            ):
-                return [], None
+class HierarchicalChunker(BaseChunker):
+    r"""Chunker implementation leveraging the document layout.
-            c2p = doc_map.dmap
+    Args:
+        merge_list_items (bool): Whether to merge successive list items.
+            Defaults to True.
+        delim (str): Delimiter to use for merging text. Defaults to "\n".
+    """
-            text_entries: list[_HC._TextEntry] = []
-            if (
-                isinstance(item, Ref)
-                and item_type == _HC._NodeType.TABLE
-                and doc.tables
-            ):
-                # resolve table reference
-                ref_nr = int(item.ref.split("/")[2])  # e.g. '#/tables/0'
-                table = doc.tables[ref_nr]
-                ser_out = _HC._triplet_serialize(table)
-                if table.data:
-                    text_entries = (
-                        [
-                            self._TextEntry(
-                                text=ser_out,
-                                path=self._create_path(idx),
-                            )
-                        ]
-                        if ser_out
-                        else []
-                    )
-                else:
-                    return [], None
-            elif isinstance(item, BaseText):
-                text_entries = [
-                    self._TextEntry(
-                        text=item.text,
-                        path=self._create_path(idx),
-                    )
-                ]
+    merge_list_items: bool = True
+    delim: str = "\n"
-            # squash in any children of type list-item
-            if not rec:
-                if (
-                    c2p[idx].children
-                    and _HC._norm(doc.main_text[c2p[idx].children[0]].name)
-                    == _HC._NodeName.LIST_ITEM
-                ):
-                    text_entries = text_entries + [
-                        self._TextEntry(
-                            text=doc.main_text[c].text,  # type: ignore[union-attr]
-                            path=self._create_path(c),
-                        )
-                        for c in c2p[idx].children
-                        if isinstance(doc.main_text[c], BaseText)
-                        and _HC._norm(doc.main_text[c].name) == _HC._NodeName.LIST_ITEM
-                    ]
-                elif item_name in [
-                    _HC._NodeName.LIST_ITEM,
-                    _HC._NodeName.SUBTITLE_LEVEL_1,
-                ]:
-                    return [], None
+    @classmethod
+    def _triplet_serialize(cls, table_df: DataFrame) -> str:
-            if (parent := c2p[idx].parent) is not None:
-                # prepend with ancestors
+        # copy header as first row and shift all rows by one
+        table_df.loc[-1] = table_df.columns  # type: ignore[call-overload]
+        table_df.index = table_df.index + 1
+        table_df = table_df.sort_index()
-                parent_res = self._build_chunk_impl(
-                    doc=doc, doc_map=doc_map, idx=parent, rec=True
-                )
-                return (
-                    parent_res[0] + text_entries,  # expanded text
-                    parent_res[1],  # heading
-                )
-            else:
-                if (
-                    self.heading_as_metadata
-                    and isinstance(item, BaseText)
-                    and _HC._norm(item.obj_type) == _HC._NodeType.SUBTITLE_LEVEL_1
-                ):
-                    return [], text_entries[0].text
-                else:
-                    return text_entries, None
-        else:
-            return [], None
+        rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
+        cols = [item.strip() for item in table_df.iloc[0, :].to_list()]
-    def _build_chunk(
-        self,
-        doc: DLDocument,
-        doc_map: _DocContext,
-        idx: int,
-        delim: str,
-        rec: bool = False,
-    ) -> Optional[Chunk]:
-        res = self._build_chunk_impl(doc=doc, doc_map=doc_map, idx=idx, rec=rec)
-        texts = res[0]
-        heading = res[1]
-        concat = delim.join([t.text for t in texts if t.text])
-        assert doc.main_text is not None
-        if len(concat) >= self.min_chunk_len:
-            orig_item = doc.main_text[idx]
-            item: Union[BaseText, Table]
-            if isinstance(orig_item, Ref):
-                if _HC._norm(orig_item.obj_type) == _HC._NodeType.TABLE and doc.tables:
-                    pos = int(orig_item.ref.split("/")[2])
-                    item = doc.tables[pos]
-                    path = self._create_path(pos, path_prefix="tables")
-                else:  # currently disregarding non-table references
-                    return None
-            else:
-                item = orig_item
-                path = self._create_path(idx)
+        nrows = table_df.shape[0]
+        ncols = table_df.shape[1]
+        texts = [
+            f"{rows[i]}, {cols[j]} = {str(table_df.iloc[i, j]).strip()}"
+            for i in range(1, nrows)
+            for j in range(1, ncols)
+        ]
+        output_text = ". ".join(texts)
-            if self.include_metadata:
-                return ChunkWithMetadata(
-                    text=concat,
-                    path=path,
-                    heading=heading,
-                    page=item.prov[0].page if item.prov else None,
-                    bbox=item.prov[0].bbox if item.prov else None,
-                )
-            else:
-                return Chunk(
-                    text=concat,
-                    path=path,
-                    heading=heading,
-                )
-        else:
-            return None
+        return output_text
-    def chunk(self, dl_doc: DLDocument, delim="\n", **kwargs: Any) -> Iterator[Chunk]:
+    def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]:
         r"""Chunk the provided document.
         Args:
             dl_doc (DLDocument): document to chunk
-            delim (str, optional): delimiter to use when concatenating sub-items.
-                Defaults to "\n".
         Yields:
             Iterator[Chunk]: iterator over extracted chunks
         """
-        if dl_doc.main_text:
-            # extract doc structure incl. metadata for
-            # each item (e.g. parent, children)
-            doc_ctx = self._DocContext.from_doc(doc=dl_doc)
-            _logger.debug(f"{doc_ctx.model_dump()=}")
+        heading_by_level: dict[LevelNumber, str] = {}
+        list_items: list[TextItem] = []
+        for item, level in dl_doc.iterate_items():
+            captions = None
+            if isinstance(item, DocItem):
+                # first handle any merging needed
+                if self.merge_list_items:
+                    if isinstance(
+                        item, ListItem
+                    ) or (  # TODO remove when all captured as ListItem:
+                        isinstance(item, TextItem)
+                        and item.label == DocItemLabel.LIST_ITEM
+                    ):
+                        list_items.append(item)
+                        continue
+                    elif list_items:  # need to yield
+                        yield DocChunk(
+                            text=self.delim.join([i.text for i in list_items]),
+                            meta=DocMeta(
+                                doc_items=list_items,
+                                headings=[
+                                    heading_by_level[k]
+                                    for k in sorted(heading_by_level)
+                                ]
+                                or None,
+                            ),
+                        )
+                        list_items = []  # reset
-            for i, item in enumerate(dl_doc.main_text):
-                if (
-                    isinstance(item, BaseText)
-                    or _HC._norm(item.obj_type) == _HC._NodeType.TABLE
+                if isinstance(
+                    item, SectionHeaderItem
+                ) or (  # TODO remove when all captured as SectionHeaderItem:
+                    isinstance(item, TextItem)
+                    and item.label == DocItemLabel.SECTION_HEADER
                 ):
-                    chunk = self._build_chunk(
-                        doc=dl_doc, doc_map=doc_ctx, idx=i, delim=delim
-                    )
-                    if chunk:
-                        _logger.info(f"{i=}, {chunk=}")
-                        yield chunk
-_HC = HierarchicalChunker
+                    # TODO second branch not needed once cleanup above complete:
+                    level = item.level if isinstance(item, SectionHeaderItem) else 1
+                    heading_by_level[level] = item.text
+                    # remove headings of higher level as they just went out of scope
+                    keys_to_del = [k for k in heading_by_level if k > level]
+                    for k in keys_to_del:
+                        heading_by_level.pop(k, None)
+                    continue
+                if isinstance(item, TextItem) or (
+                    (not self.merge_list_items) and isinstance(item, ListItem)
+                ):
+                    text = item.text
+                elif isinstance(item, TableItem):
+                    table_df = item.export_to_dataframe()
+                    if table_df.shape[0] < 1 or table_df.shape[1] < 2:
+                        # at least two cols needed, as first column contains row headers
+                        continue
+                    text = self._triplet_serialize(table_df=table_df)
+                    captions = [
+                        c.text for c in [r.resolve(dl_doc) for r in item.captions]
+                    ] or None
+                else:
+                    continue
+                c = DocChunk(
+                    text=text,
+                    meta=DocMeta(
+                        doc_items=[item],
+                        headings=[heading_by_level[k] for k in sorted(heading_by_level)]
+                        or None,
+                        captions=captions,
+                    ),
+                )
+                yield c
+        if self.merge_list_items and list_items:  # need to yield
+            yield DocChunk(
+                text=self.delim.join([i.text for i in list_items]),
+                meta=DocMeta(
+                    doc_items=list_items,
+                    headings=[heading_by_level[k] for k in sorted(heading_by_level)]
+                    or None,
+                ),
+            )

docling_core/types/__init__.py CHANGED Viewed

@@ -5,10 +5,11 @@
 """Define the main types."""
-from docling_core.types.doc.base import BoundingBox  # noqa
-from docling_core.types.doc.base import Table  # noqa
-from docling_core.types.doc.base import TableCell  # noqa
-from docling_core.types.doc.base import (  # noqa
+from docling_core.types.gen.generic import Generic  # noqa
+from docling_core.types.legacy_doc.base import BoundingBox  # noqa
+from docling_core.types.legacy_doc.base import Table  # noqa
+from docling_core.types.legacy_doc.base import TableCell  # noqa
+from docling_core.types.legacy_doc.base import (  # noqa
     BaseCell,
     BaseText,
     PageDimensions,
@@ -16,10 +17,13 @@ from docling_core.types.doc.base import (  # noqa
     Prov,
     Ref,
 )
-from docling_core.types.doc.document import (  # noqa
+from docling_core.types.legacy_doc.document import (  # noqa
     CCSDocumentDescription as DocumentDescription,
 )
-from docling_core.types.doc.document import CCSFileInfoObject as FileInfoObject  # noqa
-from docling_core.types.doc.document import ExportedCCSDocument as Document  # noqa
-from docling_core.types.gen.generic import Generic  # noqa
+from docling_core.types.legacy_doc.document import (  # noqa
+    CCSFileInfoObject as FileInfoObject,
+)
+from docling_core.types.legacy_doc.document import (  # noqa
+    ExportedCCSDocument as Document,
+)
 from docling_core.types.rec.record import Record  # noqa

docling_core/types/doc/__init__.py CHANGED Viewed

@@ -4,3 +4,28 @@
 #
 """Package for models defined by the Document type."""
+from .base import BoundingBox, CoordOrigin, Size
+from .document import (
+    DocItem,
+    DoclingDocument,
+    DocumentOrigin,
+    FloatingItem,
+    GroupItem,
+    ImageRef,
+    KeyValueItem,
+    NodeItem,
+    PageItem,
+    PictureClassificationClass,
+    PictureClassificationData,
+    PictureDataType,
+    PictureItem,
+    ProvenanceItem,
+    RefItem,
+    SectionHeaderItem,
+    TableCell,
+    TableData,
+    TableItem,
+    TextItem,
+)
+from .labels import DocItemLabel, GroupLabel, TableCellLabel

docling-core 1.7.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

Potentially problematic release.

docling-core 1.7.2__py3-none-any.whl → 2.0.0__py3-none-any.whl