docling-core 2.3.0__py3-none-any.whl → 2.3.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/transforms/chunker/hierarchical_chunker.py +7 -6
- docling_core/types/doc/document.py +5 -8
- {docling_core-2.3.0.dist-info → docling_core-2.3.2.dist-info}/METADATA +1 -1
- {docling_core-2.3.0.dist-info → docling_core-2.3.2.dist-info}/RECORD +7 -7
- {docling_core-2.3.0.dist-info → docling_core-2.3.2.dist-info}/LICENSE +0 -0
- {docling_core-2.3.0.dist-info → docling_core-2.3.2.dist-info}/WHEEL +0 -0
- {docling_core-2.3.0.dist-info → docling_core-2.3.2.dist-info}/entry_points.txt +0 -0
|
@@ -183,14 +183,15 @@ class HierarchicalChunker(BaseChunker):
|
|
|
183
183
|
)
|
|
184
184
|
list_items = [] # reset
|
|
185
185
|
|
|
186
|
-
if isinstance(
|
|
187
|
-
item, SectionHeaderItem
|
|
188
|
-
) or ( # TODO remove when all captured as SectionHeaderItem:
|
|
186
|
+
if isinstance(item, SectionHeaderItem) or (
|
|
189
187
|
isinstance(item, TextItem)
|
|
190
|
-
and item.label
|
|
188
|
+
and item.label in [DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE]
|
|
191
189
|
):
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
level = (
|
|
191
|
+
item.level
|
|
192
|
+
if isinstance(item, SectionHeaderItem)
|
|
193
|
+
else (0 if item.label == DocItemLabel.TITLE else 1)
|
|
194
|
+
)
|
|
194
195
|
heading_by_level[level] = item.text
|
|
195
196
|
|
|
196
197
|
# remove headings of higher level as they just went out of scope
|
|
@@ -1284,8 +1284,8 @@ class DoclingDocument(BaseModel):
|
|
|
1284
1284
|
) -> str:
|
|
1285
1285
|
r"""Serialize to Markdown.
|
|
1286
1286
|
|
|
1287
|
-
Operates on a slice of the document's
|
|
1288
|
-
|
|
1287
|
+
Operates on a slice of the document's body as defined through arguments
|
|
1288
|
+
from_element and to_element; defaulting to the whole document.
|
|
1289
1289
|
|
|
1290
1290
|
:param delim: Delimiter to use when concatenating the various
|
|
1291
1291
|
Markdown parts. Defaults to "\n\n".
|
|
@@ -1294,11 +1294,9 @@ class DoclingDocument(BaseModel):
|
|
|
1294
1294
|
Defaults to 0.
|
|
1295
1295
|
:type from_element: int
|
|
1296
1296
|
:param to_element: Body slicing stop index
|
|
1297
|
-
(exclusive). Defaults to
|
|
1298
|
-
:type to_element:
|
|
1297
|
+
(exclusive). Defaults to maxint.
|
|
1298
|
+
:type to_element: int
|
|
1299
1299
|
:param delim: str: (Default value = "\n\n")
|
|
1300
|
-
:param from_element: int: (Default value = 0)
|
|
1301
|
-
:param to_element: Optional[int]: (Default value = None)
|
|
1302
1300
|
:param labels: set[DocItemLabel]
|
|
1303
1301
|
:param "subtitle-level-1":
|
|
1304
1302
|
:param "paragraph":
|
|
@@ -1306,7 +1304,6 @@ class DoclingDocument(BaseModel):
|
|
|
1306
1304
|
:param "table":
|
|
1307
1305
|
:param "Text":
|
|
1308
1306
|
:param "text":
|
|
1309
|
-
:param ]:
|
|
1310
1307
|
:param strict_text: bool: (Default value = False)
|
|
1311
1308
|
:param image_placeholder: str: (Default value = "<!-- image -->")
|
|
1312
1309
|
the placeholder to include to position images in the markdown.
|
|
@@ -1331,7 +1328,7 @@ class DoclingDocument(BaseModel):
|
|
|
1331
1328
|
|
|
1332
1329
|
previous_level = level # Update previous_level for next iteration
|
|
1333
1330
|
|
|
1334
|
-
if ix < from_element
|
|
1331
|
+
if ix < from_element or to_element <= ix:
|
|
1335
1332
|
continue # skip as many items as you want
|
|
1336
1333
|
|
|
1337
1334
|
# Handle newlines between different types of content
|
|
@@ -16,12 +16,12 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
|
|
|
16
16
|
docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
|
|
17
17
|
docling_core/transforms/chunker/__init__.py,sha256=sSSTnt7ZCt8Og1e0jhApNTtA0pyyHyzwcl8yXFLb2J8,292
|
|
18
18
|
docling_core/transforms/chunker/base.py,sha256=iPouZOJ3cYWvai4P0Gpd3QmsTKQuY5fFUXzTMk_XNmE,1571
|
|
19
|
-
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=
|
|
19
|
+
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=V4FiOYqL0GgBqVB7x6CafAJs3WF5oYifKIiexVggGPE,8086
|
|
20
20
|
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
21
21
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
22
22
|
docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
|
|
23
23
|
docling_core/types/doc/base.py,sha256=zvx631U_yQCcJam83hNdDanXEYnO3eN-CCw9vDr6S-I,4442
|
|
24
|
-
docling_core/types/doc/document.py,sha256=
|
|
24
|
+
docling_core/types/doc/document.py,sha256=ED-x3Hc0E3gEYZBt3ue3auYhiuEegRfz9_cq-LJEThE,56196
|
|
25
25
|
docling_core/types/doc/labels.py,sha256=A8vWP82VAeXO1rlCO0oDKo_Hb8uDeQe0myOTY3P03hk,1596
|
|
26
26
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
27
27
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
@@ -49,8 +49,8 @@ docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6
|
|
|
49
49
|
docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
|
|
50
50
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
51
51
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
52
|
-
docling_core-2.3.
|
|
53
|
-
docling_core-2.3.
|
|
54
|
-
docling_core-2.3.
|
|
55
|
-
docling_core-2.3.
|
|
56
|
-
docling_core-2.3.
|
|
52
|
+
docling_core-2.3.2.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
53
|
+
docling_core-2.3.2.dist-info/METADATA,sha256=VUY20vSISoTRU7vdkP-mAlInUVWyqyGURfIfmqTeGdA,5432
|
|
54
|
+
docling_core-2.3.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
55
|
+
docling_core-2.3.2.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
|
|
56
|
+
docling_core-2.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|