docling-core 2.18.0__py3-none-any.whl → 2.19.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -19,6 +19,7 @@ from docling_core.search.package import VERSION_PATTERN
19
19
  from docling_core.transforms.chunker import BaseChunk, BaseChunker, BaseMeta
20
20
  from docling_core.types import DoclingDocument as DLDocument
21
21
  from docling_core.types.doc.document import (
22
+ CodeItem,
22
23
  DocItem,
23
24
  DocumentOrigin,
24
25
  LevelNumber,
@@ -199,8 +200,10 @@ class HierarchicalChunker(BaseChunker):
199
200
  heading_by_level.pop(k, None)
200
201
  continue
201
202
 
202
- if isinstance(item, TextItem) or (
203
- (not self.merge_list_items) and isinstance(item, ListItem)
203
+ if (
204
+ isinstance(item, TextItem)
205
+ or ((not self.merge_list_items) and isinstance(item, ListItem))
206
+ or isinstance(item, CodeItem)
204
207
  ):
205
208
  text = item.text
206
209
  elif isinstance(item, TableItem):
@@ -75,6 +75,14 @@ DEFAULT_EXPORT_LABELS = {
75
75
  DocItemLabel.PAGE_FOOTER,
76
76
  }
77
77
 
78
+ DOCUMENT_TOKENS_EXPORT_LABELS = DEFAULT_EXPORT_LABELS.copy()
79
+ DOCUMENT_TOKENS_EXPORT_LABELS.update(
80
+ [
81
+ DocItemLabel.FOOTNOTE,
82
+ DocItemLabel.CAPTION,
83
+ ]
84
+ )
85
+
78
86
 
79
87
  class BasePictureData(BaseModel):
80
88
  """BasePictureData."""
@@ -564,9 +572,8 @@ class DocItem(
564
572
  self,
565
573
  doc: "DoclingDocument",
566
574
  new_line: str,
567
- xsize: int = 100,
568
- ysize: int = 100,
569
- add_page_index: bool = True,
575
+ xsize: int = 500,
576
+ ysize: int = 500,
570
577
  ) -> str:
571
578
  """Get the location string for the BaseCell."""
572
579
  if not len(self.prov):
@@ -576,17 +583,12 @@ class DocItem(
576
583
  for prov in self.prov:
577
584
  page_w, page_h = doc.pages[prov.page_no].size.as_tuple()
578
585
 
579
- page_i = -1
580
- if add_page_index:
581
- page_i = prov.page_no
582
-
583
586
  loc_str = DocumentToken.get_location(
584
- bbox=prov.bbox.to_bottom_left_origin(page_h).as_tuple(),
587
+ bbox=prov.bbox.to_top_left_origin(page_h).as_tuple(),
585
588
  page_w=page_w,
586
589
  page_h=page_h,
587
590
  xsize=xsize,
588
591
  ysize=ysize,
589
- page_i=page_i,
590
592
  )
591
593
  location += f"{loc_str}{new_line}"
592
594
 
@@ -641,57 +643,40 @@ class TextItem(DocItem):
641
643
  def export_to_document_tokens(
642
644
  self,
643
645
  doc: "DoclingDocument",
644
- new_line: str = "\n",
645
- xsize: int = 100,
646
- ysize: int = 100,
646
+ new_line: str = "",
647
+ xsize: int = 500,
648
+ ysize: int = 500,
647
649
  add_location: bool = True,
648
650
  add_content: bool = True,
649
- add_page_index: bool = True,
650
651
  ):
651
652
  r"""Export text element to document tokens format.
652
653
 
653
654
  :param doc: "DoclingDocument":
654
- :param new_line: str: (Default value = "\n")
655
- :param xsize: int: (Default value = 100)
656
- :param ysize: int: (Default value = 100)
655
+ :param new_line: str (Default value = "")
656
+ :param xsize: int: (Default value = 500)
657
+ :param ysize: int: (Default value = 500)
657
658
  :param add_location: bool: (Default value = True)
658
659
  :param add_content: bool: (Default value = True)
659
- :param add_page_index: bool: (Default value = True)
660
660
 
661
661
  """
662
- body = f"<{self.label.value}>"
663
-
664
- # TODO: This must be done through an explicit mapping.
665
- # assert DocumentToken.is_known_token(
666
- # body
667
- # ), f"failed DocumentToken.is_known_token({body})"
662
+ body = f"<{self.label.value}>{new_line}"
668
663
 
669
664
  if add_location:
670
665
  body += self.get_location_tokens(
671
666
  doc=doc,
672
- new_line="",
667
+ new_line=new_line,
673
668
  xsize=xsize,
674
669
  ysize=ysize,
675
- add_page_index=add_page_index,
676
670
  )
677
671
 
678
672
  if add_content and self.text is not None:
679
- body += self.text.strip()
673
+ body += f"{self.text.strip()}{new_line}"
680
674
 
681
- body += f"</{self.label.value}>{new_line}"
675
+ body += f"</{self.label.value}>\n"
682
676
 
683
677
  return body
684
678
 
685
679
 
686
- class CodeItem(TextItem):
687
- """CodeItem."""
688
-
689
- label: typing.Literal[DocItemLabel.CODE] = (
690
- DocItemLabel.CODE # type: ignore[assignment]
691
- )
692
- code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN
693
-
694
-
695
680
  class SectionHeaderItem(TextItem):
696
681
  """SectionItem."""
697
682
 
@@ -703,25 +688,23 @@ class SectionHeaderItem(TextItem):
703
688
  def export_to_document_tokens(
704
689
  self,
705
690
  doc: "DoclingDocument",
706
- new_line: str = "\n",
707
- xsize: int = 100,
708
- ysize: int = 100,
691
+ new_line: str = "",
692
+ xsize: int = 500,
693
+ ysize: int = 500,
709
694
  add_location: bool = True,
710
695
  add_content: bool = True,
711
- add_page_index: bool = True,
712
696
  ):
713
697
  r"""Export text element to document tokens format.
714
698
 
715
699
  :param doc: "DoclingDocument":
716
- :param new_line: str: (Default value = "\n")
717
- :param xsize: int: (Default value = 100)
718
- :param ysize: int: (Default value = 100)
700
+ :param new_line: str (Default value = "")
701
+ :param xsize: int: (Default value = 500)
702
+ :param ysize: int: (Default value = 500)
719
703
  :param add_location: bool: (Default value = True)
720
704
  :param add_content: bool: (Default value = True)
721
- :param add_page_index: bool: (Default value = True)
722
705
 
723
706
  """
724
- body = f"<{self.label.value}_level_{self.level}>"
707
+ body = f"<{self.label.value}_level_{self.level}>{new_line}"
725
708
 
726
709
  # TODO: This must be done through an explicit mapping.
727
710
  # assert DocumentToken.is_known_token(
@@ -731,16 +714,15 @@ class SectionHeaderItem(TextItem):
731
714
  if add_location:
732
715
  body += self.get_location_tokens(
733
716
  doc=doc,
734
- new_line="",
717
+ new_line=new_line,
735
718
  xsize=xsize,
736
719
  ysize=ysize,
737
- add_page_index=add_page_index,
738
720
  )
739
721
 
740
722
  if add_content and self.text is not None:
741
- body += self.text.strip()
723
+ body += f"{self.text.strip()}{new_line}"
742
724
 
743
- body += f"</{self.label.value}_level_{self.level}>{new_line}"
725
+ body += f"</{self.label.value}_level_{self.level}>\n"
744
726
 
745
727
  return body
746
728
 
@@ -785,6 +767,51 @@ class FloatingItem(DocItem):
785
767
  return super().get_image(doc=doc)
786
768
 
787
769
 
770
+ class CodeItem(FloatingItem, TextItem):
771
+ """CodeItem."""
772
+
773
+ label: typing.Literal[DocItemLabel.CODE] = (
774
+ DocItemLabel.CODE # type: ignore[assignment]
775
+ )
776
+ code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN
777
+
778
+ def export_to_document_tokens(
779
+ self,
780
+ doc: "DoclingDocument",
781
+ new_line: str = "",
782
+ xsize: int = 500,
783
+ ysize: int = 500,
784
+ add_location: bool = True,
785
+ add_content: bool = True,
786
+ ):
787
+ r"""Export text element to document tokens format.
788
+
789
+ :param doc: "DoclingDocument":
790
+ :param new_line: str (Default value = "")
791
+ :param xsize: int: (Default value = 500)
792
+ :param ysize: int: (Default value = 500)
793
+ :param add_location: bool: (Default value = True)
794
+ :param add_content: bool: (Default value = True)
795
+
796
+ """
797
+ body = f"<{self.label.value}{new_line}"
798
+
799
+ if add_location:
800
+ body += self.get_location_tokens(
801
+ doc=doc,
802
+ new_line=new_line,
803
+ xsize=xsize,
804
+ ysize=ysize,
805
+ )
806
+
807
+ if add_content and self.text is not None:
808
+ body += f"<_{self.code_language.value}_>{self.text}{new_line}"
809
+
810
+ body += f"</{self.label.value}\n"
811
+
812
+ return body
813
+
814
+
788
815
  class PictureItem(FloatingItem):
789
816
  """PictureItem."""
790
817
 
@@ -931,47 +958,62 @@ class PictureItem(FloatingItem):
931
958
  def export_to_document_tokens(
932
959
  self,
933
960
  doc: "DoclingDocument",
934
- new_line: str = "\n",
935
- xsize: int = 100,
936
- ysize: int = 100,
961
+ new_line: str = "",
962
+ xsize: int = 500,
963
+ ysize: int = 500,
937
964
  add_location: bool = True,
938
965
  add_caption: bool = True,
939
966
  add_content: bool = True, # not used at the moment
940
- add_page_index: bool = True,
941
967
  ):
942
968
  r"""Export picture to document tokens format.
943
969
 
944
970
  :param doc: "DoclingDocument":
945
- :param new_line: str: (Default value = "\n")
946
- :param xsize: int: (Default value = 100)
947
- :param ysize: int: (Default value = 100)
971
+ :param new_line: str (Default value = "")
972
+ :param xsize: int: (Default value = 500)
973
+ :param ysize: int: (Default value = 500)
948
974
  :param add_location: bool: (Default value = True)
949
975
  :param add_caption: bool: (Default value = True)
950
976
  :param add_content: bool: (Default value = True)
951
- :param # not used at the momentadd_page_index: bool: (Default value = True)
977
+ :param # not used at the moment
952
978
 
953
979
  """
954
- body = f"{DocumentToken.BEG_FIGURE.value}{new_line}"
955
-
980
+ body = f"<{self.label.value}>{new_line}"
956
981
  if add_location:
957
982
  body += self.get_location_tokens(
958
983
  doc=doc,
959
984
  new_line=new_line,
960
985
  xsize=xsize,
961
986
  ysize=ysize,
962
- add_page_index=add_page_index,
963
987
  )
964
988
 
989
+ classifications = [
990
+ ann
991
+ for ann in self.annotations
992
+ if isinstance(ann, PictureClassificationData)
993
+ ]
994
+ if len(classifications) > 0:
995
+ # ! TODO: currently this code assumes class_name is of type 'str'
996
+ # ! TODO: when it will change to an ENUM --> adapt code
997
+ predicted_class = classifications[0].predicted_classes[0].class_name
998
+ body += DocumentToken.get_picture_classification_token(predicted_class)
999
+
965
1000
  if add_caption and len(self.captions):
966
1001
  text = self.caption_text(doc)
967
1002
 
968
1003
  if len(text):
969
- body += f"{DocumentToken.BEG_CAPTION.value}"
1004
+ body += f"<{DocItemLabel.CAPTION.value}>"
1005
+ for caption in self.captions:
1006
+ body += caption.resolve(doc).get_location_tokens(
1007
+ doc=doc,
1008
+ new_line=new_line,
1009
+ xsize=xsize,
1010
+ ysize=ysize,
1011
+ )
970
1012
  body += f"{text.strip()}"
971
- body += f"{DocumentToken.END_CAPTION.value}"
1013
+ body += f"</{DocItemLabel.CAPTION.value}>"
972
1014
  body += f"{new_line}"
973
1015
 
974
- body += f"{DocumentToken.END_FIGURE.value}{new_line}"
1016
+ body += f"</{self.label.value}>\n"
975
1017
 
976
1018
  return body
977
1019
 
@@ -1143,8 +1185,8 @@ class TableItem(FloatingItem):
1143
1185
  doc: "DoclingDocument",
1144
1186
  add_cell_location: bool = True,
1145
1187
  add_cell_text: bool = True,
1146
- xsize: int = 100,
1147
- ysize: int = 100,
1188
+ xsize: int = 500,
1189
+ ysize: int = 500,
1148
1190
  ) -> str:
1149
1191
  """Export the table as OTSL."""
1150
1192
  # Possible OTSL tokens...
@@ -1194,7 +1236,6 @@ class TableItem(FloatingItem):
1194
1236
  page_h=page_h,
1195
1237
  xsize=xsize,
1196
1238
  ysize=ysize,
1197
- page_i=page_no,
1198
1239
  )
1199
1240
 
1200
1241
  if rowstart == i and colstart == j:
@@ -1234,33 +1275,29 @@ class TableItem(FloatingItem):
1234
1275
  def export_to_document_tokens(
1235
1276
  self,
1236
1277
  doc: "DoclingDocument",
1237
- new_line: str = "\n",
1238
- xsize: int = 100,
1239
- ysize: int = 100,
1278
+ new_line: str = "",
1279
+ xsize: int = 500,
1280
+ ysize: int = 500,
1240
1281
  add_location: bool = True,
1241
- add_caption: bool = True,
1242
- add_content: bool = True,
1243
1282
  add_cell_location: bool = True,
1244
- add_cell_label: bool = True,
1245
1283
  add_cell_text: bool = True,
1246
- add_page_index: bool = True,
1284
+ add_caption: bool = True,
1247
1285
  ):
1248
1286
  r"""Export table to document tokens format.
1249
1287
 
1250
1288
  :param doc: "DoclingDocument":
1251
- :param new_line: str: (Default value = "\n")
1252
- :param xsize: int: (Default value = 100)
1253
- :param ysize: int: (Default value = 100)
1289
+ :param new_line: str (Default value = "")
1290
+ :param xsize: int: (Default value = 500)
1291
+ :param ysize: int: (Default value = 500)
1254
1292
  :param add_location: bool: (Default value = True)
1255
- :param add_caption: bool: (Default value = True)
1256
- :param add_content: bool: (Default value = True)
1257
1293
  :param add_cell_location: bool: (Default value = True)
1258
- :param add_cell_label: bool: (Default value = True)
1259
1294
  :param add_cell_text: bool: (Default value = True)
1260
- :param add_page_index: bool: (Default value = True)
1295
+ :param add_caption: bool: (Default value = True)
1261
1296
 
1262
1297
  """
1263
- body = f"{DocumentToken.BEG_TABLE.value}{new_line}"
1298
+ otsl_tag = DocumentToken.OTSL.value
1299
+
1300
+ body = f"<{otsl_tag}>{new_line}"
1264
1301
 
1265
1302
  if add_location:
1266
1303
  body += self.get_location_tokens(
@@ -1268,76 +1305,27 @@ class TableItem(FloatingItem):
1268
1305
  new_line=new_line,
1269
1306
  xsize=xsize,
1270
1307
  ysize=ysize,
1271
- add_page_index=add_page_index,
1272
1308
  )
1273
1309
 
1310
+ body += self.export_to_otsl(doc, add_cell_location, add_cell_text, xsize, ysize)
1311
+
1274
1312
  if add_caption and len(self.captions):
1275
1313
  text = self.caption_text(doc)
1276
1314
 
1277
1315
  if len(text):
1278
- body += f"{DocumentToken.BEG_CAPTION.value}"
1316
+ body += f"<{DocItemLabel.CAPTION.value}>"
1317
+ for caption in self.captions:
1318
+ body += caption.resolve(doc).get_location_tokens(
1319
+ doc=doc,
1320
+ new_line=new_line,
1321
+ xsize=xsize,
1322
+ ysize=ysize,
1323
+ )
1279
1324
  body += f"{text.strip()}"
1280
- body += f"{DocumentToken.END_CAPTION.value}"
1325
+ body += f"</{DocItemLabel.CAPTION.value}>"
1281
1326
  body += f"{new_line}"
1282
1327
 
1283
- if add_content and len(self.data.table_cells) > 0:
1284
- for i, row in enumerate(self.data.grid):
1285
- body += f"<row_{i}>"
1286
- for j, col in enumerate(row):
1287
-
1288
- text = ""
1289
- if add_cell_text:
1290
- text = col.text.strip()
1291
-
1292
- cell_loc = ""
1293
- if (
1294
- col.bbox is not None
1295
- and add_cell_location
1296
- and add_page_index
1297
- and len(self.prov) > 0
1298
- ):
1299
- page_w, page_h = doc.pages[self.prov[0].page_no].size.as_tuple()
1300
- cell_loc = DocumentToken.get_location(
1301
- bbox=col.bbox.to_bottom_left_origin(page_h).as_tuple(),
1302
- page_w=page_w,
1303
- page_h=page_h,
1304
- xsize=xsize,
1305
- ysize=ysize,
1306
- page_i=self.prov[0].page_no,
1307
- )
1308
- elif (
1309
- col.bbox is not None
1310
- and add_cell_location
1311
- and not add_page_index
1312
- and len(self.prov) > 0
1313
- ):
1314
- page_w, page_h = doc.pages[self.prov[0].page_no].size.as_tuple()
1315
-
1316
- cell_loc = DocumentToken.get_location(
1317
- bbox=col.bbox.to_bottom_left_origin(page_h).as_tuple(),
1318
- page_w=page_w,
1319
- page_h=page_h,
1320
- xsize=xsize,
1321
- ysize=ysize,
1322
- page_i=-1,
1323
- )
1324
-
1325
- cell_label = ""
1326
- if add_cell_label:
1327
- if col.column_header:
1328
- cell_label = "<col_header>"
1329
- elif col.row_header:
1330
- cell_label = "<row_header>"
1331
- elif col.row_section:
1332
- cell_label = "<row_section>"
1333
- else:
1334
- cell_label = "<body>"
1335
-
1336
- body += f"<col_{j}>{cell_loc}{cell_label}{text}</col_{j}>"
1337
-
1338
- body += f"</row_{i}>{new_line}"
1339
-
1340
- body += f"{DocumentToken.END_TABLE.value}{new_line}"
1328
+ body += f"</{otsl_tag}>\n"
1341
1329
 
1342
1330
  return body
1343
1331
 
@@ -1777,6 +1765,7 @@ class DoclingDocument(BaseModel):
1777
1765
  text: str,
1778
1766
  code_language: Optional[CodeLanguageLabel] = None,
1779
1767
  orig: Optional[str] = None,
1768
+ caption: Optional[Union[TextItem, RefItem]] = None,
1780
1769
  prov: Optional[ProvenanceItem] = None,
1781
1770
  parent: Optional[NodeItem] = None,
1782
1771
  content_layer: Optional[ContentLayer] = None,
@@ -1786,6 +1775,8 @@ class DoclingDocument(BaseModel):
1786
1775
  :param text: str:
1787
1776
  :param code_language: Optional[str]: (Default value = None)
1788
1777
  :param orig: Optional[str]: (Default value = None)
1778
+ :param caption: Optional[Union[TextItem:
1779
+ :param RefItem]]: (Default value = None)
1789
1780
  :param prov: Optional[ProvenanceItem]: (Default value = None)
1790
1781
  :param parent: Optional[NodeItem]: (Default value = None)
1791
1782
  """
@@ -1809,6 +1800,8 @@ class DoclingDocument(BaseModel):
1809
1800
  code_item.content_layer = content_layer
1810
1801
  if prov:
1811
1802
  code_item.prov.append(prov)
1803
+ if caption:
1804
+ code_item.captions.append(caption.get_ref())
1812
1805
 
1813
1806
  self.texts.append(code_item)
1814
1807
  parent.children.append(RefItem(cref=cref))
@@ -1927,6 +1920,7 @@ class DoclingDocument(BaseModel):
1927
1920
  traverse_pictures=traverse_pictures,
1928
1921
  page_no=page_no,
1929
1922
  _level=_level + 1,
1923
+ included_content_layers=included_content_layers,
1930
1924
  )
1931
1925
 
1932
1926
  def _clear_picture_pil_cache(self):
@@ -2708,22 +2702,18 @@ class DoclingDocument(BaseModel):
2708
2702
  def save_as_document_tokens(
2709
2703
  self,
2710
2704
  filename: Path,
2711
- delim: str = "\n\n",
2705
+ delim: str = "",
2712
2706
  from_element: int = 0,
2713
2707
  to_element: int = sys.maxsize,
2714
- labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
2715
- xsize: int = 100,
2716
- ysize: int = 100,
2708
+ labels: set[DocItemLabel] = DOCUMENT_TOKENS_EXPORT_LABELS,
2709
+ xsize: int = 500,
2710
+ ysize: int = 500,
2717
2711
  add_location: bool = True,
2718
2712
  add_content: bool = True,
2719
2713
  add_page_index: bool = True,
2720
2714
  # table specific flags
2721
2715
  add_table_cell_location: bool = False,
2722
- add_table_cell_label: bool = True,
2723
2716
  add_table_cell_text: bool = True,
2724
- # specifics
2725
- page_no: Optional[int] = None,
2726
- with_groups: bool = True,
2727
2717
  ):
2728
2718
  r"""Save the document content to a DocumentToken format."""
2729
2719
  out = self.export_to_document_tokens(
@@ -2738,198 +2728,230 @@ class DoclingDocument(BaseModel):
2738
2728
  add_page_index=add_page_index,
2739
2729
  # table specific flags
2740
2730
  add_table_cell_location=add_table_cell_location,
2741
- add_table_cell_label=add_table_cell_label,
2742
2731
  add_table_cell_text=add_table_cell_text,
2743
- # specifics
2744
- page_no=page_no,
2745
- with_groups=with_groups,
2746
2732
  )
2747
2733
 
2748
2734
  with open(filename, "w", encoding="utf-8") as fw:
2749
2735
  fw.write(out)
2750
2736
 
2751
- def export_to_document_tokens(
2737
+ def export_to_document_tokens( # noqa: C901
2752
2738
  self,
2753
- delim: str = "\n",
2739
+ delim: str = "",
2754
2740
  from_element: int = 0,
2755
2741
  to_element: int = sys.maxsize,
2756
- labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
2757
- xsize: int = 100,
2758
- ysize: int = 100,
2742
+ labels: set[DocItemLabel] = DOCUMENT_TOKENS_EXPORT_LABELS,
2743
+ xsize: int = 500,
2744
+ ysize: int = 500,
2759
2745
  add_location: bool = True,
2760
2746
  add_content: bool = True,
2761
2747
  add_page_index: bool = True,
2762
2748
  # table specific flags
2763
2749
  add_table_cell_location: bool = False,
2764
- add_table_cell_label: bool = True,
2765
2750
  add_table_cell_text: bool = True,
2766
- # specifics
2767
- page_no: Optional[int] = None,
2768
- with_groups: bool = True,
2769
- newline: bool = True,
2770
2751
  ) -> str:
2771
2752
  r"""Exports the document content to a DocumentToken format.
2772
2753
 
2773
2754
  Operates on a slice of the document's body as defined through arguments
2774
2755
  from_element and to_element; defaulting to the whole main_text.
2775
2756
 
2776
- :param delim: str: (Default value = "\n\n")
2757
+ :param delim: str: (Default value = "")
2777
2758
  :param from_element: int: (Default value = 0)
2778
2759
  :param to_element: Optional[int]: (Default value = None)
2779
2760
  :param labels: set[DocItemLabel]
2780
- :param xsize: int: (Default value = 100)
2781
- :param ysize: int: (Default value = 100)
2761
+ :param xsize: int: (Default value = 500)
2762
+ :param ysize: int: (Default value = 500)
2782
2763
  :param add_location: bool: (Default value = True)
2783
2764
  :param add_content: bool: (Default value = True)
2784
2765
  :param add_page_index: bool: (Default value = True)
2785
2766
  :param # table specific flagsadd_table_cell_location: bool
2786
- :param add_table_cell_label: bool: (Default value = True)
2787
2767
  :param add_table_cell_text: bool: (Default value = True)
2788
2768
  :returns: The content of the document formatted as a DocTags string.
2789
2769
  :rtype: str
2790
2770
  """
2791
2771
 
2792
- def close_lists(
2793
- curr_level: int,
2794
- prev_level: int,
2795
- in_ordered_list: List[bool],
2796
- result: str,
2797
- delim: str,
2798
- ):
2799
-
2800
- if len(in_ordered_list) == 0:
2801
- return (in_ordered_list, result)
2802
-
2803
- while curr_level < prev_level and len(in_ordered_list) > 0:
2804
- if in_ordered_list[-1]:
2805
- result += f"</ordered_list>{delim}"
2772
+ def _close_lists(
2773
+ current_level: int,
2774
+ previous_level: int,
2775
+ ordered_list_stack: List[bool],
2776
+ output_parts: List[str],
2777
+ ) -> List[bool]:
2778
+ """Close open list tags until the nesting level matches item's level."""
2779
+ while current_level < previous_level and ordered_list_stack:
2780
+ last_is_ordered = ordered_list_stack.pop()
2781
+ if last_is_ordered:
2782
+ output_parts.append(f"</{DocumentToken.ORDERED_LIST.value}>\n")
2806
2783
  else:
2807
- result += f"</unordered_list>{delim}"
2808
-
2809
- prev_level -= 1
2810
- in_ordered_list.pop() # = in_ordered_list[:-1]
2811
-
2812
- return (in_ordered_list, result)
2813
-
2814
- if newline:
2815
- delim = "\n"
2816
- else:
2817
- delim = ""
2818
-
2819
- prev_level = 0 # Track the previous item's level
2820
-
2821
- in_ordered_list: List[bool] = [] # False
2822
-
2823
- result = f"{DocumentToken.BEG_DOCUMENT.value}{delim}"
2824
-
2825
- for ix, (item, curr_level) in enumerate(
2826
- self.iterate_items(self.body, with_groups=True)
2784
+ output_parts.append(f"</{DocumentToken.UNORDERED_LIST.value}>\n")
2785
+ previous_level -= 1
2786
+ return ordered_list_stack
2787
+
2788
+ def _add_page_break_if_needed(
2789
+ output_parts: List[str],
2790
+ item,
2791
+ prev_page_no,
2792
+ page_break_enabled: bool,
2827
2793
  ):
2828
-
2829
- # If we've moved to a lower level, we're exiting one or more groups
2830
- if curr_level < prev_level and len(in_ordered_list) > 0:
2831
- # Calculate how many levels we've exited
2832
- # level_difference = previous_level - level
2833
- # Decrement list_nesting_level for each list group we've exited
2834
- # list_nesting_level = max(0, list_nesting_level - level_difference)
2835
-
2836
- in_ordered_list, result = close_lists(
2837
- curr_level=curr_level,
2838
- prev_level=prev_level,
2839
- in_ordered_list=in_ordered_list,
2840
- result=result,
2841
- delim=delim,
2794
+ """Inserts a page-break token.
2795
+
2796
+ Inserts a page-break token if the item's page number is different
2797
+ from the previous item and page breaks are enabled.
2798
+ Returns the updated output_parts list and the current page number.
2799
+ """
2800
+ if not page_break_enabled:
2801
+ return output_parts, prev_page_no
2802
+
2803
+ if not item.prov:
2804
+ return output_parts, prev_page_no
2805
+
2806
+ current_page_no = item.prov[0].page_no
2807
+ if prev_page_no is None:
2808
+ return output_parts, current_page_no
2809
+
2810
+ if current_page_no != prev_page_no:
2811
+ output_parts.append(f"<{DocumentToken.PAGE_BREAK.value}>\n")
2812
+
2813
+ return output_parts, current_page_no
2814
+
2815
+ def _get_standalone_captions(document_body):
2816
+ """Identify captions that are not attached to any table or figure."""
2817
+ all_captions = set()
2818
+ matched_captions = set()
2819
+ for item, _ in self.iterate_items(document_body, with_groups=True):
2820
+ if item.label == DocItemLabel.CAPTION:
2821
+ all_captions.update([item.self_ref])
2822
+ if item.label in [DocItemLabel.PICTURE, DocItemLabel.TABLE]:
2823
+ matched_captions.update([caption.cref for caption in item.captions])
2824
+
2825
+ return all_captions - matched_captions
2826
+
2827
+ # Initialization
2828
+ output_parts: List[str] = []
2829
+ ordered_list_stack: List[bool] = []
2830
+ previous_level = 0
2831
+ previous_page_no = None
2832
+
2833
+ # Precompute standalone captions
2834
+ standalone_captions = _get_standalone_captions(self.body)
2835
+
2836
+ # Begin document
2837
+ output_parts.append(f"<{DocumentToken.DOCUMENT.value}>{delim}")
2838
+
2839
+ for ix, (item, current_level) in enumerate(
2840
+ self.iterate_items(
2841
+ self.body,
2842
+ with_groups=True,
2843
+ included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE},
2844
+ )
2845
+ ):
2846
+ # Close lists if we've moved to a lower nesting level
2847
+ if current_level < previous_level and ordered_list_stack:
2848
+ ordered_list_stack = _close_lists(
2849
+ current_level, previous_level, ordered_list_stack, output_parts
2842
2850
  )
2851
+ previous_level = current_level
2843
2852
 
2844
- prev_level = curr_level # Update previous_level for next iteration
2845
-
2846
- if ix < from_element or to_element <= ix:
2847
- continue # skip as many items as you want
2848
-
2849
- if (isinstance(item, DocItem)) and (item.label not in labels):
2850
- continue # skip any label that is not whitelisted
2851
-
2852
- if isinstance(item, GroupItem) and item.label in [
2853
- GroupLabel.ORDERED_LIST,
2854
- ]:
2853
+ # Skip items outside the specified element range
2854
+ if ix < from_element or ix >= to_element:
2855
+ continue
2855
2856
 
2856
- result += f"<ordered_list>{delim}"
2857
- in_ordered_list.append(True)
2857
+ # Skip items whose label is not in the allowed set
2858
+ if isinstance(item, DocItem) and (item.label not in labels):
2859
+ continue
2858
2860
 
2859
- elif isinstance(item, GroupItem) and item.label in [
2860
- GroupLabel.LIST,
2861
- ]:
2861
+ # Skip captions that are not standalone as they will be included below
2862
+ # by the export functions of Table and Picture
2863
+ if (
2864
+ isinstance(item, TextItem)
2865
+ and item.label == DocItemLabel.CAPTION
2866
+ and item.self_ref not in standalone_captions
2867
+ ):
2868
+ continue
2862
2869
 
2863
- result += f"<unordered_list>{delim}"
2864
- in_ordered_list.append(False)
2870
+ # Handle list groups
2871
+ if isinstance(item, GroupItem):
2872
+ if item.label == GroupLabel.ORDERED_LIST:
2873
+ output_parts.append(f"<{DocumentToken.ORDERED_LIST.value}>{delim}")
2874
+ ordered_list_stack.append(True)
2875
+ elif item.label == GroupLabel.LIST:
2876
+ output_parts.append(
2877
+ f"<{DocumentToken.UNORDERED_LIST.value}>{delim}"
2878
+ )
2879
+ ordered_list_stack.append(False)
2880
+ continue
2865
2881
 
2866
- elif isinstance(item, SectionHeaderItem):
2882
+ # For other item types, optionally insert page-break if the page changed
2883
+ output_parts, previous_page_no = _add_page_break_if_needed(
2884
+ output_parts, item, previous_page_no, add_page_index
2885
+ )
2867
2886
 
2868
- result += item.export_to_document_tokens(
2869
- doc=self,
2870
- new_line=delim,
2871
- xsize=xsize,
2872
- ysize=ysize,
2873
- add_location=add_location,
2874
- add_content=add_content,
2875
- add_page_index=add_page_index,
2887
+ if isinstance(item, SectionHeaderItem):
2888
+ output_parts.append(
2889
+ item.export_to_document_tokens(
2890
+ doc=self,
2891
+ new_line=delim,
2892
+ xsize=xsize,
2893
+ ysize=ysize,
2894
+ add_location=add_location,
2895
+ add_content=add_content,
2896
+ )
2876
2897
  )
2877
- elif isinstance(item, CodeItem) and (item.label in labels):
2878
-
2879
- result += item.export_to_document_tokens(
2880
- doc=self,
2881
- new_line=delim,
2882
- xsize=xsize,
2883
- ysize=ysize,
2884
- add_location=add_location,
2885
- add_content=add_content,
2886
- add_page_index=add_page_index,
2898
+ elif isinstance(item, CodeItem):
2899
+ output_parts.append(
2900
+ item.export_to_document_tokens(
2901
+ doc=self,
2902
+ new_line=delim,
2903
+ xsize=xsize,
2904
+ ysize=ysize,
2905
+ add_location=add_location,
2906
+ add_content=add_content,
2907
+ )
2887
2908
  )
2888
-
2889
- elif isinstance(item, TextItem) and (item.label in labels):
2890
-
2891
- result += item.export_to_document_tokens(
2892
- doc=self,
2893
- new_line=delim,
2894
- xsize=xsize,
2895
- ysize=ysize,
2896
- add_location=add_location,
2897
- add_content=add_content,
2898
- add_page_index=add_page_index,
2909
+ elif isinstance(item, TextItem):
2910
+ output_parts.append(
2911
+ item.export_to_document_tokens(
2912
+ doc=self,
2913
+ new_line=delim,
2914
+ xsize=xsize,
2915
+ ysize=ysize,
2916
+ add_location=add_location,
2917
+ add_content=add_content,
2918
+ )
2899
2919
  )
2900
-
2901
- elif isinstance(item, TableItem) and (item.label in labels):
2902
-
2903
- result += item.export_to_document_tokens(
2904
- doc=self,
2905
- new_line=delim,
2906
- xsize=xsize,
2907
- ysize=ysize,
2908
- add_caption=True,
2909
- add_location=add_location,
2910
- add_content=add_content,
2911
- add_cell_location=add_table_cell_location,
2912
- add_cell_label=add_table_cell_label,
2913
- add_cell_text=add_table_cell_text,
2914
- add_page_index=add_page_index,
2920
+ elif isinstance(item, TableItem):
2921
+ output_parts.append(
2922
+ item.export_to_document_tokens(
2923
+ doc=self,
2924
+ new_line=delim,
2925
+ xsize=xsize,
2926
+ ysize=ysize,
2927
+ add_location=add_location,
2928
+ add_cell_location=add_table_cell_location,
2929
+ add_cell_text=add_table_cell_text,
2930
+ add_caption=True,
2931
+ )
2915
2932
  )
2916
-
2917
- elif isinstance(item, PictureItem) and (item.label in labels):
2918
-
2919
- result += item.export_to_document_tokens(
2920
- doc=self,
2921
- new_line=delim,
2922
- xsize=xsize,
2923
- ysize=ysize,
2924
- add_caption=True,
2925
- add_location=add_location,
2926
- add_content=add_content,
2927
- add_page_index=add_page_index,
2933
+ elif isinstance(item, PictureItem):
2934
+ output_parts.append(
2935
+ item.export_to_document_tokens(
2936
+ doc=self,
2937
+ new_line=delim,
2938
+ xsize=xsize,
2939
+ ysize=ysize,
2940
+ add_caption=True,
2941
+ add_location=add_location,
2942
+ add_content=add_content,
2943
+ )
2928
2944
  )
2929
2945
 
2930
- result += DocumentToken.END_DOCUMENT.value
2946
+ # End any lists that might still be open
2947
+ ordered_list_stack = _close_lists(
2948
+ 0, previous_level, ordered_list_stack, output_parts
2949
+ )
2931
2950
 
2932
- return result
2951
+ # End document
2952
+ output_parts.append(f"</{DocumentToken.DOCUMENT.value}>")
2953
+
2954
+ return "".join(output_parts)
2933
2955
 
2934
2956
  def _export_to_indented_text(
2935
2957
  self, indent=" ", max_text_len: int = -1, explicit_tables: bool = False
@@ -111,7 +111,7 @@ class PictureClassificationLabel(str, Enum):
111
111
  SIGNATURE = "signature"
112
112
  STAMP = "stamp"
113
113
  QR_CODE = "qr_code"
114
- BAR_CODE = "bat_code"
114
+ BAR_CODE = "bar_code"
115
115
  SCREENSHOT = "screenshot"
116
116
 
117
117
  # Geology/Geography
@@ -8,13 +8,15 @@
8
8
  from enum import Enum
9
9
  from typing import Tuple
10
10
 
11
+ from docling_core.types.doc.labels import PictureClassificationLabel
12
+
11
13
 
12
14
  class TableToken(Enum):
13
15
  """Class to represent an LLM friendly representation of a Table."""
14
16
 
15
17
  CELL_LABEL_COLUMN_HEADER = "<column_header>"
16
18
  CELL_LABEL_ROW_HEADER = "<row_header>"
17
- CELL_LABEL_SECTION_HEADERE = "<section_header>"
19
+ CELL_LABEL_SECTION_HEADER = "<shed>"
18
20
  CELL_LABEL_DATA = "<data>"
19
21
 
20
22
  OTSL_ECEL = "<ecel>" # empty cell
@@ -42,83 +44,30 @@ class TableToken(Enum):
42
44
  class DocumentToken(Enum):
43
45
  """Class to represent an LLM friendly representation of a Document."""
44
46
 
45
- BEG_DOCUMENT = "<document>"
46
- END_DOCUMENT = "</document>"
47
-
48
- BEG_TITLE = "<title>"
49
- END_TITLE = "</title>"
50
-
51
- BEG_ABSTRACT = "<abstract>"
52
- END_ABSTRACT = "</abstract>"
53
-
54
- BEG_DOI = "<doi>"
55
- END_DOI = "</doi>"
56
- BEG_DATE = "<date>"
57
- END_DATE = "</date>"
58
-
59
- BEG_AUTHORS = "<authors>"
60
- END_AUTHORS = "</authors>"
61
- BEG_AUTHOR = "<author>"
62
- END_AUTHOR = "</author>"
63
-
64
- BEG_AFFILIATIONS = "<affiliations>"
65
- END_AFFILIATIONS = "</affiliations>"
66
- BEG_AFFILIATION = "<affiliation>"
67
- END_AFFILIATION = "</affiliation>"
68
-
69
- BEG_HEADER = "<section-header>"
70
- END_HEADER = "</section-header>"
71
- BEG_TEXT = "<text>"
72
- END_TEXT = "</text>"
73
- BEG_PARAGRAPH = "<paragraph>"
74
- END_PARAGRAPH = "</paragraph>"
75
- BEG_TABLE = "<table>"
76
- END_TABLE = "</table>"
77
- BEG_FIGURE = "<figure>"
78
- END_FIGURE = "</figure>"
79
- BEG_CAPTION = "<caption>"
80
- END_CAPTION = "</caption>"
81
- BEG_EQUATION = "<equation>"
82
- END_EQUATION = "</equation>"
83
- BEG_LIST = "<list>"
84
- END_LIST = "</list>"
85
- BEG_LISTITEM = "<list-item>"
86
- END_LISTITEM = "</list-item>"
87
-
88
- BEG_LOCATION = "<location>"
89
- END_LOCATION = "</location>"
90
- BEG_GROUP = "<group>"
91
- END_GROUP = "</group>"
47
+ DOCUMENT = "doctag"
48
+ OTSL = "otsl"
49
+ ORDERED_LIST = "ordered_list"
50
+ UNORDERED_LIST = "unordered_list"
51
+ LOC = "loc_"
52
+ PAGE_BREAK = "page_break"
92
53
 
93
54
  @classmethod
94
55
  def get_special_tokens(
95
56
  cls,
96
- max_rows: int = 100,
97
- max_cols: int = 100,
98
- max_pages: int = 1000,
99
57
  page_dimension: Tuple[int, int] = (100, 100),
100
58
  ):
101
59
  """Function to get all special document tokens."""
102
60
  special_tokens = [token.value for token in cls]
103
61
 
104
- # Adding dynamically generated row and col tokens
105
- for i in range(0, max_rows + 1):
106
- special_tokens += [f"<row_{i}>", f"</row_{i}>"]
107
-
108
- for i in range(0, max_cols + 1):
109
- special_tokens += [f"<col_{i}>", f"</col_{i}>"]
110
-
111
- for i in range(6):
112
- special_tokens += [f"<section-header-{i}>", f"</section-header-{i}>"]
113
-
114
- # FIXME: this is synonym of section header
115
62
  for i in range(6):
116
- special_tokens += [f"<subtitle-level-{i}>", f"</subtitle-level-{i}>"]
63
+ special_tokens += [
64
+ f"<section_header_level_{i}>",
65
+ f"</section_header_level_{i}>",
66
+ ]
117
67
 
118
- # Adding dynamically generated page-tokens
119
- for i in range(0, max_pages + 1):
120
- special_tokens.append(f"<page_{i}>")
121
- special_tokens.append(f"</page_{i}>")
68
+ # Add dynamically picture classification tokens
69
+ for _, member in PictureClassificationLabel.__members__.items():
70
+ special_tokens.append(f"<{member}>")
122
71
 
123
72
  # Adding dynamically generated location-tokens
124
73
  for i in range(0, max(page_dimension[0] + 1, page_dimension[1] + 1)):
@@ -132,25 +81,9 @@ class DocumentToken(Enum):
132
81
  return label in DocumentToken.get_special_tokens()
133
82
 
134
83
  @staticmethod
135
- def get_row_token(row: int, beg=bool) -> str:
136
- """Function to get page tokens."""
137
- if beg:
138
- return f"<row_{row}>"
139
- else:
140
- return f"</row_{row}>"
141
-
142
- @staticmethod
143
- def get_col_token(col: int, beg=bool) -> str:
144
- """Function to get page tokens."""
145
- if beg:
146
- return f"<col_{col}>"
147
- else:
148
- return f"</col_{col}>"
149
-
150
- @staticmethod
151
- def get_page_token(page: int):
152
- """Function to get page tokens."""
153
- return f"<page_{page}>"
84
+ def get_picture_classification_token(classification: str) -> str:
85
+ """Function to get picture classification tokens."""
86
+ return f"<{classification}>"
154
87
 
155
88
  @staticmethod
156
89
  def get_location_token(val: float, rnorm: int = 100):
@@ -172,7 +105,6 @@ class DocumentToken(Enum):
172
105
  page_h: float,
173
106
  xsize: int = 100,
174
107
  ysize: int = 100,
175
- page_i: int = -1,
176
108
  ):
177
109
  """Get the location string give bbox and page-dim."""
178
110
  assert bbox[0] <= bbox[2], f"bbox[0]<=bbox[2] => {bbox[0]}<={bbox[2]}"
@@ -183,17 +115,11 @@ class DocumentToken(Enum):
183
115
  x1 = bbox[2] / page_w
184
116
  y1 = bbox[3] / page_h
185
117
 
186
- page_tok = ""
187
- if page_i != -1:
188
- page_tok = DocumentToken.get_page_token(page=page_i)
189
-
190
118
  x0_tok = DocumentToken.get_location_token(val=min(x0, x1), rnorm=xsize)
191
119
  y0_tok = DocumentToken.get_location_token(val=min(y0, y1), rnorm=ysize)
192
120
  x1_tok = DocumentToken.get_location_token(val=max(x0, x1), rnorm=xsize)
193
121
  y1_tok = DocumentToken.get_location_token(val=max(y0, y1), rnorm=ysize)
194
122
 
195
- loc_str = f"{DocumentToken.BEG_LOCATION.value}"
196
- loc_str += f"{page_tok}{x0_tok}{y0_tok}{x1_tok}{y1_tok}"
197
- loc_str += f"{DocumentToken.END_LOCATION.value}"
123
+ loc_str = f"{x0_tok}{y0_tok}{x1_tok}{y1_tok}"
198
124
 
199
125
  return loc_str
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.18.0
3
+ Version: 2.19.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -30,7 +30,7 @@ Requires-Dist: jsonref (>=1.1.0,<2.0.0)
30
30
  Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
31
31
  Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
32
32
  Requires-Dist: pandas (>=2.1.4,<3.0.0)
33
- Requires-Dist: pillow (>=10.3.0,<11.0.0)
33
+ Requires-Dist: pillow (>=10.0.0,<12.0.0)
34
34
  Requires-Dist: pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)
35
35
  Requires-Dist: pyyaml (>=5.1,<7.0.0)
36
36
  Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking"
@@ -18,15 +18,15 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
18
18
  docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
19
19
  docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
20
20
  docling_core/transforms/chunker/base.py,sha256=BSWTiFOsF5YaZaZJZY8nwIdOXb9uufJMRIds7LxRNh8,2546
21
- docling_core/transforms/chunker/hierarchical_chunker.py,sha256=cy3sE9w_7l-uoIEUcfnZlQweDHUoyAJTQ6IkzxxVjFY,8052
21
+ docling_core/transforms/chunker/hierarchical_chunker.py,sha256=MStDUDtzFGc6j8v9AkcAnnSHTDxdoiVrp8FTmRdGqU8,8138
22
22
  docling_core/transforms/chunker/hybrid_chunker.py,sha256=kokjDdxjc_gygOokQwYFVnHv2NjWTgf9uex8o0ole7w,9876
23
23
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
24
24
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
25
25
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
26
26
  docling_core/types/doc/base.py,sha256=lMRNq1DUK7K26L2VNZRqFaItCSZ6m9BdYTVaJA98PZQ,11495
27
- docling_core/types/doc/document.py,sha256=Rn2hA0LPpnt7tGJOD2ME6t5x8R42mttPFD2Ks2cbvVU,102698
28
- docling_core/types/doc/labels.py,sha256=8Luymal9SKXTwyqq1ONKiUTxuMo_nRMYfBkRPFkdSSo,5306
29
- docling_core/types/doc/tokens.py,sha256=GMtm5TsNljBPaMYkgmD3WWZmC0FHqKF9imKEEySz4ps,6020
27
+ docling_core/types/doc/document.py,sha256=GGwtTZspuv2Nd9d9kX1qkcHjb0Soxp2WE1l_RkZ2pNw,103687
28
+ docling_core/types/doc/labels.py,sha256=cqH4DGN9lgZns6gOtL5urzZzUPGOjHJ75xQbIKSh_h8,5306
29
+ docling_core/types/doc/tokens.py,sha256=i73PXkmqXCLsQ5SddnJX8L9e_Ub2_K_DYSE-VE8NDq0,3925
30
30
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
31
31
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
32
32
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
@@ -56,8 +56,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
56
56
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
57
57
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
58
58
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
59
- docling_core-2.18.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
60
- docling_core-2.18.0.dist-info/METADATA,sha256=opvNoxyiZ0gaASwafQo5a3qJsyYU_o4xtZeubElVZT0,5803
61
- docling_core-2.18.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
62
- docling_core-2.18.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
63
- docling_core-2.18.0.dist-info/RECORD,,
59
+ docling_core-2.19.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
60
+ docling_core-2.19.0.dist-info/METADATA,sha256=inIV9-4qlmmQUldiQyEfm2kUUDCz6vZgKXXpVOzy73s,5803
61
+ docling_core-2.19.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
62
+ docling_core-2.19.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
63
+ docling_core-2.19.0.dist-info/RECORD,,