docling-core 2.38.2__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -35,11 +35,10 @@ from docling_core.types.doc.document import (
35
35
  DocumentOrigin,
36
36
  InlineGroup,
37
37
  LevelNumber,
38
- OrderedList,
38
+ ListGroup,
39
39
  SectionHeaderItem,
40
40
  TableItem,
41
41
  TitleItem,
42
- UnorderedList,
43
42
  )
44
43
 
45
44
  _VERSION: Final = "1.0.0"
@@ -240,7 +239,7 @@ class HierarchicalChunker(BaseChunker):
240
239
  heading_by_level.pop(k, None)
241
240
  continue
242
241
  elif (
243
- isinstance(item, (OrderedList, UnorderedList, InlineGroup, DocItem))
242
+ isinstance(item, (ListGroup, InlineGroup, DocItem))
244
243
  and item.self_ref not in visited
245
244
  ):
246
245
  ser_res = my_doc_ser.serialize(item=item, visited=visited)
@@ -17,12 +17,11 @@ from docling_core.types.doc.document import (
17
17
  FormItem,
18
18
  InlineGroup,
19
19
  KeyValueItem,
20
+ ListGroup,
20
21
  NodeItem,
21
- OrderedList,
22
22
  PictureItem,
23
23
  TableItem,
24
24
  TextItem,
25
- UnorderedList,
26
25
  )
27
26
 
28
27
 
@@ -128,7 +127,7 @@ class BaseListSerializer(ABC):
128
127
  def serialize(
129
128
  self,
130
129
  *,
131
- item: Union[UnorderedList, OrderedList],
130
+ item: ListGroup,
132
131
  doc_serializer: "BaseDocSerializer",
133
132
  doc: DoclingDocument,
134
133
  **kwargs: Any,
@@ -39,8 +39,8 @@ from docling_core.types.doc.document import (
39
39
  FormItem,
40
40
  InlineGroup,
41
41
  KeyValueItem,
42
+ ListGroup,
42
43
  NodeItem,
43
- OrderedList,
44
44
  PictureClassificationData,
45
45
  PictureDataType,
46
46
  PictureItem,
@@ -49,7 +49,6 @@ from docling_core.types.doc.document import (
49
49
  TableAnnotationType,
50
50
  TableItem,
51
51
  TextItem,
52
- UnorderedList,
53
52
  )
54
53
  from docling_core.types.doc.labels import DocItemLabel
55
54
 
@@ -89,7 +88,7 @@ def _iterate_items(
89
88
  ):
90
89
  if add_page_breaks:
91
90
  if (
92
- isinstance(item, (UnorderedList, OrderedList, InlineGroup))
91
+ isinstance(item, (ListGroup, InlineGroup))
93
92
  and item.self_ref not in my_visited
94
93
  ):
95
94
  # if group starts with new page, yield page break before group node
@@ -316,7 +315,7 @@ class DocSerializer(BaseModel, BaseDocSerializer):
316
315
  ########
317
316
  # groups
318
317
  ########
319
- if isinstance(item, (UnorderedList, OrderedList)):
318
+ if isinstance(item, ListGroup):
320
319
  part = self.list_serializer.serialize(
321
320
  item=item,
322
321
  doc_serializer=self,
@@ -1,7 +1,7 @@
1
1
  """Define classes for Doctags serialization."""
2
2
 
3
3
  from enum import Enum
4
- from typing import Any, Dict, List, Optional, Union
4
+ from typing import Any, Dict, List, Optional
5
5
 
6
6
  from pydantic import BaseModel
7
7
  from typing_extensions import override
@@ -34,9 +34,9 @@ from docling_core.types.doc.document import (
34
34
  FormItem,
35
35
  InlineGroup,
36
36
  KeyValueItem,
37
+ ListGroup,
37
38
  ListItem,
38
39
  NodeItem,
39
- OrderedList,
40
40
  PictureClassificationData,
41
41
  PictureItem,
42
42
  PictureMoleculeData,
@@ -44,7 +44,6 @@ from docling_core.types.doc.document import (
44
44
  ProvenanceItem,
45
45
  TableItem,
46
46
  TextItem,
47
- UnorderedList,
48
47
  )
49
48
  from docling_core.types.doc.labels import DocItemLabel, PictureClassificationLabel
50
49
  from docling_core.types.doc.tokens import DocumentToken
@@ -376,7 +375,7 @@ class DocTagsListSerializer(BaseModel, BaseListSerializer):
376
375
  def serialize(
377
376
  self,
378
377
  *,
379
- item: Union[UnorderedList, OrderedList],
378
+ item: ListGroup,
380
379
  doc_serializer: "BaseDocSerializer",
381
380
  doc: DoclingDocument,
382
381
  list_level: int = 0,
@@ -406,7 +405,7 @@ class DocTagsListSerializer(BaseModel, BaseListSerializer):
406
405
  text_res = f"{text_res}{delim}"
407
406
  wrap_tag = (
408
407
  DocumentToken.ORDERED_LIST.value
409
- if isinstance(item, OrderedList)
408
+ if item.first_item_is_enumerated(doc)
410
409
  else DocumentToken.UNORDERED_LIST.value
411
410
  )
412
411
  text_res = _wrap(text=text_res, wrap_tag=wrap_tag)
@@ -58,9 +58,9 @@ from docling_core.types.doc.document import (
58
58
  ImageRef,
59
59
  InlineGroup,
60
60
  KeyValueItem,
61
+ ListGroup,
61
62
  ListItem,
62
63
  NodeItem,
63
- OrderedList,
64
64
  PictureClassificationData,
65
65
  PictureItem,
66
66
  PictureMoleculeData,
@@ -70,7 +70,6 @@ from docling_core.types.doc.document import (
70
70
  TableItem,
71
71
  TextItem,
72
72
  TitleItem,
73
- UnorderedList,
74
73
  )
75
74
  from docling_core.types.doc.labels import DocItemLabel
76
75
  from docling_core.types.doc.utils import (
@@ -117,6 +116,8 @@ class HTMLParams(CommonParams):
117
116
 
118
117
  include_annotations: bool = True
119
118
 
119
+ show_original_list_item_marker: bool = True
120
+
120
121
 
121
122
  class HTMLTextSerializer(BaseModel, BaseTextSerializer):
122
123
  """HTML-specific text item serializer."""
@@ -162,7 +163,19 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
162
163
  elif isinstance(item, ListItem):
163
164
  # List items are handled by list serializer
164
165
  text_inner = self._prepare_content(item.text)
165
- text = get_html_tag_with_text_direction(html_tag="li", text=text_inner)
166
+ text = (
167
+ get_html_tag_with_text_direction(
168
+ html_tag="li",
169
+ text=text_inner,
170
+ attrs=(
171
+ {"style": f"list-style-type: '{item.marker} ';"}
172
+ if params.show_original_list_item_marker and item.marker
173
+ else {}
174
+ ),
175
+ )
176
+ if text_inner
177
+ else ""
178
+ )
166
179
 
167
180
  elif is_inline_scope:
168
181
  text = self._prepare_content(item.text)
@@ -680,7 +693,7 @@ class HTMLListSerializer(BaseModel, BaseListSerializer):
680
693
  def serialize(
681
694
  self,
682
695
  *,
683
- item: Union[UnorderedList, OrderedList],
696
+ item: ListGroup,
684
697
  doc_serializer: "BaseDocSerializer",
685
698
  doc: DoclingDocument,
686
699
  list_level: int = 0,
@@ -690,7 +703,7 @@ class HTMLListSerializer(BaseModel, BaseListSerializer):
690
703
  ) -> SerializationResult:
691
704
  """Serializes a list to HTML."""
692
705
  my_visited: set[str] = visited if visited is not None else set()
693
-
706
+ params = HTMLParams(**kwargs)
694
707
  # Get all child parts
695
708
  parts = doc_serializer.get_parts(
696
709
  item=item,
@@ -706,17 +719,51 @@ class HTMLListSerializer(BaseModel, BaseListSerializer):
706
719
  (
707
720
  p.text
708
721
  if (
709
- (p.text.startswith("<li>") and p.text.endswith("</li>"))
710
- or (p.text.startswith("<ol>") and p.text.endswith("</ol>"))
711
- or (p.text.startswith("<ul>") and p.text.endswith("</ul>"))
722
+ (
723
+ p.text.startswith(("<li>", "<li "))
724
+ and p.text.endswith("</li>")
725
+ )
726
+ or (
727
+ p.text.startswith(("<ol>", "<ol "))
728
+ and p.text.endswith("</ol>")
729
+ )
730
+ or (
731
+ p.text.startswith(("<ul>", "<ul "))
732
+ and p.text.endswith("</ul>")
733
+ )
734
+ )
735
+ else (
736
+ get_html_tag_with_text_direction(
737
+ html_tag="li",
738
+ text=p.text,
739
+ attrs=(
740
+ {
741
+ "style": f"list-style-type: '{grandparent_item.marker} ';"
742
+ }
743
+ if params.show_original_list_item_marker
744
+ and grandparent_item.marker
745
+ else {}
746
+ ),
747
+ )
748
+ if p.spans
749
+ and p.spans[0].item.parent
750
+ and isinstance(
751
+ (parent_item := p.spans[0].item.parent.resolve(doc)),
752
+ InlineGroup,
753
+ )
754
+ and parent_item.parent
755
+ and isinstance(
756
+ (grandparent_item := parent_item.parent.resolve(doc)),
757
+ ListItem,
758
+ )
759
+ else f"<li>{p.text}</li>"
712
760
  )
713
- else f"<li>{p.text}</li>"
714
761
  )
715
762
  for p in parts
716
763
  ]
717
764
  )
718
765
  if text_res:
719
- tag = "ol" if isinstance(item, OrderedList) else "ul"
766
+ tag = "ol" if item.first_item_is_enumerated(doc) else "ul"
720
767
  text_res = f"<{tag}>\n{text_res}\n</{tag}>"
721
768
 
722
769
  return create_ser_result(text=text_res, span_source=parts)
@@ -7,6 +7,7 @@
7
7
  import html
8
8
  import re
9
9
  import textwrap
10
+ from enum import Enum
10
11
  from pathlib import Path
11
12
  from typing import Any, Optional, Union
12
13
 
@@ -31,7 +32,6 @@ from docling_core.transforms.serializer.common import (
31
32
  CommonParams,
32
33
  DocSerializer,
33
34
  _get_annotation_text,
34
- _PageBreakSerResult,
35
35
  create_ser_result,
36
36
  )
37
37
  from docling_core.types.doc.base import ImageRefMode
@@ -48,8 +48,9 @@ from docling_core.types.doc.document import (
48
48
  ImageRef,
49
49
  InlineGroup,
50
50
  KeyValueItem,
51
+ ListGroup,
52
+ ListItem,
51
53
  NodeItem,
52
- OrderedList,
53
54
  PictureClassificationData,
54
55
  PictureItem,
55
56
  PictureMoleculeData,
@@ -58,7 +59,6 @@ from docling_core.types.doc.document import (
58
59
  TableItem,
59
60
  TextItem,
60
61
  TitleItem,
61
- UnorderedList,
62
62
  )
63
63
 
64
64
 
@@ -79,6 +79,14 @@ def _get_annotation_ser_result(
79
79
  )
80
80
 
81
81
 
82
+ class OrigListItemMarkerMode(str, Enum):
83
+ """Display mode for original list item marker."""
84
+
85
+ NEVER = "never"
86
+ ALWAYS = "always"
87
+ AUTO = "auto"
88
+
89
+
82
90
  class MarkdownParams(CommonParams):
83
91
  """Markdown-specific serialization parameters."""
84
92
 
@@ -93,6 +101,8 @@ class MarkdownParams(CommonParams):
93
101
  escape_html: bool = True
94
102
  include_annotations: bool = True
95
103
  mark_annotations: bool = False
104
+ orig_list_item_marker_mode: OrigListItemMarkerMode = OrigListItemMarkerMode.AUTO
105
+ ensure_valid_list_item_marker: bool = True
96
106
 
97
107
 
98
108
  class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
@@ -117,7 +127,7 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
117
127
  escape_html = True
118
128
  escape_underscores = True
119
129
  processing_pending = True
120
- if isinstance(item, (TitleItem, SectionHeaderItem)):
130
+ if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
121
131
  # case where processing/formatting should be applied first (in inner scope)
122
132
  processing_pending = False
123
133
  if (
@@ -127,7 +137,7 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
127
137
  (child_group := item.children[0].resolve(doc)), InlineGroup
128
138
  )
129
139
  ):
130
- # case of heading with inline
140
+ # case of inline within heading / list item
131
141
  ser_res = doc_serializer.serialize(item=child_group)
132
142
  text = ser_res.text
133
143
  for span in ser_res.spans:
@@ -140,8 +150,55 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
140
150
  formatting=item.formatting,
141
151
  hyperlink=item.hyperlink,
142
152
  )
143
- num_hashes = 1 if isinstance(item, TitleItem) else item.level + 1
144
- text_part = f"{num_hashes * '#'} {text}"
153
+
154
+ if isinstance(item, ListItem):
155
+ pieces: list[str] = []
156
+ case_auto = (
157
+ params.orig_list_item_marker_mode == OrigListItemMarkerMode.AUTO
158
+ and bool(re.search(r"[a-zA-Z0-9]", item.marker))
159
+ )
160
+ case_already_valid = (
161
+ params.ensure_valid_list_item_marker
162
+ and params.orig_list_item_marker_mode
163
+ != OrigListItemMarkerMode.NEVER
164
+ and (
165
+ item.marker in ["-", "*", "+"]
166
+ or re.fullmatch(r"\d+\.", item.marker)
167
+ )
168
+ )
169
+
170
+ # wrap with outer marker (if applicable)
171
+ if params.ensure_valid_list_item_marker and not case_already_valid:
172
+ assert item.parent and isinstance(
173
+ (list_group := item.parent.resolve(doc)), ListGroup
174
+ )
175
+ if list_group.first_item_is_enumerated(doc) and (
176
+ params.orig_list_item_marker_mode != OrigListItemMarkerMode.AUTO
177
+ or not item.marker
178
+ ):
179
+ pos = -1
180
+ for i, child in enumerate(list_group.children):
181
+ if child.resolve(doc) == item:
182
+ pos = i
183
+ break
184
+ md_marker = f"{pos + 1}."
185
+ else:
186
+ md_marker = "-"
187
+ pieces.append(md_marker)
188
+
189
+ # include original marker (if applicable)
190
+ if item.marker and (
191
+ params.orig_list_item_marker_mode == OrigListItemMarkerMode.ALWAYS
192
+ or case_auto
193
+ or case_already_valid
194
+ ):
195
+ pieces.append(item.marker)
196
+
197
+ pieces.append(text)
198
+ text_part = " ".join(pieces)
199
+ else:
200
+ num_hashes = 1 if isinstance(item, TitleItem) else item.level + 1
201
+ text_part = f"{num_hashes * '#'} {text}"
145
202
  elif isinstance(item, CodeItem):
146
203
  text_part = f"`{text}`" if is_inline_scope else f"```\n{text}\n```"
147
204
  escape_html = False
@@ -452,7 +509,7 @@ class MarkdownListSerializer(BaseModel, BaseListSerializer):
452
509
  def serialize(
453
510
  self,
454
511
  *,
455
- item: Union[UnorderedList, OrderedList],
512
+ item: ListGroup,
456
513
  doc_serializer: "BaseDocSerializer",
457
514
  doc: DoclingDocument,
458
515
  list_level: int = 0,
@@ -473,27 +530,24 @@ class MarkdownListSerializer(BaseModel, BaseListSerializer):
473
530
  sep = "\n"
474
531
  my_parts: list[SerializationResult] = []
475
532
  for p in parts:
476
- if p.text and p.text[0] == " " and my_parts:
477
- my_parts[-1].text = sep.join([my_parts[-1].text, p.text]) # update last
533
+ if (
534
+ my_parts
535
+ and p.text
536
+ and p.spans
537
+ and p.spans[0].item.parent
538
+ and isinstance(p.spans[0].item.parent.resolve(doc), InlineGroup)
539
+ ):
540
+ my_parts[-1].text = f"{my_parts[-1].text}{p.text}" # append to last
478
541
  my_parts[-1].spans.extend(p.spans)
479
542
  else:
480
543
  my_parts.append(p)
481
544
 
482
545
  indent_str = list_level * params.indent * " "
483
- is_ol = isinstance(item, OrderedList)
484
546
  text_res = sep.join(
485
547
  [
486
548
  # avoid additional marker on already evaled sublists
487
- (
488
- c.text
489
- if c.text and c.text[0] == " "
490
- else (
491
- f"{indent_str}"
492
- f"{'' if isinstance(c, _PageBreakSerResult) else (f'{i + 1}. ' if is_ol else '- ')}" # noqa: E501
493
- f"{c.text}"
494
- )
495
- )
496
- for i, c in enumerate(my_parts)
549
+ (c.text if c.text and c.text[0] == " " else f"{indent_str}{c.text}")
550
+ for c in my_parts
497
551
  ]
498
552
  )
499
553
  return create_ser_result(text=text_res, span_source=my_parts)
@@ -32,6 +32,7 @@ from .document import (
32
32
  ImageRef,
33
33
  InlineGroup,
34
34
  KeyValueItem,
35
+ ListGroup,
35
36
  ListItem,
36
37
  MiscAnnotation,
37
38
  NodeItem,
@@ -54,7 +54,7 @@ _logger = logging.getLogger(__name__)
54
54
 
55
55
  Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
56
56
  LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
57
- CURRENT_VERSION: Final = "1.4.0"
57
+ CURRENT_VERSION: Final = "1.5.0"
58
58
 
59
59
  DEFAULT_EXPORT_LABELS = {
60
60
  DocItemLabel.TITLE,
@@ -133,12 +133,6 @@ class MiscAnnotation(BaseAnnotation):
133
133
  content: Dict[str, Any]
134
134
 
135
135
 
136
- # deprecated aliases:
137
- BasePictureData = BaseAnnotation
138
- PictureDescriptionData = DescriptionAnnotation
139
- PictureMiscData = MiscAnnotation
140
-
141
-
142
136
  class ChartLine(BaseModel):
143
137
  """Represents a line in a line chart.
144
138
 
@@ -737,9 +731,11 @@ class ProvenanceItem(BaseModel):
737
731
  class ContentLayer(str, Enum):
738
732
  """ContentLayer."""
739
733
 
740
- BODY = "body"
741
- FURNITURE = "furniture"
742
- BACKGROUND = "background"
734
+ BODY = "body" # main content of the document
735
+ FURNITURE = "furniture" # eg page-headers/footers
736
+ BACKGROUND = "background" # eg watermarks
737
+ INVISIBLE = "invisible" # hidden or invisible text
738
+ NOTES = "notes" # author/speaker notes, corrections, etc
743
739
 
744
740
 
745
741
  DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
@@ -860,12 +856,27 @@ class GroupItem(NodeItem): # Container type, can't be a leaf node
860
856
  label: GroupLabel = GroupLabel.UNSPECIFIED
861
857
 
862
858
 
863
- class UnorderedList(GroupItem):
864
- """UnorderedList."""
859
+ class ListGroup(GroupItem):
860
+ """ListGroup."""
865
861
 
866
862
  label: typing.Literal[GroupLabel.LIST] = GroupLabel.LIST # type: ignore[assignment]
867
863
 
864
+ @field_validator("label", mode="before")
865
+ @classmethod
866
+ def patch_ordered(cls, value):
867
+ """patch_ordered."""
868
+ return GroupLabel.LIST if value == GroupLabel.ORDERED_LIST else value
869
+
870
+ def first_item_is_enumerated(self, doc: "DoclingDocument"):
871
+ """Whether the first list item is enumerated."""
872
+ return (
873
+ len(self.children) > 0
874
+ and isinstance(first_child := self.children[0].resolve(doc), ListItem)
875
+ and first_child.enumerated
876
+ )
877
+
868
878
 
879
+ @deprecated("Use ListGroup instead.")
869
880
  class OrderedList(GroupItem):
870
881
  """OrderedList."""
871
882
 
@@ -1752,7 +1763,7 @@ class DoclingDocument(BaseModel):
1752
1763
  ) # List[RefItem] = []
1753
1764
  body: GroupItem = GroupItem(name="_root_", self_ref="#/body") # List[RefItem] = []
1754
1765
 
1755
- groups: List[Union[OrderedList, UnorderedList, InlineGroup, GroupItem]] = []
1766
+ groups: List[Union[ListGroup, InlineGroup, GroupItem]] = []
1756
1767
  texts: List[
1757
1768
  Union[TitleItem, SectionHeaderItem, ListItem, CodeItem, FormulaItem, TextItem]
1758
1769
  ] = []
@@ -1938,7 +1949,7 @@ class DoclingDocument(BaseModel):
1938
1949
 
1939
1950
  self.form_items.append(item)
1940
1951
 
1941
- elif isinstance(item, (UnorderedList, OrderedList, InlineGroup)):
1952
+ elif isinstance(item, (ListGroup, InlineGroup)):
1942
1953
  item_label = "groups"
1943
1954
  item_index = len(self.groups)
1944
1955
 
@@ -2160,16 +2171,16 @@ class DoclingDocument(BaseModel):
2160
2171
  # TODO: refactor add* methods below
2161
2172
  ###################################
2162
2173
 
2163
- def add_ordered_list(
2174
+ def add_list_group(
2164
2175
  self,
2165
2176
  name: Optional[str] = None,
2166
2177
  parent: Optional[NodeItem] = None,
2167
2178
  content_layer: Optional[ContentLayer] = None,
2168
- ) -> GroupItem:
2169
- """add_ordered_list."""
2179
+ ) -> ListGroup:
2180
+ """add_list_group."""
2170
2181
  _parent = parent or self.body
2171
2182
  cref = f"#/groups/{len(self.groups)}"
2172
- group = OrderedList(self_ref=cref, parent=_parent.get_ref())
2183
+ group = ListGroup(self_ref=cref, parent=_parent.get_ref())
2173
2184
  if name is not None:
2174
2185
  group.name = name
2175
2186
  if content_layer:
@@ -2179,6 +2190,21 @@ class DoclingDocument(BaseModel):
2179
2190
  _parent.children.append(RefItem(cref=cref))
2180
2191
  return group
2181
2192
 
2193
+ @deprecated("Use add_list_group() instead.")
2194
+ def add_ordered_list(
2195
+ self,
2196
+ name: Optional[str] = None,
2197
+ parent: Optional[NodeItem] = None,
2198
+ content_layer: Optional[ContentLayer] = None,
2199
+ ) -> GroupItem:
2200
+ """add_ordered_list."""
2201
+ return self.add_list_group(
2202
+ name=name,
2203
+ parent=parent,
2204
+ content_layer=content_layer,
2205
+ )
2206
+
2207
+ @deprecated("Use add_list_group() instead.")
2182
2208
  def add_unordered_list(
2183
2209
  self,
2184
2210
  name: Optional[str] = None,
@@ -2186,25 +2212,18 @@ class DoclingDocument(BaseModel):
2186
2212
  content_layer: Optional[ContentLayer] = None,
2187
2213
  ) -> GroupItem:
2188
2214
  """add_unordered_list."""
2189
- _parent = parent or self.body
2190
- cref = f"#/groups/{len(self.groups)}"
2191
- group = UnorderedList(self_ref=cref, parent=_parent.get_ref())
2192
- if name is not None:
2193
- group.name = name
2194
- if content_layer:
2195
- group.content_layer = content_layer
2196
-
2197
- self.groups.append(group)
2198
- _parent.children.append(RefItem(cref=cref))
2199
- return group
2215
+ return self.add_list_group(
2216
+ name=name,
2217
+ parent=parent,
2218
+ content_layer=content_layer,
2219
+ )
2200
2220
 
2201
2221
  def add_inline_group(
2202
2222
  self,
2203
2223
  name: Optional[str] = None,
2204
2224
  parent: Optional[NodeItem] = None,
2205
2225
  content_layer: Optional[ContentLayer] = None,
2206
- # marker: Optional[UnorderedList.ULMarker] = None,
2207
- ) -> GroupItem:
2226
+ ) -> InlineGroup:
2208
2227
  """add_inline_group."""
2209
2228
  _parent = parent or self.body
2210
2229
  cref = f"#/groups/{len(self.groups)}"
@@ -2232,14 +2251,8 @@ class DoclingDocument(BaseModel):
2232
2251
  :param parent: Optional[NodeItem]: (Default value = None)
2233
2252
 
2234
2253
  """
2235
- if label == GroupLabel.LIST:
2236
- return self.add_unordered_list(
2237
- name=name,
2238
- parent=parent,
2239
- content_layer=content_layer,
2240
- )
2241
- elif label == GroupLabel.ORDERED_LIST:
2242
- return self.add_ordered_list(
2254
+ if label in [GroupLabel.LIST, GroupLabel.ORDERED_LIST]:
2255
+ return self.add_list_group(
2243
2256
  name=name,
2244
2257
  parent=parent,
2245
2258
  content_layer=content_layer,
@@ -2291,17 +2304,16 @@ class DoclingDocument(BaseModel):
2291
2304
  :param parent: Optional[NodeItem]: (Default value = None)
2292
2305
 
2293
2306
  """
2294
- if not isinstance(parent, (OrderedList, UnorderedList)):
2295
- warnings.warn("ListItem's parent must be a list group.", DeprecationWarning)
2296
-
2297
- if not parent:
2298
- parent = self.body
2307
+ if not isinstance(parent, ListGroup):
2308
+ warnings.warn(
2309
+ "ListItem parent must be a list group, creating one on the fly.",
2310
+ DeprecationWarning,
2311
+ )
2312
+ parent = self.add_list_group(parent=parent)
2299
2313
 
2300
2314
  if not orig:
2301
2315
  orig = text
2302
2316
 
2303
- marker = marker or "-"
2304
-
2305
2317
  text_index = len(self.texts)
2306
2318
  cref = f"#/texts/{text_index}"
2307
2319
  list_item = ListItem(
@@ -2310,7 +2322,7 @@ class DoclingDocument(BaseModel):
2310
2322
  self_ref=cref,
2311
2323
  parent=parent.get_ref(),
2312
2324
  enumerated=enumerated,
2313
- marker=marker,
2325
+ marker=marker or "",
2314
2326
  formatting=formatting,
2315
2327
  hyperlink=hyperlink,
2316
2328
  )
@@ -2864,7 +2876,7 @@ class DoclingDocument(BaseModel):
2864
2876
  if (
2865
2877
  root_is_picture
2866
2878
  and not traverse_pictures
2867
- and isinstance(child, DocItem)
2879
+ and isinstance(child, NodeItem)
2868
2880
  and child.self_ref not in allowed_pic_refs
2869
2881
  ):
2870
2882
  continue
@@ -4056,18 +4068,18 @@ class DoclingDocument(BaseModel):
4056
4068
  DocumentToken.ORDERED_LIST.value,
4057
4069
  DocumentToken.UNORDERED_LIST.value,
4058
4070
  ]:
4059
- list_label = GroupLabel.LIST
4071
+ GroupLabel.LIST
4060
4072
  enum_marker = ""
4061
4073
  enum_value = 0
4062
4074
  if tag_name == DocumentToken.ORDERED_LIST.value:
4063
- list_label = GroupLabel.ORDERED_LIST
4075
+ GroupLabel.ORDERED_LIST
4064
4076
 
4065
4077
  list_item_pattern = (
4066
4078
  rf"<(?P<tag>{DocItemLabel.LIST_ITEM})>.*?</(?P=tag)>"
4067
4079
  )
4068
4080
  li_pattern = re.compile(list_item_pattern, re.DOTALL)
4069
4081
  # Add list group:
4070
- new_list = doc.add_group(label=list_label, name="list")
4082
+ new_list = doc.add_list_group(name="list")
4071
4083
  # Pricess list items
4072
4084
  for li_match in li_pattern.finditer(full_chunk):
4073
4085
  enum_value += 1
@@ -4385,17 +4397,17 @@ class DoclingDocument(BaseModel):
4385
4397
  @field_validator("version")
4386
4398
  @classmethod
4387
4399
  def check_version_is_compatible(cls, v: str) -> str:
4388
- """Check if this document version is compatible with current version."""
4389
- current_match = re.match(VERSION_PATTERN, CURRENT_VERSION)
4400
+ """Check if this document version is compatible with SDK schema version."""
4401
+ sdk_match = re.match(VERSION_PATTERN, CURRENT_VERSION)
4390
4402
  doc_match = re.match(VERSION_PATTERN, v)
4391
4403
  if (
4392
4404
  doc_match is None
4393
- or current_match is None
4394
- or doc_match["major"] != current_match["major"]
4395
- or doc_match["minor"] > current_match["minor"]
4405
+ or sdk_match is None
4406
+ or doc_match["major"] != sdk_match["major"]
4407
+ or doc_match["minor"] > sdk_match["minor"]
4396
4408
  ):
4397
4409
  raise ValueError(
4398
- f"incompatible version {v} with schema version {CURRENT_VERSION}"
4410
+ f"Doc version {v} incompatible with SDK schema version {CURRENT_VERSION}"
4399
4411
  )
4400
4412
  else:
4401
4413
  return CURRENT_VERSION
@@ -4425,9 +4437,7 @@ class DoclingDocument(BaseModel):
4425
4437
  ):
4426
4438
  if isinstance(item, ListItem) and (
4427
4439
  item.parent is None
4428
- or not isinstance(
4429
- item.parent.resolve(doc=self), (OrderedList, UnorderedList)
4430
- )
4440
+ or not isinstance(item.parent.resolve(doc=self), ListGroup)
4431
4441
  ):
4432
4442
  if isinstance(prev, ListItem) and (
4433
4443
  prev.parent is None or prev.parent.resolve(self) == self.body
@@ -4440,11 +4450,7 @@ class DoclingDocument(BaseModel):
4440
4450
  for curr_list_items in reversed(misplaced_list_items):
4441
4451
 
4442
4452
  # add group
4443
- new_group = (
4444
- OrderedList(self_ref="#")
4445
- if curr_list_items[0].enumerated
4446
- else UnorderedList(self_ref="#")
4447
- )
4453
+ new_group = ListGroup(self_ref="#")
4448
4454
  self.insert_item_before_sibling(
4449
4455
  new_item=new_group,
4450
4456
  sibling=curr_list_items[0],
@@ -4531,3 +4537,10 @@ class DoclingDocument(BaseModel):
4531
4537
  self.key_value_items = item_lists["key_value_items"] # type: ignore
4532
4538
  self.form_items = item_lists["form_items"] # type: ignore
4533
4539
  self.body = new_body
4540
+
4541
+
4542
+ # deprecated aliases (kept for backwards compatibility):
4543
+ BasePictureData = BaseAnnotation
4544
+ PictureDescriptionData = DescriptionAnnotation
4545
+ PictureMiscData = MiscAnnotation
4546
+ UnorderedList = ListGroup
@@ -77,7 +77,7 @@ class GroupLabel(str, Enum):
77
77
  LIST = (
78
78
  "list" # group label for list container (not the list-items) (e.g. HTML <ul/>)
79
79
  )
80
- ORDERED_LIST = "ordered_list" # List with enumeration (e.g. HTML <ol/>)
80
+ ORDERED_LIST = "ordered_list" # deprecated
81
81
  CHAPTER = "chapter"
82
82
  SECTION = "section"
83
83
  SHEET = "sheet"
@@ -122,6 +122,8 @@ class BoundingRectangle(BaseModel):
122
122
  p_1 = ((self.r_x1 + self.r_x2) / 2.0, (self.r_y1 + self.r_y2) / 2.0)
123
123
 
124
124
  delta_x, delta_y = p_1[0] - p_0[0], p_1[1] - p_0[1]
125
+ if self.coord_origin == CoordOrigin.TOPLEFT:
126
+ delta_y = -delta_y
125
127
 
126
128
  if abs(delta_y) < 1.0e-3:
127
129
  angle = 0.0
@@ -131,8 +133,7 @@ class BoundingRectangle(BaseModel):
131
133
  angle = math.atan(delta_y / delta_x)
132
134
  if delta_x < 0:
133
135
  angle += np.pi
134
- if angle < 0:
135
- angle += 2 * np.pi
136
+ angle = angle % (2 * np.pi)
136
137
  return angle
137
138
 
138
139
  @property
@@ -5,8 +5,10 @@
5
5
 
6
6
  """Utils for document types."""
7
7
 
8
+ import html
8
9
  import unicodedata
9
10
  from pathlib import Path
11
+ from typing import Optional
10
12
 
11
13
 
12
14
  def relative_path(src: Path, target: Path) -> Path:
@@ -49,14 +51,23 @@ def relative_path(src: Path, target: Path) -> Path:
49
51
  return Path(*up_segments, *down_segments)
50
52
 
51
53
 
52
- def get_html_tag_with_text_direction(html_tag: str, text: str) -> str:
54
+ def get_html_tag_with_text_direction(
55
+ html_tag: str, text: str, attrs: Optional[dict] = None
56
+ ) -> str:
53
57
  """Form the HTML element with tag, text, and optional dir attribute."""
54
- text_dir = get_text_direction(text)
55
-
56
- if text_dir == "ltr":
57
- return f"<{html_tag}>{text}</{html_tag}>"
58
- else:
59
- return f'<{html_tag} dir="{text_dir}">{text}</{html_tag}>'
58
+ my_attrs = attrs or {}
59
+ if (dir := my_attrs.get("dir")) is not None and dir != "ltr":
60
+ my_attrs["dir"] = get_text_direction(text)
61
+ pieces: list[str] = [html_tag]
62
+ if my_attrs:
63
+ attrs_str = " ".join(
64
+ [
65
+ f'{html.escape(k, quote=False)}="{html.escape(my_attrs[k], quote=False)}"'
66
+ for k in my_attrs
67
+ ]
68
+ )
69
+ pieces.append(attrs_str)
70
+ return f"<{' '.join(pieces)}>{text}</{html_tag}>"
60
71
 
61
72
 
62
73
  def get_text_direction(text: str) -> str:
@@ -6,6 +6,7 @@
6
6
  """File-related utilities."""
7
7
 
8
8
  import importlib
9
+ import re
9
10
  import tempfile
10
11
  from io import BytesIO
11
12
  from pathlib import Path
@@ -76,6 +77,32 @@ def resolve_source_to_stream(
76
77
  agent_name = f"docling-core/{importlib.metadata.version('docling-core')}"
77
78
  req_headers["user-agent"] = agent_name
78
79
 
80
+ # Google Docs, Files, PDF URLs, Spreadsheets, Presentations: convert to export URL
81
+ google_doc_id = re.search(
82
+ r"google\.com\/(file|document|spreadsheets|presentation)\/d\/([\w-]+)",
83
+ str(http_url),
84
+ )
85
+ if google_doc_id:
86
+ doc_type = google_doc_id.group(1)
87
+ doc_id = google_doc_id.group(2)
88
+
89
+ if doc_type == "file":
90
+ http_url = TypeAdapter(AnyHttpUrl).validate_python(
91
+ f"https://drive.google.com/uc?export=download&id={doc_id}"
92
+ )
93
+ elif doc_type == "document":
94
+ http_url = TypeAdapter(AnyHttpUrl).validate_python(
95
+ f"https://docs.google.com/document/d/{doc_id}/export?format=docx"
96
+ )
97
+ elif doc_type == "spreadsheets":
98
+ http_url = TypeAdapter(AnyHttpUrl).validate_python(
99
+ f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=xlsx"
100
+ )
101
+ elif doc_type == "presentation":
102
+ http_url = TypeAdapter(AnyHttpUrl).validate_python(
103
+ f"https://docs.google.com/presentation/d/{doc_id}/export?format=pptx"
104
+ )
105
+
79
106
  # fetch the page
80
107
  res = requests.get(http_url, stream=True, headers=req_headers)
81
108
  res.raise_for_status()
@@ -26,7 +26,6 @@ from docling_core.types.doc import (
26
26
  TextItem,
27
27
  )
28
28
  from docling_core.types.doc.document import ContentLayer, GroupItem, ListItem, TableData
29
- from docling_core.types.doc.labels import GroupLabel
30
29
  from docling_core.types.legacy_doc.base import (
31
30
  BaseCell,
32
31
  BaseText,
@@ -486,7 +485,7 @@ def legacy_to_docling_document(legacy_doc: DsDocument) -> DoclingDocument: # no
486
485
  item_type in "list-item-level-1" or item.name in {"list", "list-item"}
487
486
  ):
488
487
  if current_list is None:
489
- current_list = doc.add_group(label=GroupLabel.LIST, name="list")
488
+ current_list = doc.add_list_group(name="list")
490
489
  else:
491
490
  current_list = None
492
491
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.38.2
3
+ Version: 2.40.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -19,19 +19,19 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
19
19
  docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
20
20
  docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
21
21
  docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
22
- docling_core/transforms/chunker/hierarchical_chunker.py,sha256=7Fpwwsn2BoiR12KGPrn8fU1uuhqBLp85MRLMF0aIsL8,8281
22
+ docling_core/transforms/chunker/hierarchical_chunker.py,sha256=uDf-qGiIT_4JUEg9NOdzvDqAPOTqycKJ-jEpDkV3jJU,8243
23
23
  docling_core/transforms/chunker/hybrid_chunker.py,sha256=xjkz8hy3tXXzkJzf7QMFOEq_v8V7Jcs9tCY0Mxjge74,12548
24
24
  docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
25
25
  docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
26
26
  docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
27
27
  docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
28
28
  docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
29
- docling_core/transforms/serializer/base.py,sha256=ZFIiZeplL-QbBs9EDUb1awqxapQ23PsApVetJtAs7Vs,6891
30
- docling_core/transforms/serializer/common.py,sha256=RO2KWl3sZq_PIvzWzuGJTWntKjLOAy3n17cgZi84AAs,19163
31
- docling_core/transforms/serializer/doctags.py,sha256=PuAExlP-2HxcDSP_R_phtYQU0yKBW94RrPgb85IUxck,19905
32
- docling_core/transforms/serializer/html.py,sha256=SZgQa0QnknEoRwMFLdgmVsLQqLF2rQl3D7XyEZzUHCE,37151
29
+ docling_core/transforms/serializer/base.py,sha256=s3Anl_3-QJM1t29Bz-iOgLhAcfG3BZuwZqdYTi5Xfr0,6846
30
+ docling_core/transforms/serializer/common.py,sha256=Dkw9axJqU2qlZuEFRDa6Av11PIL2ejOOOCAahtoK9sA,19106
31
+ docling_core/transforms/serializer/doctags.py,sha256=TD0yAm1qSVy-GsE6svpUAI-Yqjcf2rrTZ3ac9YU3gbE,19858
32
+ docling_core/transforms/serializer/html.py,sha256=oxnUhszRPBINiK1tq2dwf5QjTCrIV_q15vsrPVqBeME,38988
33
33
  docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
34
- docling_core/transforms/serializer/markdown.py,sha256=2wV0ydqWKSm-HAW94gF0IRBpjWgoqUjL4JHRYS8DDgY,21803
34
+ docling_core/transforms/serializer/markdown.py,sha256=VwonuAkuOPmQM7ibDIGvQBHOqhTcTJ_t187fLQQiNPo,23951
35
35
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
36
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
37
  docling_core/transforms/visualizer/layout_visualizer.py,sha256=zHzQTWcy-z1J2BcsjvakLkrp8pgStgnxhDl8YqIAotY,8035
@@ -39,13 +39,13 @@ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao
39
39
  docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
40
40
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
41
41
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
42
- docling_core/types/doc/__init__.py,sha256=pchsIq-9FH_kCTyuyDdB8L4yV77pmnxPwT7399xrqxI,1626
42
+ docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
43
43
  docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
44
- docling_core/types/doc/document.py,sha256=0e-v_N2ALA66aUZduK1Rii_PcKjffxNKWR9V8Lp0clg,156894
45
- docling_core/types/doc/labels.py,sha256=JiciRK7_DOkebsrfQ6PVCvS__TsKgWn1ANk84BeB14k,7359
46
- docling_core/types/doc/page.py,sha256=GV9UnGCvvqs6KD_ac3hF6b_NH6M6IevsL5iSt8WWVCI,41221
44
+ docling_core/types/doc/document.py,sha256=9-n0tngXLTRVAkqGHe3bDSh1OJbBt87EW2nV8GdOGME,157406
45
+ docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
46
+ docling_core/types/doc/page.py,sha256=J_4ThNhrdhrfPtNMBTDHi-CQBvraejAwUaqVjyDeeeI,41288
47
47
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
48
- docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
48
+ docling_core/types/doc/utils.py,sha256=JpAi7x9DHksFlIj_gRJPcSZOHa8AHvVPEO_K9aSnw4c,2608
49
49
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
50
50
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
51
51
  docling_core/types/io/__init__.py,sha256=7QYvFRaDE0AzBg8e7tvsVNlLBbCbAbQ9rP2TU8aXR1k,350
@@ -68,15 +68,15 @@ docling_core/types/rec/statement.py,sha256=YwcV4CbVaAbzNwh14yJ_6Py3Ww0XnUJrEEUiK
68
68
  docling_core/types/rec/subject.py,sha256=PRCERGTMs4YhR3_Ne6jogkm41zYg8uUWb1yFpM7atm4,2572
69
69
  docling_core/utils/__init__.py,sha256=VauNNpWRHG0_ISKrsy5-gTxicrdQZSau6qMfuMl3iqk,120
70
70
  docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,874
71
- docling_core/utils/file.py,sha256=GzX0pclvewwPoqHJSaVUuULzSJwJgkCUwgKgJ7G5ohQ,5628
71
+ docling_core/utils/file.py,sha256=CSNclJGL2OwLIc8DQFdoLxr22FUc4_UC7zS6pNrFfkQ,6858
72
72
  docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6LZDtC8,2290
73
73
  docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
74
- docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
74
+ docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
75
75
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
76
76
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
77
- docling_core-2.38.2.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
- docling_core-2.38.2.dist-info/METADATA,sha256=E1ONe70u3yt98iAtpnlfsS9hTV4Cpx8Kn5Q0Zz6o_XY,6453
79
- docling_core-2.38.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
- docling_core-2.38.2.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
- docling_core-2.38.2.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
- docling_core-2.38.2.dist-info/RECORD,,
77
+ docling_core-2.40.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
+ docling_core-2.40.0.dist-info/METADATA,sha256=A6_Wz_CJzmHa20USMUgQPDMpN5-S3f8VpNrx7ns1SXo,6453
79
+ docling_core-2.40.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
+ docling_core-2.40.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
+ docling_core-2.40.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
+ docling_core-2.40.0.dist-info/RECORD,,