docling-core 2.33.1__py3-none-any.whl → 2.34.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -202,6 +202,16 @@ class BaseDocSerializer(ABC):
202
202
  """Hook for strikethrough formatting serialization."""
203
203
  ...
204
204
 
205
+ @abstractmethod
206
+ def serialize_subscript(self, text: str, **kwargs: Any) -> str:
207
+ """Hook for subscript formatting serialization."""
208
+ ...
209
+
210
+ @abstractmethod
211
+ def serialize_superscript(self, text: str, **kwargs: Any) -> str:
212
+ """Hook for superscript formatting serialization."""
213
+ ...
214
+
205
215
  @abstractmethod
206
216
  def serialize_hyperlink(
207
217
  self,
@@ -239,6 +249,15 @@ class BaseDocSerializer(ABC):
239
249
  """Serialize the item's captions."""
240
250
  ...
241
251
 
252
+ @abstractmethod
253
+ def serialize_annotations(
254
+ self,
255
+ item: DocItem,
256
+ **kwargs: Any,
257
+ ) -> SerializationResult:
258
+ """Serialize the item's annotations."""
259
+ ...
260
+
242
261
  @abstractmethod
243
262
  def get_excluded_refs(self, **kwargs: Any) -> set[str]:
244
263
  """Get references to excluded items."""
@@ -257,3 +276,18 @@ class BaseSerializerProvider(ABC):
257
276
  def get_serializer(self, doc: DoclingDocument) -> BaseDocSerializer:
258
277
  """Get a the associated serializer."""
259
278
  ...
279
+
280
+
281
+ class BaseAnnotationSerializer(ABC):
282
+ """Base class for annotation serializers."""
283
+
284
+ @abstractmethod
285
+ def serialize(
286
+ self,
287
+ *,
288
+ item: DocItem,
289
+ doc: DoclingDocument,
290
+ **kwargs: Any,
291
+ ) -> SerializationResult:
292
+ """Serializes the passed annotation."""
293
+ ...
@@ -15,6 +15,7 @@ from pydantic import AnyUrl, BaseModel, ConfigDict, NonNegativeInt, computed_fie
15
15
  from typing_extensions import Self, override
16
16
 
17
17
  from docling_core.transforms.serializer.base import (
18
+ BaseAnnotationSerializer,
18
19
  BaseDocSerializer,
19
20
  BaseFallbackSerializer,
20
21
  BaseFormSerializer,
@@ -30,6 +31,7 @@ from docling_core.transforms.serializer.base import (
30
31
  from docling_core.types.doc.document import (
31
32
  DOCUMENT_TOKENS_EXPORT_LABELS,
32
33
  ContentLayer,
34
+ DescriptionAnnotation,
33
35
  DocItem,
34
36
  DoclingDocument,
35
37
  FloatingItem,
@@ -41,9 +43,10 @@ from docling_core.types.doc.document import (
41
43
  OrderedList,
42
44
  PictureClassificationData,
43
45
  PictureDataType,
44
- PictureDescriptionData,
45
46
  PictureItem,
46
47
  PictureMoleculeData,
48
+ Script,
49
+ TableAnnotationType,
47
50
  TableItem,
48
51
  TextItem,
49
52
  UnorderedList,
@@ -122,7 +125,9 @@ def _iterate_items(
122
125
  yield item
123
126
 
124
127
 
125
- def _get_picture_annotation_text(annotation: PictureDataType) -> Optional[str]:
128
+ def _get_annotation_text(
129
+ annotation: Union[PictureDataType, TableAnnotationType],
130
+ ) -> Optional[str]:
126
131
  result = None
127
132
  if isinstance(annotation, PictureClassificationData):
128
133
  predicted_class = (
@@ -132,7 +137,7 @@ def _get_picture_annotation_text(annotation: PictureDataType) -> Optional[str]:
132
137
  )
133
138
  if predicted_class is not None:
134
139
  result = predicted_class.replace("_", " ")
135
- elif isinstance(annotation, PictureDescriptionData):
140
+ elif isinstance(annotation, DescriptionAnnotation):
136
141
  result = annotation.text
137
142
  elif isinstance(annotation, PictureMoleculeData):
138
143
  result = annotation.smi
@@ -211,6 +216,8 @@ class DocSerializer(BaseModel, BaseDocSerializer):
211
216
  list_serializer: BaseListSerializer
212
217
  inline_serializer: BaseInlineSerializer
213
218
 
219
+ annotation_serializer: BaseAnnotationSerializer
220
+
214
221
  params: CommonParams = CommonParams()
215
222
 
216
223
  _excluded_refs_cache: dict[str, set[str]] = {}
@@ -449,6 +456,10 @@ class DocSerializer(BaseModel, BaseDocSerializer):
449
456
  res = self.serialize_underline(text=res)
450
457
  if formatting.strikethrough:
451
458
  res = self.serialize_strikethrough(text=res)
459
+ if formatting.script == Script.SUB:
460
+ res = self.serialize_subscript(text=res)
461
+ elif formatting.script == Script.SUPER:
462
+ res = self.serialize_superscript(text=res)
452
463
  if params.include_hyperlinks and hyperlink:
453
464
  res = self.serialize_hyperlink(text=res, hyperlink=hyperlink)
454
465
  return res
@@ -473,6 +484,16 @@ class DocSerializer(BaseModel, BaseDocSerializer):
473
484
  """Hook for strikethrough formatting serialization."""
474
485
  return text
475
486
 
487
+ @override
488
+ def serialize_subscript(self, text: str, **kwargs: Any) -> str:
489
+ """Hook for subscript formatting serialization."""
490
+ return text
491
+
492
+ @override
493
+ def serialize_superscript(self, text: str, **kwargs: Any) -> str:
494
+ """Hook for superscript formatting serialization."""
495
+ return text
496
+
476
497
  @override
477
498
  def serialize_hyperlink(
478
499
  self,
@@ -505,6 +526,19 @@ class DocSerializer(BaseModel, BaseDocSerializer):
505
526
  text_res = ""
506
527
  return create_ser_result(text=text_res, span_source=results)
507
528
 
529
+ @override
530
+ def serialize_annotations(
531
+ self,
532
+ item: DocItem,
533
+ **kwargs: Any,
534
+ ) -> SerializationResult:
535
+ """Serialize the item's annotations."""
536
+ return self.annotation_serializer.serialize(
537
+ item=item,
538
+ doc=self.doc,
539
+ **kwargs,
540
+ )
541
+
508
542
  def _get_applicable_pages(self) -> Optional[list[int]]:
509
543
  pages = {
510
544
  item.prov[0].page_no: ...
@@ -7,6 +7,7 @@ from pydantic import BaseModel
7
7
  from typing_extensions import override
8
8
 
9
9
  from docling_core.transforms.serializer.base import (
10
+ BaseAnnotationSerializer,
10
11
  BaseDocSerializer,
11
12
  BaseFallbackSerializer,
12
13
  BaseFormSerializer,
@@ -17,12 +18,14 @@ from docling_core.transforms.serializer.base import (
17
18
  BaseTableSerializer,
18
19
  BaseTextSerializer,
19
20
  SerializationResult,
21
+ Span,
20
22
  )
21
23
  from docling_core.transforms.serializer.common import (
22
24
  CommonParams,
23
25
  DocSerializer,
24
26
  create_ser_result,
25
27
  )
28
+ from docling_core.types.doc.base import BoundingBox
26
29
  from docling_core.types.doc.document import (
27
30
  CodeItem,
28
31
  DocItem,
@@ -38,6 +41,7 @@ from docling_core.types.doc.document import (
38
41
  PictureItem,
39
42
  PictureMoleculeData,
40
43
  PictureTabularChartData,
44
+ ProvenanceItem,
41
45
  TableItem,
42
46
  TextItem,
43
47
  UnorderedList,
@@ -414,6 +418,39 @@ class DocTagsListSerializer(BaseModel, BaseListSerializer):
414
418
  class DocTagsInlineSerializer(BaseInlineSerializer):
415
419
  """DocTags-specific inline group serializer."""
416
420
 
421
+ def _get_inline_location_tags(
422
+ self, doc: DoclingDocument, item: InlineGroup, params: DocTagsParams
423
+ ) -> SerializationResult:
424
+
425
+ prov: Optional[ProvenanceItem] = None
426
+ boxes: list[BoundingBox] = []
427
+ doc_items: list[DocItem] = []
428
+ for it, _ in doc.iterate_items(root=item):
429
+ if isinstance(it, DocItem):
430
+ for prov in it.prov:
431
+ boxes.append(prov.bbox)
432
+ doc_items.append(it)
433
+ if prov is None:
434
+ return create_ser_result()
435
+
436
+ bbox = BoundingBox.enclosing_bbox(boxes=boxes)
437
+
438
+ # using last seen prov as reference for page dims
439
+ page_w, page_h = doc.pages[prov.page_no].size.as_tuple()
440
+
441
+ loc_str = DocumentToken.get_location(
442
+ bbox=bbox.to_top_left_origin(page_h).as_tuple(),
443
+ page_w=page_w,
444
+ page_h=page_h,
445
+ xsize=params.xsize,
446
+ ysize=params.ysize,
447
+ )
448
+
449
+ return SerializationResult(
450
+ text=loc_str,
451
+ spans=[Span(item=it) for it in doc_items],
452
+ )
453
+
417
454
  @override
418
455
  def serialize(
419
456
  self,
@@ -428,12 +465,23 @@ class DocTagsInlineSerializer(BaseInlineSerializer):
428
465
  """Serializes the passed item."""
429
466
  my_visited = visited if visited is not None else set()
430
467
  params = DocTagsParams(**kwargs)
431
- parts = doc_serializer.get_parts(
432
- item=item,
433
- list_level=list_level,
434
- is_inline_scope=True,
435
- visited=my_visited,
436
- **kwargs,
468
+ parts: List[SerializationResult] = []
469
+ if params.add_location:
470
+ inline_loc_tags_ser_res = self._get_inline_location_tags(
471
+ doc=doc,
472
+ item=item,
473
+ params=params,
474
+ )
475
+ parts.append(inline_loc_tags_ser_res)
476
+ params.add_location = False # suppress children location serialization
477
+ parts.extend(
478
+ doc_serializer.get_parts(
479
+ item=item,
480
+ list_level=list_level,
481
+ is_inline_scope=True,
482
+ visited=my_visited,
483
+ **{**kwargs, **params.model_dump()},
484
+ )
437
485
  )
438
486
  wrap_tag = DocumentToken.INLINE.value
439
487
  delim = _get_delim(params=params)
@@ -460,6 +508,15 @@ class DocTagsFallbackSerializer(BaseFallbackSerializer):
460
508
  return create_ser_result()
461
509
 
462
510
 
511
+ class DocTagsAnnotationSerializer(BaseAnnotationSerializer):
512
+ """DocTags-specific annotation serializer."""
513
+
514
+ @override
515
+ def serialize(self, *, item: DocItem, **kwargs: Any) -> SerializationResult:
516
+ """Serializes the item's annotations."""
517
+ return create_ser_result()
518
+
519
+
463
520
  class DocTagsDocSerializer(DocSerializer):
464
521
  """DocTags-specific document serializer."""
465
522
 
@@ -473,6 +530,8 @@ class DocTagsDocSerializer(DocSerializer):
473
530
  list_serializer: BaseListSerializer = DocTagsListSerializer()
474
531
  inline_serializer: BaseInlineSerializer = DocTagsInlineSerializer()
475
532
 
533
+ annotation_serializer: BaseAnnotationSerializer = DocTagsAnnotationSerializer()
534
+
476
535
  params: DocTagsParams = DocTagsParams()
477
536
 
478
537
  @override
@@ -21,6 +21,7 @@ from pydantic import AnyUrl, BaseModel
21
21
  from typing_extensions import override
22
22
 
23
23
  from docling_core.transforms.serializer.base import (
24
+ BaseAnnotationSerializer,
24
25
  BaseDocSerializer,
25
26
  BaseFallbackSerializer,
26
27
  BaseFormSerializer,
@@ -35,7 +36,7 @@ from docling_core.transforms.serializer.base import (
35
36
  from docling_core.transforms.serializer.common import (
36
37
  CommonParams,
37
38
  DocSerializer,
38
- _get_picture_annotation_text,
39
+ _get_annotation_text,
39
40
  create_ser_result,
40
41
  )
41
42
  from docling_core.transforms.serializer.html_styles import (
@@ -47,6 +48,7 @@ from docling_core.types.doc.base import ImageRefMode
47
48
  from docling_core.types.doc.document import (
48
49
  CodeItem,
49
50
  ContentLayer,
51
+ DescriptionAnnotation,
50
52
  DocItem,
51
53
  DoclingDocument,
52
54
  FloatingItem,
@@ -59,7 +61,9 @@ from docling_core.types.doc.document import (
59
61
  ListItem,
60
62
  NodeItem,
61
63
  OrderedList,
64
+ PictureClassificationData,
62
65
  PictureItem,
66
+ PictureMoleculeData,
63
67
  PictureTabularChartData,
64
68
  SectionHeaderItem,
65
69
  TableCell,
@@ -758,14 +762,7 @@ class HTMLFallbackSerializer(BaseFallbackSerializer):
758
762
  """HTML-specific fallback serializer."""
759
763
 
760
764
  @override
761
- def serialize(
762
- self,
763
- *,
764
- item: NodeItem,
765
- doc_serializer: "BaseDocSerializer",
766
- doc: DoclingDocument,
767
- **kwargs: Any,
768
- ) -> SerializationResult:
765
+ def serialize(self, *, item: NodeItem, **kwargs: Any) -> SerializationResult:
769
766
  """Fallback serializer for items not handled by other serializers."""
770
767
  if isinstance(item, DocItem):
771
768
  return create_ser_result(
@@ -777,6 +774,42 @@ class HTMLFallbackSerializer(BaseFallbackSerializer):
777
774
  return create_ser_result()
778
775
 
779
776
 
777
+ class HTMLAnnotationSerializer(BaseModel, BaseAnnotationSerializer):
778
+ """HTML-specific annotation serializer."""
779
+
780
+ def serialize(
781
+ self,
782
+ *,
783
+ item: DocItem,
784
+ doc: DoclingDocument,
785
+ **kwargs: Any,
786
+ ) -> SerializationResult:
787
+ """Serializes the passed annotation to HTML format."""
788
+ res_parts: list[SerializationResult] = []
789
+ for ann in item.get_annotations():
790
+ if isinstance(
791
+ ann,
792
+ (PictureClassificationData, DescriptionAnnotation, PictureMoleculeData),
793
+ ):
794
+ if ann_text := _get_annotation_text(ann):
795
+ text_dir = get_text_direction(ann_text)
796
+ dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
797
+ ann_ser_res = create_ser_result(
798
+ text=(
799
+ f'<div data-annotation-kind="{ann.kind}"{dir_str}>'
800
+ f"{html.escape(ann_text)}"
801
+ f"</div>"
802
+ ),
803
+ span_source=item,
804
+ )
805
+ res_parts.append(ann_ser_res)
806
+
807
+ return create_ser_result(
808
+ text=" ".join([r.text for r in res_parts if r.text]),
809
+ span_source=res_parts,
810
+ )
811
+
812
+
780
813
  class HTMLDocSerializer(DocSerializer):
781
814
  """HTML-specific document serializer."""
782
815
 
@@ -790,6 +823,8 @@ class HTMLDocSerializer(DocSerializer):
790
823
  list_serializer: BaseListSerializer = HTMLListSerializer()
791
824
  inline_serializer: BaseInlineSerializer = HTMLInlineSerializer()
792
825
 
826
+ annotation_serializer: BaseAnnotationSerializer = HTMLAnnotationSerializer()
827
+
793
828
  params: HTMLParams = HTMLParams()
794
829
 
795
830
  @override
@@ -812,6 +847,16 @@ class HTMLDocSerializer(DocSerializer):
812
847
  """Apply HTML-specific strikethrough serialization."""
813
848
  return f"<del>{text}</del>"
814
849
 
850
+ @override
851
+ def serialize_subscript(self, text: str, **kwargs: Any) -> str:
852
+ """Apply HTML-specific subscript serialization."""
853
+ return f"<sub>{text}</sub>"
854
+
855
+ @override
856
+ def serialize_superscript(self, text: str, **kwargs: Any) -> str:
857
+ """Apply HTML-specific superscript serialization."""
858
+ return f"<sup>{text}</sup>"
859
+
815
860
  @override
816
861
  def serialize_hyperlink(
817
862
  self,
@@ -968,20 +1013,13 @@ class HTMLDocSerializer(DocSerializer):
968
1013
  results.append(cap_ser_res)
969
1014
 
970
1015
  if params.include_annotations and item.self_ref not in excluded_refs:
971
- if isinstance(item, PictureItem):
972
- for ann in item.annotations:
973
- if ann_text := _get_picture_annotation_text(annotation=ann):
974
- text_dir = get_text_direction(ann_text)
975
- dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
976
- ann_ser_res = create_ser_result(
977
- text=(
978
- f'<div data-annotation-kind="{ann.kind}"{dir_str}>'
979
- f"{html.escape(ann_text)}"
980
- f"</div>"
981
- ),
982
- span_source=item,
983
- )
984
- results.append(ann_ser_res)
1016
+ if isinstance(item, (PictureItem, TableItem)):
1017
+ ann_res = self.serialize_annotations(
1018
+ item=item,
1019
+ **kwargs,
1020
+ )
1021
+ if ann_res.text:
1022
+ results.append(ann_res)
985
1023
 
986
1024
  text_res = params.caption_delim.join([r.text for r in results])
987
1025
  if text_res:
@@ -15,6 +15,7 @@ from tabulate import tabulate
15
15
  from typing_extensions import override
16
16
 
17
17
  from docling_core.transforms.serializer.base import (
18
+ BaseAnnotationSerializer,
18
19
  BaseDocSerializer,
19
20
  BaseFallbackSerializer,
20
21
  BaseFormSerializer,
@@ -29,7 +30,7 @@ from docling_core.transforms.serializer.base import (
29
30
  from docling_core.transforms.serializer.common import (
30
31
  CommonParams,
31
32
  DocSerializer,
32
- _get_picture_annotation_text,
33
+ _get_annotation_text,
33
34
  _PageBreakSerResult,
34
35
  create_ser_result,
35
36
  )
@@ -37,6 +38,7 @@ from docling_core.types.doc.base import ImageRefMode
37
38
  from docling_core.types.doc.document import (
38
39
  CodeItem,
39
40
  ContentLayer,
41
+ DescriptionAnnotation,
40
42
  DocItem,
41
43
  DoclingDocument,
42
44
  FloatingItem,
@@ -48,7 +50,9 @@ from docling_core.types.doc.document import (
48
50
  KeyValueItem,
49
51
  NodeItem,
50
52
  OrderedList,
53
+ PictureClassificationData,
51
54
  PictureItem,
55
+ PictureMoleculeData,
52
56
  PictureTabularChartData,
53
57
  SectionHeaderItem,
54
58
  TableItem,
@@ -58,6 +62,23 @@ from docling_core.types.doc.document import (
58
62
  )
59
63
 
60
64
 
65
+ def _get_annotation_ser_result(
66
+ ann_kind: str, ann_text: str, mark_annotation: bool, doc_item: DocItem
67
+ ):
68
+ return create_ser_result(
69
+ text=(
70
+ (
71
+ f'<!--<annotation kind="{ann_kind}">-->'
72
+ f"{ann_text}"
73
+ f"<!--<annotation/>-->"
74
+ )
75
+ if mark_annotation
76
+ else ann_text
77
+ ),
78
+ span_source=doc_item,
79
+ )
80
+
81
+
61
82
  class MarkdownParams(CommonParams):
62
83
  """Markdown-specific serialization parameters."""
63
84
 
@@ -136,6 +157,49 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
136
157
  return create_ser_result(text=text, span_source=res_parts)
137
158
 
138
159
 
160
+ class MarkdownAnnotationSerializer(BaseModel, BaseAnnotationSerializer):
161
+ """Markdown-specific annotation serializer."""
162
+
163
+ def serialize(
164
+ self,
165
+ *,
166
+ item: DocItem,
167
+ doc: DoclingDocument,
168
+ **kwargs: Any,
169
+ ) -> SerializationResult:
170
+ """Serialize the item's annotations."""
171
+ params = MarkdownParams(**kwargs)
172
+
173
+ res_parts: list[SerializationResult] = []
174
+ for ann in item.get_annotations():
175
+ if isinstance(
176
+ ann,
177
+ (
178
+ PictureClassificationData,
179
+ DescriptionAnnotation,
180
+ PictureMoleculeData,
181
+ ),
182
+ ):
183
+ if ann_text := _get_annotation_text(ann):
184
+ ann_res = create_ser_result(
185
+ text=(
186
+ (
187
+ f'<!--<annotation kind="{ann.kind}">-->'
188
+ f"{ann_text}"
189
+ f"<!--<annotation/>-->"
190
+ )
191
+ if params.mark_annotations
192
+ else ann_text
193
+ ),
194
+ span_source=item,
195
+ )
196
+ res_parts.append(ann_res)
197
+ return create_ser_result(
198
+ text="\n\n".join([r.text for r in res_parts if r.text]),
199
+ span_source=item,
200
+ )
201
+
202
+
139
203
  class MarkdownTableSerializer(BaseTableSerializer):
140
204
  """Markdown-specific table item serializer."""
141
205
 
@@ -149,6 +213,7 @@ class MarkdownTableSerializer(BaseTableSerializer):
149
213
  **kwargs: Any,
150
214
  ) -> SerializationResult:
151
215
  """Serializes the passed item."""
216
+ params = MarkdownParams(**kwargs)
152
217
  res_parts: list[SerializationResult] = []
153
218
 
154
219
  cap_res = doc_serializer.serialize_captions(
@@ -159,6 +224,16 @@ class MarkdownTableSerializer(BaseTableSerializer):
159
224
  res_parts.append(cap_res)
160
225
 
161
226
  if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
227
+
228
+ if params.include_annotations:
229
+
230
+ ann_res = doc_serializer.serialize_annotations(
231
+ item=item,
232
+ **kwargs,
233
+ )
234
+ if ann_res.text:
235
+ res_parts.append(ann_res)
236
+
162
237
  rows = [
163
238
  [
164
239
  # make sure that md tables are not broken
@@ -214,22 +289,12 @@ class MarkdownPictureSerializer(BasePictureSerializer):
214
289
 
215
290
  if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
216
291
  if params.include_annotations:
217
-
218
- for ann in item.annotations:
219
- if ann_text := _get_picture_annotation_text(annotation=ann):
220
- ann_ser_res = create_ser_result(
221
- text=(
222
- (
223
- f'<!--<annotation kind="{ann.kind}">-->'
224
- f"{ann_text}"
225
- f"<!--<annotation/>-->"
226
- )
227
- if params.mark_annotations
228
- else ann_text
229
- ),
230
- span_source=item,
231
- )
232
- res_parts.append(ann_ser_res)
292
+ ann_res = doc_serializer.serialize_annotations(
293
+ item=item,
294
+ **kwargs,
295
+ )
296
+ if ann_res.text:
297
+ res_parts.append(ann_res)
233
298
 
234
299
  img_res = self._serialize_image_part(
235
300
  item=item,
@@ -257,7 +322,7 @@ class MarkdownPictureSerializer(BasePictureSerializer):
257
322
  res_parts.append(
258
323
  create_ser_result(text=md_table_content, span_source=item)
259
324
  )
260
- text_res = "\n\n".join([r.text for r in res_parts])
325
+ text_res = "\n\n".join([r.text for r in res_parts if r.text])
261
326
 
262
327
  return create_ser_result(text=text_res, span_source=res_parts)
263
328
 
@@ -471,6 +536,8 @@ class MarkdownDocSerializer(DocSerializer):
471
536
  list_serializer: BaseListSerializer = MarkdownListSerializer()
472
537
  inline_serializer: BaseInlineSerializer = MarkdownInlineSerializer()
473
538
 
539
+ annotation_serializer: BaseAnnotationSerializer = MarkdownAnnotationSerializer()
540
+
474
541
  params: MarkdownParams = MarkdownParams()
475
542
 
476
543
  @override
@@ -15,7 +15,7 @@ import warnings
15
15
  from enum import Enum
16
16
  from io import BytesIO
17
17
  from pathlib import Path
18
- from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
18
+ from typing import Any, Dict, Final, List, Literal, Optional, Sequence, Tuple, Union
19
19
  from urllib.parse import unquote
20
20
 
21
21
  import pandas as pd
@@ -30,6 +30,7 @@ from pydantic import (
30
30
  computed_field,
31
31
  field_validator,
32
32
  model_validator,
33
+ validate_call,
33
34
  )
34
35
  from tabulate import tabulate
35
36
  from typing_extensions import Annotated, Self, deprecated
@@ -53,7 +54,7 @@ _logger = logging.getLogger(__name__)
53
54
 
54
55
  Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
55
56
  LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
56
- CURRENT_VERSION: Final = "1.3.0"
57
+ CURRENT_VERSION: Final = "1.4.0"
57
58
 
58
59
  DEFAULT_EXPORT_LABELS = {
59
60
  DocItemLabel.TITLE,
@@ -85,8 +86,8 @@ DOCUMENT_TOKENS_EXPORT_LABELS.update(
85
86
  )
86
87
 
87
88
 
88
- class BasePictureData(BaseModel):
89
- """BasePictureData."""
89
+ class BaseAnnotation(BaseModel):
90
+ """Base class for all annotation types."""
90
91
 
91
92
  kind: str
92
93
 
@@ -98,7 +99,7 @@ class PictureClassificationClass(BaseModel):
98
99
  confidence: float
99
100
 
100
101
 
101
- class PictureClassificationData(BasePictureData):
102
+ class PictureClassificationData(BaseAnnotation):
102
103
  """PictureClassificationData."""
103
104
 
104
105
  kind: Literal["classification"] = "classification"
@@ -106,19 +107,18 @@ class PictureClassificationData(BasePictureData):
106
107
  predicted_classes: List[PictureClassificationClass]
107
108
 
108
109
 
109
- class PictureDescriptionData(BasePictureData):
110
- """PictureDescriptionData."""
110
+ class DescriptionAnnotation(BaseAnnotation):
111
+ """DescriptionAnnotation."""
111
112
 
112
113
  kind: Literal["description"] = "description"
113
114
  text: str
114
115
  provenance: str
115
116
 
116
117
 
117
- class PictureMoleculeData(BaseModel):
118
+ class PictureMoleculeData(BaseAnnotation):
118
119
  """PictureMoleculeData."""
119
120
 
120
121
  kind: Literal["molecule_data"] = "molecule_data"
121
-
122
122
  smi: str
123
123
  confidence: float
124
124
  class_name: str
@@ -126,13 +126,19 @@ class PictureMoleculeData(BaseModel):
126
126
  provenance: str
127
127
 
128
128
 
129
- class PictureMiscData(BaseModel):
130
- """PictureMiscData."""
129
+ class MiscAnnotation(BaseAnnotation):
130
+ """MiscAnnotation."""
131
131
 
132
132
  kind: Literal["misc"] = "misc"
133
133
  content: Dict[str, Any]
134
134
 
135
135
 
136
+ # deprecated aliases:
137
+ BasePictureData = BaseAnnotation
138
+ PictureDescriptionData = DescriptionAnnotation
139
+ PictureMiscData = MiscAnnotation
140
+
141
+
136
142
  class ChartLine(BaseModel):
137
143
  """Represents a line in a line chart.
138
144
 
@@ -196,7 +202,7 @@ class ChartPoint(BaseModel):
196
202
  value: Tuple[float, float]
197
203
 
198
204
 
199
- class PictureChartData(BaseModel):
205
+ class PictureChartData(BaseAnnotation):
200
206
  """Base class for picture chart data.
201
207
 
202
208
  Attributes:
@@ -381,10 +387,10 @@ class PictureTabularChartData(PictureChartData):
381
387
 
382
388
  PictureDataType = Annotated[
383
389
  Union[
390
+ DescriptionAnnotation,
391
+ MiscAnnotation,
384
392
  PictureClassificationData,
385
- PictureDescriptionData,
386
393
  PictureMoleculeData,
387
- PictureMiscData,
388
394
  PictureTabularChartData,
389
395
  PictureLineChartData,
390
396
  PictureBarChartData,
@@ -818,6 +824,18 @@ class DocItem(
818
824
  )
819
825
  return page_image.crop(crop_bbox.as_tuple())
820
826
 
827
+ def get_annotations(self) -> Sequence[BaseAnnotation]:
828
+ """Get the annotations of this DocItem."""
829
+ return []
830
+
831
+
832
+ class Script(str, Enum):
833
+ """Text script position."""
834
+
835
+ BASELINE = "baseline"
836
+ SUB = "sub"
837
+ SUPER = "super"
838
+
821
839
 
822
840
  class Formatting(BaseModel):
823
841
  """Formatting."""
@@ -826,6 +844,7 @@ class Formatting(BaseModel):
826
844
  italic: bool = False
827
845
  underline: bool = False
828
846
  strikethrough: bool = False
847
+ script: Script = Script.BASELINE
829
848
 
830
849
 
831
850
  class TextItem(DocItem):
@@ -1182,6 +1201,19 @@ class PictureItem(FloatingItem):
1182
1201
  text = serializer.serialize(item=self).text
1183
1202
  return text
1184
1203
 
1204
+ def get_annotations(self) -> Sequence[BaseAnnotation]:
1205
+ """Get the annotations of this PictureItem."""
1206
+ return self.annotations
1207
+
1208
+
1209
+ TableAnnotationType = Annotated[
1210
+ Union[
1211
+ DescriptionAnnotation,
1212
+ MiscAnnotation,
1213
+ ],
1214
+ Field(discriminator="kind"),
1215
+ ]
1216
+
1185
1217
 
1186
1218
  class TableItem(FloatingItem):
1187
1219
  """TableItem."""
@@ -1192,6 +1224,8 @@ class TableItem(FloatingItem):
1192
1224
  DocItemLabel.TABLE,
1193
1225
  ] = DocItemLabel.TABLE
1194
1226
 
1227
+ annotations: List[TableAnnotationType] = []
1228
+
1195
1229
  def export_to_dataframe(self) -> pd.DataFrame:
1196
1230
  """Export the table as a Pandas DataFrame."""
1197
1231
  if self.data.num_rows == 0 or self.data.num_cols == 0:
@@ -1438,6 +1472,15 @@ class TableItem(FloatingItem):
1438
1472
  text = serializer.serialize(item=self).text
1439
1473
  return text
1440
1474
 
1475
+ @validate_call
1476
+ def add_annotation(self, annotation: TableAnnotationType) -> None:
1477
+ """Add an annotation to the table."""
1478
+ self.annotations.append(annotation)
1479
+
1480
+ def get_annotations(self) -> Sequence[BaseAnnotation]:
1481
+ """Get the annotations of this TableItem."""
1482
+ return self.annotations
1483
+
1441
1484
 
1442
1485
  class GraphCell(BaseModel):
1443
1486
  """GraphCell."""
@@ -1776,6 +1819,18 @@ class DoclingDocument(BaseModel):
1776
1819
  item.parent = parent_ref
1777
1820
 
1778
1821
  self.form_items.append(item)
1822
+
1823
+ elif isinstance(item, (UnorderedList, OrderedList, InlineGroup)):
1824
+ item_label = "groups"
1825
+ item_index = len(self.groups)
1826
+
1827
+ cref = f"#/{item_label}/{item_index}"
1828
+
1829
+ item.self_ref = cref
1830
+ item.parent = parent_ref
1831
+
1832
+ self.groups.append(item)
1833
+
1779
1834
  else:
1780
1835
  raise ValueError(f"Item {item} is not supported for insertion")
1781
1836
 
@@ -2111,8 +2166,8 @@ class DoclingDocument(BaseModel):
2111
2166
  :param parent: Optional[NodeItem]: (Default value = None)
2112
2167
 
2113
2168
  """
2114
- if not parent:
2115
- parent = self.body
2169
+ if not isinstance(parent, (OrderedList, UnorderedList)):
2170
+ raise ValueError("ListItem's parent must be a list group")
2116
2171
 
2117
2172
  if not orig:
2118
2173
  orig = text
@@ -2267,6 +2322,7 @@ class DoclingDocument(BaseModel):
2267
2322
  parent: Optional[NodeItem] = None,
2268
2323
  label: DocItemLabel = DocItemLabel.TABLE,
2269
2324
  content_layer: Optional[ContentLayer] = None,
2325
+ annotations: Optional[list[TableAnnotationType]] = None,
2270
2326
  ):
2271
2327
  """add_table.
2272
2328
 
@@ -2284,7 +2340,11 @@ class DoclingDocument(BaseModel):
2284
2340
  cref = f"#/tables/{table_index}"
2285
2341
 
2286
2342
  tbl_item = TableItem(
2287
- label=label, data=data, self_ref=cref, parent=parent.get_ref()
2343
+ label=label,
2344
+ data=data,
2345
+ self_ref=cref,
2346
+ parent=parent.get_ref(),
2347
+ annotations=annotations or [],
2288
2348
  )
2289
2349
  if prov:
2290
2350
  tbl_item.prov.append(prov)
@@ -2301,7 +2361,7 @@ class DoclingDocument(BaseModel):
2301
2361
 
2302
2362
  def add_picture(
2303
2363
  self,
2304
- annotations: List[PictureDataType] = [],
2364
+ annotations: Optional[List[PictureDataType]] = None,
2305
2365
  image: Optional[ImageRef] = None,
2306
2366
  caption: Optional[Union[TextItem, RefItem]] = None,
2307
2367
  prov: Optional[ProvenanceItem] = None,
@@ -2310,7 +2370,7 @@ class DoclingDocument(BaseModel):
2310
2370
  ):
2311
2371
  """add_picture.
2312
2372
 
2313
- :param data: List[PictureData]: (Default value = [])
2373
+ :param data: Optional[List[PictureData]]: (Default value = None)
2314
2374
  :param caption: Optional[Union[TextItem:
2315
2375
  :param RefItem]]: (Default value = None)
2316
2376
  :param prov: Optional[ProvenanceItem]: (Default value = None)
@@ -2324,7 +2384,7 @@ class DoclingDocument(BaseModel):
2324
2384
 
2325
2385
  fig_item = PictureItem(
2326
2386
  label=DocItemLabel.PICTURE,
2327
- annotations=annotations,
2387
+ annotations=annotations or [],
2328
2388
  image=image,
2329
2389
  self_ref=cref,
2330
2390
  parent=parent.get_ref(),
@@ -3589,6 +3649,52 @@ class DoclingDocument(BaseModel):
3589
3649
 
3590
3650
  return (GraphData(cells=cells, links=links), overall_prov)
3591
3651
 
3652
+ def _add_text(
3653
+ full_chunk: str,
3654
+ bbox: Optional[BoundingBox],
3655
+ pg_width: int,
3656
+ pg_height: int,
3657
+ page_no: int,
3658
+ tag_name: str,
3659
+ doc_label: DocItemLabel,
3660
+ doc: DoclingDocument,
3661
+ parent: Optional[NodeItem],
3662
+ ):
3663
+ # For everything else, treat as text
3664
+ text_content = extract_inner_text(full_chunk)
3665
+ element_prov = (
3666
+ ProvenanceItem(
3667
+ bbox=bbox.resize_by_scale(pg_width, pg_height),
3668
+ charspan=(0, len(text_content)),
3669
+ page_no=page_no,
3670
+ )
3671
+ if bbox
3672
+ else None
3673
+ )
3674
+
3675
+ content_layer = ContentLayer.BODY
3676
+ if tag_name in [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]:
3677
+ content_layer = ContentLayer.FURNITURE
3678
+
3679
+ if doc_label == DocItemLabel.SECTION_HEADER:
3680
+ # Extract level from tag_name (e.g. "section_level_header_1" -> 1)
3681
+ level = int(tag_name.split("_")[-1])
3682
+ doc.add_heading(
3683
+ text=text_content,
3684
+ level=level,
3685
+ prov=element_prov,
3686
+ parent=parent,
3687
+ content_layer=content_layer,
3688
+ )
3689
+ else:
3690
+ doc.add_text(
3691
+ label=doc_label,
3692
+ text=text_content,
3693
+ prov=element_prov,
3694
+ parent=parent,
3695
+ content_layer=content_layer,
3696
+ )
3697
+
3592
3698
  # doc = DoclingDocument(name="Document")
3593
3699
  for pg_idx, doctag_page in enumerate(doctag_document.pages):
3594
3700
  page_doctags = doctag_page.tokens
@@ -3623,7 +3729,7 @@ class DoclingDocument(BaseModel):
3623
3729
  tag_pattern = (
3624
3730
  rf"<(?P<tag>{DocItemLabel.TITLE}|{DocItemLabel.DOCUMENT_INDEX}|"
3625
3731
  rf"{DocItemLabel.CHECKBOX_UNSELECTED}|{DocItemLabel.CHECKBOX_SELECTED}|"
3626
- rf"{DocItemLabel.TEXT}|{DocItemLabel.PAGE_HEADER}|"
3732
+ rf"{DocItemLabel.TEXT}|{DocItemLabel.PAGE_HEADER}|{GroupLabel.INLINE}|"
3627
3733
  rf"{DocItemLabel.PAGE_FOOTER}|{DocItemLabel.FORMULA}|"
3628
3734
  rf"{DocItemLabel.CAPTION}|{DocItemLabel.PICTURE}|"
3629
3735
  rf"{DocItemLabel.FOOTNOTE}|{DocItemLabel.CODE}|"
@@ -3648,7 +3754,7 @@ class DoclingDocument(BaseModel):
3648
3754
  # no closing tag; only the existence of the item is recovered
3649
3755
  full_chunk = f"<{tag_name}></{tag_name}>"
3650
3756
 
3651
- doc_label = tag_to_doclabel.get(tag_name, DocItemLabel.PARAGRAPH)
3757
+ doc_label = tag_to_doclabel.get(tag_name, DocItemLabel.TEXT)
3652
3758
 
3653
3759
  if tag_name == DocumentToken.OTSL.value:
3654
3760
  table_data = parse_table_content(full_chunk)
@@ -3671,6 +3777,24 @@ class DoclingDocument(BaseModel):
3671
3777
  else:
3672
3778
  doc.add_table(data=table_data, caption=caption)
3673
3779
 
3780
+ elif tag_name == GroupLabel.INLINE:
3781
+ inline_group = doc.add_inline_group()
3782
+ content = match.group("content")
3783
+ common_bbox = extract_bounding_box(content)
3784
+ for item_match in pattern.finditer(content):
3785
+ item_tag = item_match.group("tag")
3786
+ _add_text(
3787
+ full_chunk=item_match.group(0),
3788
+ bbox=common_bbox,
3789
+ pg_width=pg_width,
3790
+ pg_height=pg_height,
3791
+ page_no=page_no,
3792
+ tag_name=item_tag,
3793
+ doc_label=tag_to_doclabel.get(item_tag, DocItemLabel.TEXT),
3794
+ doc=doc,
3795
+ parent=inline_group,
3796
+ )
3797
+
3674
3798
  elif tag_name in [DocItemLabel.PICTURE, DocItemLabel.CHART]:
3675
3799
  caption, caption_bbox = extract_caption(full_chunk)
3676
3800
  table_data = None
@@ -3820,38 +3944,17 @@ class DoclingDocument(BaseModel):
3820
3944
  )
3821
3945
  else:
3822
3946
  # For everything else, treat as text
3823
- text_content = extract_inner_text(full_chunk)
3824
- element_prov = (
3825
- ProvenanceItem(
3826
- bbox=bbox.resize_by_scale(pg_width, pg_height),
3827
- charspan=(0, len(text_content)),
3828
- page_no=page_no,
3829
- )
3830
- if bbox
3831
- else None
3947
+ _add_text(
3948
+ full_chunk=full_chunk,
3949
+ bbox=bbox,
3950
+ pg_width=pg_width,
3951
+ pg_height=pg_height,
3952
+ page_no=page_no,
3953
+ tag_name=tag_name,
3954
+ doc_label=doc_label,
3955
+ doc=doc,
3956
+ parent=None,
3832
3957
  )
3833
-
3834
- content_layer = ContentLayer.BODY
3835
- if tag_name in [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]:
3836
- content_layer = ContentLayer.FURNITURE
3837
-
3838
- if doc_label == DocItemLabel.SECTION_HEADER:
3839
- # Extract level from tag_name (e.g. "section_level_header_1" -> 1)
3840
- level = int(tag_name.split("_")[-1])
3841
- doc.add_heading(
3842
- text=text_content,
3843
- level=level,
3844
- prov=element_prov,
3845
- content_layer=content_layer,
3846
- )
3847
- else:
3848
- doc.add_text(
3849
- label=doc_label,
3850
- text=text_content,
3851
- prov=element_prov,
3852
- content_layer=content_layer,
3853
- )
3854
-
3855
3958
  return doc
3856
3959
 
3857
3960
  @deprecated("Use save_as_doctags instead.")
@@ -4149,3 +4252,58 @@ class DoclingDocument(BaseModel):
4149
4252
  raise ValueError("Document hierachy is inconsistent.")
4150
4253
 
4151
4254
  return d
4255
+
4256
+ @model_validator(mode="after")
4257
+ def validate_misplaced_list_items(self):
4258
+ """validate_misplaced_list_items."""
4259
+ # find list items without list parent, putting succesive ones together
4260
+ misplaced_list_items: list[list[ListItem]] = []
4261
+ prev: Optional[NodeItem] = None
4262
+ for item, _ in self.iterate_items(
4263
+ traverse_pictures=True,
4264
+ included_content_layers={c for c in ContentLayer},
4265
+ with_groups=True, # so that we can distinguish neighboring lists
4266
+ ):
4267
+ if isinstance(item, ListItem) and (
4268
+ item.parent is None
4269
+ or not isinstance(
4270
+ item.parent.resolve(doc=self), (OrderedList, UnorderedList)
4271
+ )
4272
+ ):
4273
+ # non_group_list_items.append(item)
4274
+ if prev is None or not isinstance(prev, ListItem): # if new list
4275
+ misplaced_list_items.append([item])
4276
+ else:
4277
+ misplaced_list_items[-1].append(item)
4278
+ prev = item
4279
+
4280
+ for curr_list_items in reversed(misplaced_list_items):
4281
+
4282
+ # add group
4283
+ new_group = (
4284
+ OrderedList(self_ref="#")
4285
+ if curr_list_items[0].enumerated
4286
+ else UnorderedList(self_ref="#")
4287
+ )
4288
+ self.insert_item_before_sibling(
4289
+ new_item=new_group,
4290
+ sibling=curr_list_items[0],
4291
+ )
4292
+
4293
+ # delete list items from document (should not be affected by group addition)
4294
+ self.delete_items(node_items=curr_list_items)
4295
+
4296
+ # add list items to new group
4297
+ for li in curr_list_items:
4298
+ self.add_list_item(
4299
+ text=li.text,
4300
+ enumerated=li.enumerated,
4301
+ marker=li.marker,
4302
+ orig=li.orig,
4303
+ prov=li.prov[0] if li.prov else None,
4304
+ parent=new_group,
4305
+ content_layer=li.content_layer,
4306
+ formatting=li.formatting,
4307
+ hyperlink=li.hyperlink,
4308
+ )
4309
+ return self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.33.1
3
+ Version: 2.34.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -26,12 +26,12 @@ docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP
26
26
  docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
27
27
  docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
28
28
  docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
29
- docling_core/transforms/serializer/base.py,sha256=9bgpWA0oMmZNRc3yIuZVnu5bJ1glClBsswtVF1vYwMI,6046
30
- docling_core/transforms/serializer/common.py,sha256=mkajw0QRL--WgVL42Vlp2e2PuUQVh79D6EKP4_3YKy0,18112
31
- docling_core/transforms/serializer/doctags.py,sha256=mEmRWVuebcG5pZcR1_HX146cyUk0_FjaLQtMXSgh9hs,17870
32
- docling_core/transforms/serializer/html.py,sha256=_HN1WFKH_WJkxtZrmvm1a6-UDxsEGt_ChWdUysS1qjY,35843
29
+ docling_core/transforms/serializer/base.py,sha256=ZFIiZeplL-QbBs9EDUb1awqxapQ23PsApVetJtAs7Vs,6891
30
+ docling_core/transforms/serializer/common.py,sha256=WP-qO-woidrKyvZ56m0vlKMysoLrMzzZtHSCIwsl3ek,19119
31
+ docling_core/transforms/serializer/doctags.py,sha256=PuAExlP-2HxcDSP_R_phtYQU0yKBW94RrPgb85IUxck,19905
32
+ docling_core/transforms/serializer/html.py,sha256=KiywrroYBS3yk07gQizlmk3oqkXg_NpFwE0VF31_Z-I,37112
33
33
  docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
34
- docling_core/transforms/serializer/markdown.py,sha256=ussKqIptiKPTCRNjy3edjap4DOsy52no-FLSeAyv9S0,18759
34
+ docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
35
35
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
36
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
37
  docling_core/transforms/visualizer/layout_visualizer.py,sha256=ulXxWGIl69-HMKDPFk_XKgNCgQeDNc969PVt_X0-drA,7823
@@ -40,7 +40,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
40
40
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
41
41
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
42
42
  docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
43
- docling_core/types/doc/document.py,sha256=rdevCAZDpMPzPlZmAtiucvBM8h_AjuIZpQDaqjpknl0,142796
43
+ docling_core/types/doc/document.py,sha256=VKZg1VT-H8gTXybgY6lRlcKKR3f6mFDB9UzcrLtII5I,148197
44
44
  docling_core/types/doc/labels.py,sha256=vp4h3e7AmBvezRmgrfuPehjAHTZOufphErLB4ENhdME,7171
45
45
  docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
46
46
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -73,9 +73,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
73
73
  docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
74
74
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
75
75
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
76
- docling_core-2.33.1.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
- docling_core-2.33.1.dist-info/METADATA,sha256=tib261Wc010Z2y6_lgKcXdO2OKPG8pdf2n1CoIYSDBA,6453
78
- docling_core-2.33.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- docling_core-2.33.1.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
80
- docling_core-2.33.1.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
81
- docling_core-2.33.1.dist-info/RECORD,,
76
+ docling_core-2.34.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
+ docling_core-2.34.0.dist-info/METADATA,sha256=853af3C8OZrbXzZqYFhfDfu-gtG4m7my-6wqzCir_cg,6453
78
+ docling_core-2.34.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
+ docling_core-2.34.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
80
+ docling_core-2.34.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
81
+ docling_core-2.34.0.dist-info/RECORD,,