docling-core 2.31.1__py3-none-any.whl → 2.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


docling_core/transforms/chunker/hybrid_chunker.py

@@ -9,6 +9,7 @@ from functools import cached_property
 from typing import Any, Iterable, Iterator, Optional, Union
 
 from pydantic import BaseModel, ConfigDict, Field, computed_field, model_validator
+from transformers import PreTrainedTokenizerBase
 
 from docling_core.transforms.chunker.hierarchical_chunker import (
     ChunkingSerializerProvider,
@@ -70,23 +71,36 @@ class HybridChunker(BaseChunker):
     @model_validator(mode="before")
     @classmethod
     def _patch(cls, data: Any) -> Any:
-        if isinstance(data, dict) and (tokenizer := data.get("tokenizer")):
+        if isinstance(data, dict):
+            tokenizer = data.get("tokenizer")
             max_tokens = data.get("max_tokens")
-            if isinstance(tokenizer, BaseTokenizer):
-                pass
-            else:
+            if not isinstance(tokenizer, BaseTokenizer) and (
+                # some legacy param passed:
+                tokenizer is not None
+                or max_tokens is not None
+            ):
                 from docling_core.transforms.chunker.tokenizer.huggingface import (
                     HuggingFaceTokenizer,
                 )
 
+                warnings.warn(
+                    "Deprecated initialization parameter types for HybridChunker. "
+                    "For updated usage check out "
+                    "https://docling-project.github.io/docling/examples/hybrid_chunking/",
+                    DeprecationWarning,
+                )
+
                 if isinstance(tokenizer, str):
                     data["tokenizer"] = HuggingFaceTokenizer.from_pretrained(
                         model_name=tokenizer,
                         max_tokens=max_tokens,
                     )
-                else:
-                    # migrate previous HF-based tokenizers
-                    kwargs = {"tokenizer": tokenizer}
+                elif tokenizer is None or isinstance(
+                    tokenizer, PreTrainedTokenizerBase
+                ):
+                    kwargs = {
+                        "tokenizer": tokenizer or _get_default_tokenizer().tokenizer
+                    }
                     if max_tokens is not None:
                         kwargs["max_tokens"] = max_tokens
                     data["tokenizer"] = HuggingFaceTokenizer(**kwargs)
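
The upshot of the validator change: passing a plain model name or a raw transformers tokenizer to HybridChunker still works, but is now migrated with a DeprecationWarning. A hedged before/after sketch (the model name is illustrative):

from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer

# Deprecated: legacy parameter types, now auto-migrated with a warning
chunker = HybridChunker(
    tokenizer="sentence-transformers/all-MiniLM-L6-v2",
    max_tokens=512,
)

# Updated usage: pass a BaseTokenizer implementation explicitly
chunker = HybridChunker(
    tokenizer=HuggingFaceTokenizer.from_pretrained(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        max_tokens=512,
    )
)
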
@@ -141,7 +155,6 @@ class HybridChunker(BaseChunker):
             meta = DocMeta(
                 doc_items=doc_items,
                 headings=doc_chunk.meta.headings,
-                captions=doc_chunk.meta.captions,
                 origin=doc_chunk.meta.origin,
             )
             window_text = (
@@ -220,7 +233,9 @@ class HybridChunker(BaseChunker):
             )
             if available_length <= 0:
                 warnings.warn(
-                    f"Headers and captions for this chunk are longer than the total amount of size for the chunk, chunk will be ignored: {doc_chunk.text=}"  # noqa
+                    "Headers and captions for this chunk are longer than the total "
+                    "amount of size for the chunk, chunk will be ignored: "
+                    f"{doc_chunk.text=}"
                 )
                 return []
             text = doc_chunk.text
@@ -235,10 +250,10 @@ class HybridChunker(BaseChunker):
         num_chunks = len(chunks)
         while window_end < num_chunks:
             chunk = chunks[window_end]
-            headings_and_captions = (chunk.meta.headings, chunk.meta.captions)
+            headings = chunk.meta.headings
             ready_to_append = False
             if window_start == window_end:
-                current_headings_and_captions = headings_and_captions
+                current_headings = headings
                 window_end += 1
                 first_chunk_of_window = chunk
             else:
@@ -249,13 +264,12 @@ class HybridChunker(BaseChunker):
                     text=self.delim.join([chk.text for chk in chks]),
                     meta=DocMeta(
                         doc_items=doc_items,
-                        headings=current_headings_and_captions[0],
-                        captions=current_headings_and_captions[1],
+                        headings=current_headings,
                         origin=chunk.meta.origin,
                     ),
                 )
                 if (
-                    headings_and_captions == current_headings_and_captions
+                    headings == current_headings
                     and self._count_chunk_tokens(doc_chunk=candidate) <= self.max_tokens
                 ):
                     # there is room to include the new chunk so add it to the window and
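
These hunks drop captions from the chunk metadata, so the merge window is now keyed on headings alone. The merging itself is a greedy, token-bounded window over consecutive peer chunks; a standalone sketch of the idea (merge_peer_chunks is a hypothetical helper, not the library API):

from typing import Callable


def merge_peer_chunks(
    texts: list[str],
    keys: list[tuple],  # per-chunk headings key
    count_tokens: Callable[[str], int],
    max_tokens: int,
    delim: str = "\n",
) -> list[str]:
    """Greedily join consecutive texts sharing the same headings key,
    as long as the joined candidate stays within max_tokens."""
    merged: list[str] = []
    i = 0
    while i < len(texts):
        current, key = texts[i], keys[i]
        j = i + 1
        while j < len(texts) and keys[j] == key:
            candidate = delim.join([current, texts[j]])
            if count_tokens(candidate) > max_tokens:
                break
            current = candidate
            j += 1
        merged.append(current)
        i = j
    return merged


# merge_peer_chunks(["a", "b", "c"], [("H1",), ("H1",), ("H2",)], len, 10)
# returns ["a\nb", "c"]
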

docling_core/transforms/chunker/tokenizer/huggingface.py

@@ -1,10 +1,11 @@
 """HuggingFace tokenization."""
 
-import sys
+import json
 from os import PathLike
 from typing import Optional, Union
 
-from pydantic import ConfigDict, PositiveInt, TypeAdapter, model_validator
+from huggingface_hub import hf_hub_download
+from pydantic import ConfigDict, model_validator
 from typing_extensions import Self
 
 from docling_core.transforms.chunker.tokenizer.base import BaseTokenizer
@@ -28,16 +29,23 @@ class HuggingFaceTokenizer(BaseTokenizer):
 
     @model_validator(mode="after")
     def _patch(self) -> Self:
-        if hasattr(self.tokenizer, "model_max_length"):
-            model_max_tokens: PositiveInt = TypeAdapter(PositiveInt).validate_python(
-                self.tokenizer.model_max_length
-            )
-            user_max_tokens = self.max_tokens or sys.maxsize
-            self.max_tokens = min(model_max_tokens, user_max_tokens)
-        elif self.max_tokens is None:
-            raise ValueError(
-                "max_tokens must be defined as model does not define model_max_length"
-            )
+        if self.max_tokens is None:
+            try:
+                # try to use SentenceTransformers-specific config as that seems to be
+                # reliable (whenever available)
+                config_name = "sentence_bert_config.json"
+                config_path = hf_hub_download(
+                    repo_id=self.tokenizer.name_or_path,
+                    filename=config_name,
+                )
+                with open(config_path) as f:
+                    data = json.load(f)
+                self.max_tokens = int(data["max_seq_length"])
+            except Exception as e:
+                raise RuntimeError(
+                    "max_tokens could not be determined automatically; please set "
+                    "explicitly."
+                ) from e
         return self
 
     def count_tokens(self, text: str):
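
With this validator, max_tokens no longer falls back to the tokenizer's model_max_length: when unset, it is read from the repo's sentence_bert_config.json, and otherwise a RuntimeError asks for an explicit value. A hedged sketch (model names are illustrative):

from transformers import AutoTokenizer

from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer

# SentenceTransformers repos ship sentence_bert_config.json, so max_tokens
# can be resolved from its max_seq_length field
tok = HuggingFaceTokenizer(
    tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
)

# Repos without that config require an explicit max_tokens, or the validator
# above raises RuntimeError
tok = HuggingFaceTokenizer(
    tokenizer=AutoTokenizer.from_pretrained("bert-base-uncased"),
    max_tokens=512,
)
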

docling_core/transforms/serializer/common.py

@@ -11,7 +11,7 @@ from functools import cached_property
 from pathlib import Path
 from typing import Any, Iterable, Optional, Tuple, Union
 
-from pydantic import AnyUrl, BaseModel, NonNegativeInt, computed_field
+from pydantic import AnyUrl, BaseModel, ConfigDict, NonNegativeInt, computed_field
 from typing_extensions import Self, override
 
 from docling_core.transforms.serializer.base import (
@@ -39,7 +39,11 @@ from docling_core.types.doc.document import (
     KeyValueItem,
     NodeItem,
     OrderedList,
+    PictureClassificationData,
+    PictureDataType,
+    PictureDescriptionData,
     PictureItem,
+    PictureMoleculeData,
     TableItem,
     TextItem,
     UnorderedList,
@@ -118,6 +122,23 @@ def _iterate_items(
         yield item
 
 
+def _get_picture_annotation_text(annotation: PictureDataType) -> Optional[str]:
+    result = None
+    if isinstance(annotation, PictureClassificationData):
+        predicted_class = (
+            annotation.predicted_classes[0].class_name
+            if annotation.predicted_classes
+            else None
+        )
+        if predicted_class is not None:
+            result = predicted_class.replace("_", " ")
+    elif isinstance(annotation, PictureDescriptionData):
+        result = annotation.text
+    elif isinstance(annotation, PictureMoleculeData):
+        result = annotation.smi
+    return result
+
+
 def create_ser_result(
     *,
     text: str = "",
@@ -176,11 +197,7 @@ class CommonParams(BaseModel):
 class DocSerializer(BaseModel, BaseDocSerializer):
     """Class for document serializers."""
 
-    class Config:
-        """Pydantic config."""
-
-        arbitrary_types_allowed = True
-        extra = "forbid"
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid")
 
     doc: DoclingDocument
 
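
The new helper maps each picture annotation type to display text: classification to its top predicted class (underscores replaced with spaces), description to its text, molecule to its SMILES string. A quick illustration of the mapping; it calls the private helper directly, and the provenance field is assumed to be required here:

from docling_core.transforms.serializer.common import _get_picture_annotation_text
from docling_core.types.doc.document import PictureDescriptionData

# provenance value is illustrative
ann = PictureDescriptionData(text="A bar chart of quarterly revenue.", provenance="demo")
assert _get_picture_annotation_text(annotation=ann) == "A bar chart of quarterly revenue."
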

docling_core/transforms/serializer/html.py

@@ -35,6 +35,7 @@ from docling_core.transforms.serializer.base import (
 from docling_core.transforms.serializer.common import (
     CommonParams,
     DocSerializer,
+    _get_picture_annotation_text,
     create_ser_result,
 )
 from docling_core.transforms.serializer.html_styles import (
@@ -110,6 +111,8 @@ class HTMLParams(CommonParams):
     # Enable charts to be printed into HTML as tables
     enable_chart_tables: bool = True
 
+    include_annotations: bool = True
+
 
 class HTMLTextSerializer(BaseModel, BaseTextSerializer):
     """HTML-specific text item serializer."""
@@ -943,18 +946,46 @@ class HTMLDocSerializer(DocSerializer):
         params = self.params.merge_with_patch(patch=kwargs)
         results: list[SerializationResult] = []
         text_res = ""
+        excluded_refs = self.get_excluded_refs(**kwargs)
+
         if DocItemLabel.CAPTION in params.labels:
-            results = [
-                create_ser_result(text=it.text, span_source=it)
-                for cap in item.captions
-                if isinstance(it := cap.resolve(self.doc), TextItem)
-                and it.self_ref not in self.get_excluded_refs(**kwargs)
-            ]
-            text_res = params.caption_delim.join([r.text for r in results])
-            if text_res:
-                text_dir = get_text_direction(text_res)
-                dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
-                text_res = f"<{tag}{dir_str}>{html.escape(text_res)}</{tag}>"
+            for cap in item.captions:
+                if (
+                    isinstance(it := cap.resolve(self.doc), TextItem)
+                    and it.self_ref not in excluded_refs
+                ):
+                    text_cap = it.text
+                    text_dir = get_text_direction(text_cap)
+                    dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
+                    cap_ser_res = create_ser_result(
+                        text=(
+                            f'<div class="caption"{dir_str}>'
+                            f"{html.escape(text_cap)}"
+                            f"</div>"
+                        ),
+                        span_source=it,
+                    )
+                    results.append(cap_ser_res)
+
+        if params.include_annotations and item.self_ref not in excluded_refs:
+            if isinstance(item, PictureItem):
+                for ann in item.annotations:
+                    if ann_text := _get_picture_annotation_text(annotation=ann):
+                        text_dir = get_text_direction(ann_text)
+                        dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
+                        ann_ser_res = create_ser_result(
+                            text=(
+                                f'<div data-annotation-kind="{ann.kind}"{dir_str}>'
+                                f"{html.escape(ann_text)}"
+                                f"</div>"
+                            ),
+                            span_source=item,
+                        )
+                        results.append(ann_ser_res)
+
+        text_res = params.caption_delim.join([r.text for r in results])
+        if text_res:
+            text_res = f"<{tag}>{text_res}</{tag}>"
         return create_ser_result(text=text_res, span_source=results)
 
     def _generate_head(self) -> str:
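
Captions are now wrapped in <div class="caption"> and picture annotations in <div data-annotation-kind="..."> inside the enclosing tag, and the new HTMLParams flag switches annotation output off. A hedged usage sketch (the JSON path is illustrative, and load_from_json is assumed to be the usual deserialization entry point):

from docling_core.transforms.serializer.html import HTMLDocSerializer, HTMLParams
from docling_core.types.doc.document import DoclingDocument

doc = DoclingDocument.load_from_json("sample.json")
serializer = HTMLDocSerializer(doc=doc, params=HTMLParams(include_annotations=False))
html_out = serializer.serialize().text
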

docling_core/transforms/serializer/markdown.py

@@ -29,6 +29,7 @@ from docling_core.transforms.serializer.base import (
 from docling_core.transforms.serializer.common import (
     CommonParams,
     DocSerializer,
+    _get_picture_annotation_text,
     _PageBreakSerResult,
     create_ser_result,
 )
@@ -69,6 +70,8 @@ class MarkdownParams(CommonParams):
     page_break_placeholder: Optional[str] = None  # e.g. "<!-- page break -->"
     escape_underscores: bool = True
     escape_html: bool = True
+    include_annotations: bool = True
+    mark_annotations: bool = False
 
 
 class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
@@ -210,6 +213,24 @@ class MarkdownPictureSerializer(BasePictureSerializer):
             res_parts.append(cap_res)
 
         if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
+            if params.include_annotations:
+
+                for ann in item.annotations:
+                    if ann_text := _get_picture_annotation_text(annotation=ann):
+                        ann_ser_res = create_ser_result(
+                            text=(
+                                (
+                                    f'<!--<annotation kind="{ann.kind}">-->'
+                                    f"{ann_text}"
+                                    f"<!--<annotation/>-->"
+                                )
+                                if params.mark_annotations
+                                else ann_text
+                            ),
+                            span_source=item,
+                        )
+                        res_parts.append(ann_ser_res)
+
             img_res = self._serialize_image_part(
                 item=item,
                 doc=doc,
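
In Markdown, annotation text is emitted before the image part; with mark_annotations=True it is wrapped in HTML-comment markers so downstream tooling can detect or strip it. A hedged sketch (again with an illustrative JSON path):

from docling_core.transforms.serializer.markdown import (
    MarkdownDocSerializer,
    MarkdownParams,
)
from docling_core.types.doc.document import DoclingDocument

doc = DoclingDocument.load_from_json("sample.json")
serializer = MarkdownDocSerializer(
    doc=doc,
    params=MarkdownParams(include_annotations=True, mark_annotations=True),
)
md = serializer.serialize().text
# an annotated picture then renders like:
# <!--<annotation kind="description">-->A bar chart ...<!--<annotation/>-->
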

docling_core/types/doc/document.py

@@ -11,6 +11,7 @@ import os
 import re
 import sys
 import typing
+import warnings
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -2924,6 +2925,7 @@
         page_no: Optional[int] = None,
         included_content_layers: Optional[set[ContentLayer]] = None,
         page_break_placeholder: Optional[str] = None,
+        include_annotations: bool = True,
     ):
         """Save to markdown."""
         if isinstance(filename, str):
@@ -2951,6 +2953,7 @@
             page_no=page_no,
             included_content_layers=included_content_layers,
             page_break_placeholder=page_break_placeholder,
+            include_annotations=include_annotations,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
@@ -2972,6 +2975,8 @@
         page_no: Optional[int] = None,
         included_content_layers: Optional[set[ContentLayer]] = None,
         page_break_placeholder: Optional[str] = None,  # e.g. "<!-- page break -->",
+        include_annotations: bool = True,
+        mark_annotations: bool = False,
     ) -> str:
         r"""Serialize to Markdown.
 
@@ -2991,9 +2996,9 @@
         :type labels: Optional[set[DocItemLabel]] = None
         :param strict_text: Deprecated.
         :type strict_text: bool = False
-        :param escaping_underscores: bool: Whether to escape underscores in the
+        :param escape_underscores: bool: Whether to escape underscores in the
             text content of the document. (Default value = True).
-        :type escaping_underscores: bool = True
+        :type escape_underscores: bool = True
         :param image_placeholder: The placeholder to include to position
             images in the markdown. (Default value = "\<!-- image --\>").
         :type image_placeholder: str = "<!-- image -->"
@@ -3009,6 +3014,12 @@
         :param page_break_placeholder: The placeholder to include for marking page
             breaks. None means no page break placeholder will be used.
         :type page_break_placeholder: Optional[str] = None
+        :param include_annotations: bool: Whether to include annotations in the export.
+            (Default value = True).
+        :type include_annotations: bool = True
+        :param mark_annotations: bool: Whether to mark annotations in the export; only
+            relevant if include_annotations is True. (Default value = False).
+        :type mark_annotations: bool = False
         :returns: The exported Markdown representation.
         :rtype: str
         """
@@ -3038,6 +3049,8 @@
                 indent=indent,
                 wrap_width=text_width if text_width > 0 else None,
                 page_break_placeholder=page_break_placeholder,
+                include_annotations=include_annotations,
+                mark_annotations=mark_annotations,
             ),
         )
         ser_res = serializer.serialize()
@@ -3087,6 +3100,7 @@
         html_head: str = "null",  # should be deprecated
         included_content_layers: Optional[set[ContentLayer]] = None,
         split_page_view: bool = False,
+        include_annotations: bool = True,
     ):
         """Save to HTML."""
         if isinstance(filename, str):
@@ -3112,6 +3126,7 @@
             html_head=html_head,
             included_content_layers=included_content_layers,
             split_page_view=split_page_view,
+            include_annotations=include_annotations,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
@@ -3164,6 +3179,7 @@
         html_head: str = "null",  # should be deprecated ...
         included_content_layers: Optional[set[ContentLayer]] = None,
         split_page_view: bool = False,
+        include_annotations: bool = True,
     ) -> str:
         r"""Serialize to HTML."""
         from docling_core.transforms.serializer.html import (
@@ -3195,6 +3211,7 @@
             html_head=html_head,
             html_lang=html_lang,
             output_style=output_style,
+            include_annotations=include_annotations,
         )
 
         if html_head == "null":
@@ -4109,7 +4126,10 @@
     @classmethod
     def validate_document(cls, d: "DoclingDocument"):
         """validate_document."""
-        if not d.validate_tree(d.body) or not d.validate_tree(d.furniture):
-            raise ValueError("Document hierachy is inconsistent.")
+        with warnings.catch_warnings():
+            # ignore warning from deprecated furniture
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            if not d.validate_tree(d.body) or not d.validate_tree(d.furniture):
+                raise ValueError("Document hierachy is inconsistent.")
 
         return d
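
At the document level, the new flags thread through both the file-saving and the string-returning entry points (mark_annotations is only exposed on the Markdown string export). An end-to-end sketch with an illustrative input path:

from docling_core.types.doc.document import DoclingDocument

doc = DoclingDocument.load_from_json("sample.json")

# Markdown: annotations are included by default and can be comment-marked
md = doc.export_to_markdown(include_annotations=True, mark_annotations=True)
doc.save_as_markdown("out.md", include_annotations=True)

# HTML: annotations render as <div data-annotation-kind="..."> elements
doc.save_as_html("out.html", include_annotations=True)
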

docling_core-2.31.1.dist-info/METADATA → docling_core-2.32.0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-core
-Version: 2.31.1
+Version: 2.32.0
 Summary: A python library to define and validate data types in Docling.
 Home-page: https://github.com/docling-project
 License: MIT

docling_core-2.31.1.dist-info/RECORD → docling_core-2.32.0.dist-info/RECORD

@@ -20,18 +20,18 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
 docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
 docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
 docling_core/transforms/chunker/hierarchical_chunker.py,sha256=7Fpwwsn2BoiR12KGPrn8fU1uuhqBLp85MRLMF0aIsL8,8281
-docling_core/transforms/chunker/hybrid_chunker.py,sha256=67Whij6zSPZbVQA-fToyBtTfLtDK6BdnZ-Mhlz0p8ZQ,11886
+docling_core/transforms/chunker/hybrid_chunker.py,sha256=i4Yskms48XRUAVhec8pTGDP1dbrTEgc1pNh5fNXqfKQ,12317
 docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
 docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
-docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=JQ-D3b5vTPQbvu4HaMfYqFzSBLbV_HnmoBGv7d6Kqn4,2220
+docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
 docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
 docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
 docling_core/transforms/serializer/base.py,sha256=9bgpWA0oMmZNRc3yIuZVnu5bJ1glClBsswtVF1vYwMI,6046
-docling_core/transforms/serializer/common.py,sha256=TC1EwHIp9PYcI8jeTKeavUAPtounYmS0V1bfS_wDKm0,17427
+docling_core/transforms/serializer/common.py,sha256=mkajw0QRL--WgVL42Vlp2e2PuUQVh79D6EKP4_3YKy0,18112
 docling_core/transforms/serializer/doctags.py,sha256=mEmRWVuebcG5pZcR1_HX146cyUk0_FjaLQtMXSgh9hs,17870
-docling_core/transforms/serializer/html.py,sha256=Xq9CU5qZTDdwstizYqWNL_TFNDs9NHK_6JvvZk0TP98,34571
+docling_core/transforms/serializer/html.py,sha256=_HN1WFKH_WJkxtZrmvm1a6-UDxsEGt_ChWdUysS1qjY,35843
 docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
-docling_core/transforms/serializer/markdown.py,sha256=4thokWJIaF3dvpchjp-Y7NTSzUuXwTmfNey4MQj-c5I,17873
+docling_core/transforms/serializer/markdown.py,sha256=ussKqIptiKPTCRNjy3edjap4DOsy52no-FLSeAyv9S0,18759
 docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
 docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
 docling_core/transforms/visualizer/layout_visualizer.py,sha256=ulXxWGIl69-HMKDPFk_XKgNCgQeDNc969PVt_X0-drA,7823
@@ -40,7 +40,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
 docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
 docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
 docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
-docling_core/types/doc/document.py,sha256=eboNYL-QVnDNnw3vL7PPVdPosfs5oNfsrVofxmdBDHw,140884
+docling_core/types/doc/document.py,sha256=wxPxTOh3pfZr33rGPgnrFSY6b70C5Fe20tqqgYRUxrI,141930
 docling_core/types/doc/labels.py,sha256=vp4h3e7AmBvezRmgrfuPehjAHTZOufphErLB4ENhdME,7171
 docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
 docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -73,8 +73,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
 docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
 docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
 docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
-docling_core-2.31.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
-docling_core-2.31.1.dist-info/METADATA,sha256=O13NvxzbHR0wUzP_3yQbOFqSI63LUvCzWvXEkiqQePY,5976
-docling_core-2.31.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling_core-2.31.1.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
-docling_core-2.31.1.dist-info/RECORD,,
+docling_core-2.32.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
+docling_core-2.32.0.dist-info/METADATA,sha256=3ZtmT43WNDIQQd4I9H-Yf1Z954rod4Y5ZDrvTNPsRBQ,5976
+docling_core-2.32.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling_core-2.32.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
+docling_core-2.32.0.dist-info/RECORD,,