docling-core: docling_core-2.47.0-py3-none-any.whl → docling_core-2.48.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -145,7 +145,7 @@ class TripletTableSerializer(BaseTableSerializer):
145
145
  parts.append(cap_res)
146
146
 
147
147
  if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
148
- table_df = item.export_to_dataframe()
148
+ table_df = item.export_to_dataframe(doc)
149
149
  if table_df.shape[0] >= 1 and table_df.shape[1] >= 2:
150
150
 
151
151
  # copy header as first row and shift all rows by one
@@ -394,6 +394,7 @@ class DocSerializer(BaseModel, BaseDocSerializer):
394
394
  item=item,
395
395
  doc_serializer=self,
396
396
  doc=self.doc,
397
+ visited=my_visited,
397
398
  **my_kwargs,
398
399
  )
399
400
  return part
@@ -32,6 +32,7 @@ from docling_core.types.doc.document import (
32
32
  DoclingDocument,
33
33
  FloatingItem,
34
34
  FormItem,
35
+ GroupItem,
35
36
  InlineGroup,
36
37
  KeyValueItem,
37
38
  ListGroup,
@@ -42,6 +43,7 @@ from docling_core.types.doc.document import (
42
43
  PictureMoleculeData,
43
44
  PictureTabularChartData,
44
45
  ProvenanceItem,
46
+ SectionHeaderItem,
45
47
  TableItem,
46
48
  TextItem,
47
49
  )
@@ -94,11 +96,11 @@ class DocTagsTextSerializer(BaseModel, BaseTextSerializer):
94
96
  item: TextItem,
95
97
  doc_serializer: BaseDocSerializer,
96
98
  doc: DoclingDocument,
99
+ visited: Optional[set[str]] = None,
97
100
  **kwargs: Any,
98
101
  ) -> SerializationResult:
99
102
  """Serializes the passed item."""
100
- from docling_core.types.doc.document import SectionHeaderItem
101
-
103
+ my_visited = visited if visited is not None else set()
102
104
  params = DocTagsParams(**kwargs)
103
105
  wrap_tag: Optional[str] = DocumentToken.create_token_name_from_doc_item_label(
104
106
  label=item.label,
@@ -116,12 +118,21 @@ class DocTagsTextSerializer(BaseModel, BaseTextSerializer):
116
118
  parts.append(location)
117
119
 
118
120
  if params.add_content:
119
- text_part = item.text
120
- text_part = doc_serializer.post_process(
121
- text=text_part,
122
- formatting=item.formatting,
123
- hyperlink=item.hyperlink,
124
- )
121
+ if (
122
+ item.text == ""
123
+ and len(item.children) == 1
124
+ and isinstance(
125
+ (child_group := item.children[0].resolve(doc)), InlineGroup
126
+ )
127
+ ):
128
+ ser_res = doc_serializer.serialize(item=child_group, visited=my_visited)
129
+ text_part = ser_res.text
130
+ else:
131
+ text_part = doc_serializer.post_process(
132
+ text=item.text,
133
+ formatting=item.formatting,
134
+ hyperlink=item.hyperlink,
135
+ )
125
136
 
126
137
  if isinstance(item, CodeItem):
127
138
  language_token = DocumentToken.get_code_language_token(
@@ -506,7 +517,12 @@ class DocTagsFallbackSerializer(BaseFallbackSerializer):
506
517
  **kwargs: Any,
507
518
  ) -> SerializationResult:
508
519
  """Serializes the passed item."""
509
- return create_ser_result()
520
+ if isinstance(item, GroupItem):
521
+ parts = doc_serializer.get_parts(item=item, **kwargs)
522
+ text_res = "\n".join([p.text for p in parts if p.text])
523
+ return create_ser_result(text=text_res, span_source=parts)
524
+ else:
525
+ return create_ser_result()
510
526
 
511
527
 
512
528
  class DocTagsAnnotationSerializer(BaseAnnotationSerializer):
@@ -55,6 +55,7 @@ from docling_core.types.doc.document import (
55
55
  FormItem,
56
56
  FormulaItem,
57
57
  GraphData,
58
+ GroupItem,
58
59
  ImageRef,
59
60
  InlineGroup,
60
61
  KeyValueItem,
@@ -139,21 +140,34 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
139
140
  res_parts: list[SerializationResult] = []
140
141
  post_processed = False
141
142
 
142
- # Prepare the HTML based on item type
143
- if isinstance(item, TitleItem):
144
- text_inner = self._prepare_content(item.text)
145
- text = get_html_tag_with_text_direction(html_tag="h1", text=text_inner)
143
+ has_inline_repr = (
144
+ item.text == ""
145
+ and len(item.children) == 1
146
+ and isinstance((child_group := item.children[0].resolve(doc)), InlineGroup)
147
+ )
148
+ if has_inline_repr:
149
+ text = doc_serializer.serialize(item=child_group, visited=my_visited).text
150
+ post_processed = True
151
+ else:
152
+ text = item.text
153
+ if not isinstance(item, (CodeItem, FormulaItem)):
154
+ text = html.escape(text, quote=False)
155
+ text = text.replace("\n", "<br>")
146
156
 
147
- elif isinstance(item, SectionHeaderItem):
148
- section_level = min(item.level + 1, 6)
149
- text_inner = self._prepare_content(item.text)
157
+ # Prepare the HTML based on item type
158
+ if isinstance(item, (TitleItem, SectionHeaderItem)):
159
+ section_level = (
160
+ min(item.level + 1, 6) if isinstance(item, SectionHeaderItem) else 1
161
+ )
150
162
  text = get_html_tag_with_text_direction(
151
- html_tag=f"h{section_level}", text=text_inner
163
+ html_tag=f"h{section_level}", text=text
152
164
  )
153
165
 
154
166
  elif isinstance(item, FormulaItem):
155
167
  text = self._process_formula(
156
168
  item=item,
169
+ text=text,
170
+ orig=item.orig,
157
171
  doc=doc,
158
172
  image_mode=params.image_mode,
159
173
  formula_to_mathml=params.formula_to_mathml,
@@ -161,19 +175,26 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
161
175
  )
162
176
 
163
177
  elif isinstance(item, CodeItem):
164
- text = self._process_code(item=item, is_inline_scope=is_inline_scope)
178
+ text = (
179
+ f"<code>{text}</code>"
180
+ if is_inline_scope
181
+ else f"<pre><code>{text}</code></pre>"
182
+ )
165
183
 
166
184
  elif isinstance(item, ListItem):
167
185
  # List items are handled by list serializer
168
186
  text_parts: list[str] = []
169
- if item_text := self._prepare_content(item.text):
170
- item_text = doc_serializer.post_process(
171
- text=item_text,
172
- formatting=item.formatting,
173
- hyperlink=item.hyperlink,
174
- )
175
- post_processed = True
176
- text_parts.append(item_text)
187
+ if text:
188
+ if has_inline_repr:
189
+ text = f"\n{text}\n"
190
+ else:
191
+ text = doc_serializer.post_process(
192
+ text=text,
193
+ formatting=item.formatting,
194
+ hyperlink=item.hyperlink,
195
+ )
196
+ post_processed = True
197
+ text_parts.append(text)
177
198
  nested_parts = [
178
199
  r.text
179
200
  for r in doc_serializer.get_parts(
@@ -184,29 +205,26 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
184
205
  )
185
206
  ]
186
207
  text_parts.extend(nested_parts)
187
- text_inner = "\n".join(text_parts)
208
+ text = "\n".join(text_parts)
188
209
  if nested_parts:
189
- text_inner = f"\n{text_inner}\n"
210
+ text = f"\n{text}\n"
190
211
  text = (
191
212
  get_html_tag_with_text_direction(
192
213
  html_tag="li",
193
- text=text_inner,
214
+ text=text,
194
215
  attrs=(
195
216
  {"style": f"list-style-type: '{item.marker} ';"}
196
217
  if params.show_original_list_item_marker and item.marker
197
218
  else {}
198
219
  ),
199
220
  )
200
- if text_inner
221
+ if text
201
222
  else ""
202
223
  )
203
224
 
204
- elif is_inline_scope:
205
- text = self._prepare_content(item.text)
206
- else:
225
+ elif not is_inline_scope:
207
226
  # Regular text item
208
- text_inner = self._prepare_content(item.text)
209
- text = get_html_tag_with_text_direction(html_tag="p", text=text_inner)
227
+ text = get_html_tag_with_text_direction(html_tag="p", text=text)
210
228
 
211
229
  # Apply formatting and hyperlinks
212
230
  if not post_processed:
@@ -227,66 +245,44 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
227
245
 
228
246
  return create_ser_result(text=text, span_source=res_parts)
229
247
 
230
- def _prepare_content(
231
- self, text: str, do_escape_html=True, do_replace_newline=True
232
- ) -> str:
233
- """Prepare text content for HTML inclusion."""
234
- if do_escape_html:
235
- text = html.escape(text, quote=False)
236
- if do_replace_newline:
237
- text = text.replace("\n", "<br>")
238
- return text
239
-
240
- def _process_code(
241
- self,
242
- item: CodeItem,
243
- is_inline_scope: bool,
244
- ) -> str:
245
- code_text = self._prepare_content(
246
- item.text, do_escape_html=False, do_replace_newline=False
247
- )
248
- if is_inline_scope:
249
- text = f"<code>{code_text}</code>"
250
- else:
251
- text = f"<pre><code>{code_text}</code></pre>"
252
-
253
- return text
254
-
255
248
  def _process_formula(
256
249
  self,
257
- item: FormulaItem,
250
+ *,
251
+ item: DocItem,
252
+ text: str,
253
+ orig: str,
258
254
  doc: DoclingDocument,
259
255
  image_mode: ImageRefMode,
260
256
  formula_to_mathml: bool,
261
257
  is_inline_scope: bool,
262
258
  ) -> str:
263
259
  """Process a formula item to HTML/MathML."""
264
- math_formula = self._prepare_content(
265
- item.text, do_escape_html=False, do_replace_newline=False
266
- )
267
-
268
260
  # If formula is empty, try to use an image fallback
269
- if item.text == "" and item.orig != "":
270
- img_fallback = self._get_formula_image_fallback(item, doc)
271
- if (
272
- image_mode == ImageRefMode.EMBEDDED
273
- and len(item.prov) > 0
274
- and img_fallback
275
- ):
276
- return img_fallback
261
+ if (
262
+ text == ""
263
+ and orig != ""
264
+ and len(item.prov) > 0
265
+ and image_mode == ImageRefMode.EMBEDDED
266
+ and (
267
+ img_fallback := self._get_formula_image_fallback(
268
+ item=item, orig=orig, doc=doc
269
+ )
270
+ )
271
+ ):
272
+ return img_fallback
277
273
 
278
274
  # Try to generate MathML
279
- if formula_to_mathml and math_formula:
275
+ elif formula_to_mathml and text:
280
276
  try:
281
277
  # Set display mode based on context
282
278
  display_mode = "inline" if is_inline_scope else "block"
283
279
  mathml_element = latex2mathml.converter.convert_to_element(
284
- math_formula, display=display_mode
280
+ text, display=display_mode
285
281
  )
286
282
  annotation = SubElement(
287
283
  mathml_element, "annotation", dict(encoding="TeX")
288
284
  )
289
- annotation.text = math_formula
285
+ annotation.text = text
290
286
  mathml = unescape(tostring(mathml_element, encoding="unicode"))
291
287
 
292
288
  # Don't wrap in div for inline formulas
@@ -296,40 +292,40 @@ class HTMLTextSerializer(BaseModel, BaseTextSerializer):
296
292
  return f"<div>{mathml}</div>"
297
293
 
298
294
  except Exception:
299
- img_fallback = self._get_formula_image_fallback(item, doc)
295
+ img_fallback = self._get_formula_image_fallback(
296
+ item=item, orig=orig, doc=doc
297
+ )
300
298
  if (
301
299
  image_mode == ImageRefMode.EMBEDDED
302
300
  and len(item.prov) > 0
303
301
  and img_fallback
304
302
  ):
305
303
  return img_fallback
306
- elif math_formula:
307
- return f"<pre>{math_formula}</pre>"
304
+ elif text:
305
+ return f"<pre>{text}</pre>"
308
306
  else:
309
307
  return "<pre>Formula not decoded</pre>"
310
308
 
311
309
  _logger.warning("Could not parse formula with MathML")
312
310
 
313
311
  # Fallback options if we got here
314
- if math_formula and is_inline_scope:
315
- return f"<code>{math_formula}</code>"
316
- elif math_formula and (not is_inline_scope):
317
- f"<pre>{math_formula}</pre>"
312
+ if text and is_inline_scope:
313
+ return f"<code>{text}</code>"
314
+ elif text and (not is_inline_scope):
315
+ f"<pre>{text}</pre>"
318
316
  elif is_inline_scope:
319
317
  return '<span class="formula-not-decoded">Formula not decoded</span>'
320
318
 
321
319
  return '<div class="formula-not-decoded">Formula not decoded</div>'
322
320
 
323
321
  def _get_formula_image_fallback(
324
- self, item: TextItem, doc: DoclingDocument
322
+ self, *, item: DocItem, orig: str, doc: DoclingDocument
325
323
  ) -> Optional[str]:
326
324
  """Try to get an image fallback for a formula."""
327
325
  item_image = item.get_image(doc=doc)
328
326
  if item_image is not None:
329
327
  img_ref = ImageRef.from_pil(item_image, dpi=72)
330
- return (
331
- "<figure>" f'<img src="{img_ref.uri}" alt="{item.orig}" />' "</figure>"
332
- )
328
+ return "<figure>" f'<img src="{img_ref.uri}" alt="{orig}" />' "</figure>"
333
329
  return None
334
330
 
335
331
 
@@ -792,21 +788,30 @@ class HTMLFallbackSerializer(BaseFallbackSerializer):
792
788
  """HTML-specific fallback serializer."""
793
789
 
794
790
  @override
795
- def serialize(self, *, item: NodeItem, **kwargs: Any) -> SerializationResult:
791
+ def serialize(
792
+ self,
793
+ *,
794
+ item: NodeItem,
795
+ doc_serializer: "BaseDocSerializer",
796
+ doc: DoclingDocument,
797
+ **kwargs: Any,
798
+ ) -> SerializationResult:
796
799
  """Fallback serializer for items not handled by other serializers."""
797
- if isinstance(item, DocItem):
800
+ if isinstance(item, GroupItem):
801
+ parts = doc_serializer.get_parts(item=item, **kwargs)
802
+ text_res = "\n".join([p.text for p in parts if p.text])
803
+ return create_ser_result(text=text_res, span_source=parts)
804
+ else:
798
805
  return create_ser_result(
799
806
  text=f"<!-- Unhandled item type: {item.__class__.__name__} -->",
800
- span_source=item,
807
+ span_source=item if isinstance(item, DocItem) else [],
801
808
  )
802
- else:
803
- # For group items, we don't generate any markup
804
- return create_ser_result()
805
809
 
806
810
 
807
811
  class HTMLAnnotationSerializer(BaseModel, BaseAnnotationSerializer):
808
812
  """HTML-specific annotation serializer."""
809
813
 
814
+ @override
810
815
  def serialize(
811
816
  self,
812
817
  *,
@@ -45,6 +45,7 @@ from docling_core.types.doc.document import (
45
45
  Formatting,
46
46
  FormItem,
47
47
  FormulaItem,
48
+ GroupItem,
48
49
  ImageRef,
49
50
  InlineGroup,
50
51
  KeyValueItem,
@@ -124,26 +125,24 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
124
125
  my_visited = visited if visited is not None else set()
125
126
  params = MarkdownParams(**kwargs)
126
127
  res_parts: list[SerializationResult] = []
127
- text = item.text
128
128
  escape_html = True
129
129
  escape_underscores = True
130
- processing_pending = True
131
- if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
132
- # case where processing/formatting should be applied first (in inner scope)
130
+
131
+ has_inline_repr = (
132
+ item.text == ""
133
+ and len(item.children) == 1
134
+ and isinstance((child_group := item.children[0].resolve(doc)), InlineGroup)
135
+ )
136
+ if has_inline_repr:
137
+ text = doc_serializer.serialize(item=child_group, visited=my_visited).text
133
138
  processing_pending = False
134
- if (
135
- text == ""
136
- and len(item.children) == 1
137
- and isinstance(
138
- (child_group := item.children[0].resolve(doc)), InlineGroup
139
- )
140
- ):
141
- # case of inline within heading / list item
142
- ser_res = doc_serializer.serialize(item=child_group)
143
- text = ser_res.text
144
- for span in ser_res.spans:
145
- my_visited.add(span.item.self_ref)
146
- else:
139
+ else:
140
+ text = item.text
141
+ processing_pending = True
142
+
143
+ if isinstance(item, (ListItem, TitleItem, SectionHeaderItem)):
144
+ if not has_inline_repr:
145
+ # case where processing/formatting should be applied first (in inner scope)
147
146
  text = doc_serializer.post_process(
148
147
  text=text,
149
148
  escape_html=escape_html,
@@ -151,6 +150,7 @@ class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
151
150
  formatting=item.formatting,
152
151
  hyperlink=item.hyperlink,
153
152
  )
153
+ processing_pending = False
154
154
 
155
155
  if isinstance(item, ListItem):
156
156
  pieces: list[str] = []
@@ -600,13 +600,15 @@ class MarkdownFallbackSerializer(BaseFallbackSerializer):
600
600
  **kwargs: Any,
601
601
  ) -> SerializationResult:
602
602
  """Serializes the passed item."""
603
- if isinstance(item, DocItem):
603
+ if isinstance(item, GroupItem):
604
+ parts = doc_serializer.get_parts(item=item, **kwargs)
605
+ text_res = "\n\n".join([p.text for p in parts if p.text])
606
+ return create_ser_result(text=text_res, span_source=parts)
607
+ else:
604
608
  return create_ser_result(
605
609
  text="<!-- missing-text -->",
606
- span_source=item,
610
+ span_source=item if isinstance(item, DocItem) else [],
607
611
  )
608
- else:
609
- return create_ser_result()
610
612
 
611
613
 
612
614
  class MarkdownDocSerializer(DocSerializer):
@@ -60,7 +60,7 @@ _logger = logging.getLogger(__name__)
60
60
 
61
61
  Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
62
62
  LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
63
- CURRENT_VERSION: Final = "1.6.0"
63
+ CURRENT_VERSION: Final = "1.7.0"
64
64
 
65
65
  DEFAULT_EXPORT_LABELS = {
66
66
  DocItemLabel.TITLE,
@@ -310,6 +310,7 @@ class TableCell(BaseModel):
310
310
  column_header: bool = False
311
311
  row_header: bool = False
312
312
  row_section: bool = False
313
+ fillable: bool = False
313
314
 
314
315
  @model_validator(mode="before")
315
316
  @classmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.47.0
3
+ Version: 2.48.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -19,7 +19,7 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
19
19
  docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
20
20
  docling_core/transforms/chunker/__init__.py,sha256=Qg5RhC-2QqdXKEfjzNGJaVi0NqBCL3xAhKWJGOlrE3M,375
21
21
  docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
22
- docling_core/transforms/chunker/hierarchical_chunker.py,sha256=uDf-qGiIT_4JUEg9NOdzvDqAPOTqycKJ-jEpDkV3jJU,8243
22
+ docling_core/transforms/chunker/hierarchical_chunker.py,sha256=qc-gnuxji-2lrlZCRr34VubBciBTE4ClZ3QplgNpUx8,8246
23
23
  docling_core/transforms/chunker/hybrid_chunker.py,sha256=xjkz8hy3tXXzkJzf7QMFOEq_v8V7Jcs9tCY0Mxjge74,12548
24
24
  docling_core/transforms/chunker/page_chunker.py,sha256=gLUlqA_klK-rkuPVYuJKi3ZuTIGdd2HD7ces72AiZ2U,2018
25
25
  docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
@@ -28,11 +28,11 @@ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZ
28
28
  docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
29
29
  docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
30
30
  docling_core/transforms/serializer/base.py,sha256=TI8Epj7gyxdTet9j-Rs4o5U09gfACfAIVoirlschviM,7266
31
- docling_core/transforms/serializer/common.py,sha256=RwfdzZ9FRSHQjKM0vskg1CVqar0Z_ms38arSlLAgITc,19150
32
- docling_core/transforms/serializer/doctags.py,sha256=VXPjAZPhBur7LaEeuqH9k31TgZWSN32lK8z8rJXzFwY,19935
33
- docling_core/transforms/serializer/html.py,sha256=GRfRaqFIb4FXRMplB4Agl4fSNa5jsHV7P4tBtFMro9I,38453
31
+ docling_core/transforms/serializer/common.py,sha256=vfJhu0b4vAcIres85PX774RQSTKu9RueBOWMO95FQyc,19186
32
+ docling_core/transforms/serializer/doctags.py,sha256=9_aV_ffTOTtQKZQTKz_I3kRTQ_GXHCePKwXnR-rnggA,20644
33
+ docling_core/transforms/serializer/html.py,sha256=h0yiDgTNIeOS-rJaMRfinUFgrZygd3MjheM7pjLw5F0,38380
34
34
  docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
35
- docling_core/transforms/serializer/markdown.py,sha256=hilGM1yWpbbRTjuEjfBRrhavspD5vFF_6SDvlKx8BrM,24230
35
+ docling_core/transforms/serializer/markdown.py,sha256=9Sy7xWSegX0zdQb9vPzEUFucyGQUA4TcQxMfE70SJsk,24354
36
36
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
37
37
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
38
38
  docling_core/transforms/visualizer/key_value_visualizer.py,sha256=fp7nFLy4flOSiavdRgg5y1Mu7WVLIDGh1zEHsq8kgVM,8979
@@ -43,7 +43,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
43
43
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
44
44
  docling_core/types/doc/__init__.py,sha256=Vsl3oJV3_BLpS7rIwvahhcWOwmEBvj7ZbQzQCCl-IQk,1678
45
45
  docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
46
- docling_core/types/doc/document.py,sha256=jyMcK1oiu8X8juNa9DuI3S1imn4hXwjOS7iTLQ1HykU,202707
46
+ docling_core/types/doc/document.py,sha256=sZsLV6GfFF8TzTgD6C47a9YrurLZFhwqt8I9PZmYkJY,202734
47
47
  docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
48
48
  docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
49
49
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -76,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
76
76
  docling_core/utils/legacy.py,sha256=G7ed8fkBpIO8hG3DKEY83cHsrKJHyvDst_1jSdgBXMI,24406
77
77
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
78
78
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
79
- docling_core-2.47.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
- docling_core-2.47.0.dist-info/METADATA,sha256=jW4Zdx0WwStnLDifSsvYyGLw-5C2IYiEeK4IQRGQi-I,6453
81
- docling_core-2.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
- docling_core-2.47.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
- docling_core-2.47.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
- docling_core-2.47.0.dist-info/RECORD,,
79
+ docling_core-2.48.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
+ docling_core-2.48.0.dist-info/METADATA,sha256=WybgSJP5TG0mMu5sA2bN0pVKCoZxKCf4KR70MGK3904,6453
81
+ docling_core-2.48.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ docling_core-2.48.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
+ docling_core-2.48.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
+ docling_core-2.48.0.dist-info/RECORD,,