docling-core 2.45.0__py3-none-any.whl → 2.46.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -359,6 +359,7 @@ class DocSerializer(BaseModel, BaseDocSerializer):
359
359
  item=item,
360
360
  doc_serializer=self,
361
361
  doc=self.doc,
362
+ visited=my_visited,
362
363
  **my_kwargs,
363
364
  )
364
365
  elif isinstance(item, PictureItem):
@@ -157,6 +157,7 @@ class DocTagsTableSerializer(BaseTableSerializer):
157
157
  item: TableItem,
158
158
  doc_serializer: BaseDocSerializer,
159
159
  doc: DoclingDocument,
160
+ visited: Optional[set[str]] = None,
160
161
  **kwargs: Any,
161
162
  ) -> SerializationResult:
162
163
  """Serializes the passed item."""
@@ -179,6 +180,7 @@ class DocTagsTableSerializer(BaseTableSerializer):
179
180
  add_cell_text=params.add_table_cell_text,
180
181
  xsize=params.xsize,
181
182
  ysize=params.ysize,
183
+ visited=visited,
182
184
  )
183
185
  res_parts.append(create_ser_result(text=otsl_text, span_source=item))
184
186
 
@@ -65,8 +65,8 @@ from docling_core.types.doc.document import (
65
65
  PictureItem,
66
66
  PictureMoleculeData,
67
67
  PictureTabularChartData,
68
+ RichTableCell,
68
69
  SectionHeaderItem,
69
- TableCell,
70
70
  TableItem,
71
71
  TextItem,
72
72
  TitleItem,
@@ -346,9 +346,6 @@ class HTMLTableSerializer(BaseTableSerializer):
346
346
  **kwargs: Any,
347
347
  ) -> SerializationResult:
348
348
  """Serializes the passed table item to HTML."""
349
- nrows = item.data.num_rows
350
- ncols = item.data.num_cols
351
-
352
349
  res_parts: list[SerializationResult] = []
353
350
  cap_res = doc_serializer.serialize_captions(item=item, tag="caption", **kwargs)
354
351
  if cap_res.text:
@@ -356,11 +353,11 @@ class HTMLTableSerializer(BaseTableSerializer):
356
353
 
357
354
  if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
358
355
  body = ""
356
+ span_source: Union[DocItem, list[SerializationResult]] = []
359
357
 
360
- for i in range(nrows):
358
+ for i, row in enumerate(item.data.grid):
361
359
  body += "<tr>"
362
- for j in range(ncols):
363
- cell: TableCell = item.data.grid[i][j]
360
+ for j, cell in enumerate(row):
364
361
 
365
362
  rowspan, rowstart = (
366
363
  cell.row_span,
@@ -376,7 +373,16 @@ class HTMLTableSerializer(BaseTableSerializer):
376
373
  if colstart != j:
377
374
  continue
378
375
 
379
- content = html.escape(cell.text.strip())
376
+ if isinstance(cell, RichTableCell):
377
+ ser_res = doc_serializer.serialize(
378
+ item=cell.ref.resolve(doc=doc), **kwargs
379
+ )
380
+ content = ser_res.text
381
+ span_source = [ser_res]
382
+ else:
383
+ content = html.escape(cell.text.strip())
384
+ span_source = item
385
+
380
386
  celltag = "td"
381
387
  if cell.column_header or cell.row_header or cell.row_section:
382
388
  celltag = "th"
@@ -389,14 +395,14 @@ class HTMLTableSerializer(BaseTableSerializer):
389
395
 
390
396
  text_dir = get_text_direction(content)
391
397
  if text_dir == "rtl":
392
- opening_tag += f' dir="{dir}"'
398
+ opening_tag += f' dir="{text_dir}"'
393
399
 
394
400
  body += f"<{opening_tag}>{content}</{celltag}>"
395
401
  body += "</tr>"
396
402
 
397
403
  if body:
398
404
  body = f"<tbody>{body}</tbody>"
399
- res_parts.append(create_ser_result(text=body, span_source=item))
405
+ res_parts.append(create_ser_result(text=body, span_source=span_source))
400
406
 
401
407
  text_res = "".join([r.text for r in res_parts])
402
408
  text_res = f"<table>{text_res}</table>" if text_res else ""
@@ -55,6 +55,7 @@ from docling_core.types.doc.document import (
55
55
  PictureItem,
56
56
  PictureMoleculeData,
57
57
  PictureTabularChartData,
58
+ RichTableCell,
58
59
  SectionHeaderItem,
59
60
  TableItem,
60
61
  TextItem,
@@ -320,7 +321,13 @@ class MarkdownTableSerializer(BaseTableSerializer):
320
321
  [
321
322
  # make sure that md tables are not broken
322
323
  # due to newline chars in the text
323
- col.text.replace("\n", " ")
324
+ (
325
+ doc_serializer.serialize(
326
+ item=col.ref.resolve(doc=doc), **kwargs
327
+ ).text
328
+ if isinstance(col, RichTableCell)
329
+ else col.text
330
+ ).replace("\n", " ")
324
331
  for col in row
325
332
  ]
326
333
  for row in item.data.grid
@@ -7,6 +7,7 @@
7
7
 
8
8
  from .base import BoundingBox, CoordOrigin, ImageRefMode, Size
9
9
  from .document import (
10
+ AnyTableCell,
10
11
  BaseAnnotation,
11
12
  ChartBar,
12
13
  ChartLine,
@@ -52,6 +53,7 @@ from .document import (
52
53
  PictureTabularChartData,
53
54
  ProvenanceItem,
54
55
  RefItem,
56
+ RichTableCell,
55
57
  Script,
56
58
  SectionHeaderItem,
57
59
  TableCell,
@@ -34,7 +34,7 @@ from pydantic import (
34
34
  validate_call,
35
35
  )
36
36
  from tabulate import tabulate
37
- from typing_extensions import Annotated, Self, deprecated
37
+ from typing_extensions import Annotated, Self, deprecated, override
38
38
 
39
39
  from docling_core.search.package import VERSION_PATTERN
40
40
  from docling_core.types.base import _JSON_POINTER_REGEX
@@ -60,7 +60,7 @@ _logger = logging.getLogger(__name__)
60
60
 
61
61
  Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
62
62
  LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
63
- CURRENT_VERSION: Final = "1.5.0"
63
+ CURRENT_VERSION: Final = "1.6.0"
64
64
 
65
65
  DEFAULT_EXPORT_LABELS = {
66
66
  DocItemLabel.TITLE,
@@ -325,7 +325,7 @@ class TableCell(BaseModel):
325
325
  in data
326
326
  ):
327
327
  return data
328
- text = data["bbox"].get("token", "")
328
+ text = data.get("bbox", {}).get("token", "")
329
329
  if not len(text):
330
330
  text_cells = data.pop("text_cell_bboxes", None)
331
331
  if text_cells:
@@ -337,11 +337,39 @@ class TableCell(BaseModel):
337
337
 
338
338
  return data
339
339
 
340
+ def _get_text(self, doc: Optional["DoclingDocument"] = None, **kwargs: Any) -> str:
341
+ return self.text
342
+
343
+
344
+ class RichTableCell(TableCell):
345
+ """RichTableCell."""
346
+
347
+ ref: "RefItem"
348
+
349
+ @override
350
+ def _get_text(self, doc: Optional["DoclingDocument"] = None, **kwargs: Any) -> str:
351
+ from docling_core.transforms.serializer.markdown import MarkdownDocSerializer
352
+
353
+ if doc is not None:
354
+ doc_serializer = kwargs.pop(
355
+ "doc_serializer", MarkdownDocSerializer(doc=doc)
356
+ )
357
+ ser_res = doc_serializer.serialize(item=self.ref.resolve(doc=doc), **kwargs)
358
+ return ser_res.text
359
+ else:
360
+ return "<!-- rich cell -->"
361
+
362
+
363
+ AnyTableCell = Annotated[
364
+ Union[RichTableCell, TableCell],
365
+ Field(union_mode="left_to_right"),
366
+ ]
367
+
340
368
 
341
369
  class TableData(BaseModel): # TBD
342
370
  """BaseTableData."""
343
371
 
344
- table_cells: List[TableCell] = []
372
+ table_cells: List[AnyTableCell] = []
345
373
  num_rows: int = 0
346
374
  num_cols: int = 0
347
375
 
@@ -380,7 +408,9 @@ class TableData(BaseModel): # TBD
380
408
 
381
409
  return table_data
382
410
 
383
- def remove_rows(self, indices: List[int]) -> List[List[TableCell]]:
411
+ def remove_rows(
412
+ self, indices: List[int], doc: Optional["DoclingDocument"] = None
413
+ ) -> List[List[TableCell]]:
384
414
  """Remove rows from the table by their indices.
385
415
 
386
416
  :param indices: List[int]: A list of indices of the rows to remove. (Starting from 0)
@@ -392,6 +422,7 @@ class TableData(BaseModel): # TBD
392
422
 
393
423
  indices = sorted(indices, reverse=True)
394
424
 
425
+ refs_to_remove = []
395
426
  all_removed_cells = []
396
427
  for row_index in indices:
397
428
  if row_index < 0 or row_index >= self.num_rows:
@@ -403,6 +434,10 @@ class TableData(BaseModel): # TBD
403
434
  end_idx = start_idx + self.num_cols
404
435
  removed_cells = self.table_cells[start_idx:end_idx]
405
436
 
437
+ for cell in removed_cells:
438
+ if isinstance(cell, RichTableCell):
439
+ refs_to_remove.append(cell.ref)
440
+
406
441
  # Remove the cells from the table
407
442
  self.table_cells = self.table_cells[:start_idx] + self.table_cells[end_idx:]
408
443
 
@@ -417,9 +452,18 @@ class TableData(BaseModel): # TBD
417
452
 
418
453
  all_removed_cells.append(removed_cells)
419
454
 
455
+ if refs_to_remove:
456
+ if doc is None:
457
+ _logger.warning(
458
+ "When table contains rich cells, `doc` argument must be provided, "
459
+ "otherwise rich cell content will be left dangling."
460
+ )
461
+ else:
462
+ doc._delete_items(refs_to_remove)
463
+
420
464
  return all_removed_cells
421
465
 
422
- def pop_row(self) -> List[TableCell]:
466
+ def pop_row(self, doc: Optional["DoclingDocument"] = None) -> List[TableCell]:
423
467
  """Remove and return the last row from the table.
424
468
 
425
469
  :returns: List[TableCell]: A list of TableCell objects representing the popped row.
@@ -427,16 +471,18 @@ class TableData(BaseModel): # TBD
427
471
  if self.num_rows == 0:
428
472
  raise IndexError("Cannot pop from an empty table.")
429
473
 
430
- return self.remove_row(self.num_rows - 1)
474
+ return self.remove_row(self.num_rows - 1, doc=doc)
431
475
 
432
- def remove_row(self, row_index: int) -> List[TableCell]:
476
+ def remove_row(
477
+ self, row_index: int, doc: Optional["DoclingDocument"] = None
478
+ ) -> List[TableCell]:
433
479
  """Remove a row from the table by its index.
434
480
 
435
481
  :param row_index: int: The index of the row to remove. (Starting from 0)
436
482
 
437
483
  :returns: List[TableCell]: A list of TableCell objects representing the removed row.
438
484
  """
439
- return self.remove_rows([row_index])[0]
485
+ return self.remove_rows([row_index], doc=doc)[0]
440
486
 
441
487
  def insert_rows(
442
488
  self, row_index: int, rows: List[List[str]], after: bool = False
@@ -1509,8 +1555,15 @@ class TableItem(FloatingItem):
1509
1555
 
1510
1556
  annotations: List[TableAnnotationType] = []
1511
1557
 
1512
- def export_to_dataframe(self) -> pd.DataFrame:
1558
+ def export_to_dataframe(
1559
+ self, doc: Optional["DoclingDocument"] = None
1560
+ ) -> pd.DataFrame:
1513
1561
  """Export the table as a Pandas DataFrame."""
1562
+ if doc is None:
1563
+ _logger.warning(
1564
+ "Usage of TableItem.export_to_dataframe() without `doc` argument is deprecated."
1565
+ )
1566
+
1514
1567
  if self.data.num_rows == 0 or self.data.num_cols == 0:
1515
1568
  return pd.DataFrame()
1516
1569
 
@@ -1539,14 +1592,15 @@ class TableItem(FloatingItem):
1539
1592
  columns = ["" for _ in range(self.data.num_cols)]
1540
1593
  for i in range(num_headers):
1541
1594
  for j, cell in enumerate(self.data.grid[i]):
1542
- col_name = cell.text
1595
+ col_name = cell._get_text(doc=doc)
1543
1596
  if columns[j] != "":
1544
1597
  col_name = f".{col_name}"
1545
1598
  columns[j] += col_name
1546
1599
 
1547
1600
  # Create table data
1548
1601
  table_data = [
1549
- [cell.text for cell in row] for row in self.data.grid[num_headers:]
1602
+ [cell._get_text(doc=doc) for cell in row]
1603
+ for row in self.data.grid[num_headers:]
1550
1604
  ]
1551
1605
 
1552
1606
  # Create DataFrame
@@ -1577,7 +1631,7 @@ class TableItem(FloatingItem):
1577
1631
 
1578
1632
  # make sure that md tables are not broken
1579
1633
  # due to newline chars in the text
1580
- text = col.text
1634
+ text = col._get_text(doc=doc)
1581
1635
  text = text.replace("\n", " ")
1582
1636
  tmp.append(text)
1583
1637
 
@@ -1623,6 +1677,7 @@ class TableItem(FloatingItem):
1623
1677
  add_cell_text: bool = True,
1624
1678
  xsize: int = 500,
1625
1679
  ysize: int = 500,
1680
+ **kwargs: Any,
1626
1681
  ) -> str:
1627
1682
  """Export the table as OTSL."""
1628
1683
  # Possible OTSL tokens...
@@ -1639,6 +1694,9 @@ class TableItem(FloatingItem):
1639
1694
  # Headers (column, row, section row):
1640
1695
  # "ched", "rhed", "srow"
1641
1696
 
1697
+ from docling_core.transforms.serializer.doctags import DocTagsDocSerializer
1698
+
1699
+ doc_serializer = DocTagsDocSerializer(doc=doc)
1642
1700
  body = []
1643
1701
  nrows = self.data.num_rows
1644
1702
  ncols = self.data.num_cols
@@ -1652,7 +1710,9 @@ class TableItem(FloatingItem):
1652
1710
  for i in range(nrows):
1653
1711
  for j in range(ncols):
1654
1712
  cell: TableCell = self.data.grid[i][j]
1655
- content = cell.text.strip()
1713
+ content = cell._get_text(
1714
+ doc=doc, doc_serializer=doc_serializer, **kwargs
1715
+ ).strip()
1656
1716
  rowspan, rowstart = (
1657
1717
  cell.row_span,
1658
1718
  cell.start_row_offset_idx,
@@ -2304,6 +2364,15 @@ class DoclingDocument(BaseModel):
2304
2364
  refs_to_be_deleted=refs_to_be_deleted,
2305
2365
  lookup=lookup,
2306
2366
  )
2367
+ if isinstance(node, TableItem):
2368
+ for cell in node.data.table_cells:
2369
+ if isinstance(cell, RichTableCell):
2370
+ path = cell.ref._split_ref_to_path()
2371
+ cell.ref = self._update_ref_with_lookup(
2372
+ item_label=path[1],
2373
+ item_index=int(path[2]),
2374
+ lookup=lookup,
2375
+ )
2307
2376
 
2308
2377
  # Update the self_ref reference
2309
2378
  if node.parent is not None:
@@ -3945,16 +4014,22 @@ class DoclingDocument(BaseModel):
3945
4014
  """num_pages."""
3946
4015
  return len(self.pages.values())
3947
4016
 
3948
- def validate_tree(self, root) -> bool:
4017
+ def validate_tree(self, root: NodeItem) -> bool:
3949
4018
  """validate_tree."""
3950
- res = []
3951
4019
  for child_ref in root.children:
3952
4020
  child = child_ref.resolve(self)
3953
- if child.parent.resolve(self) != root:
4021
+ if child.parent.resolve(self) != root or not self.validate_tree(child):
3954
4022
  return False
3955
- res.append(self.validate_tree(child))
3956
4023
 
3957
- return all(res) or len(res) == 0
4024
+ if isinstance(root, TableItem):
4025
+ for cell in root.data.table_cells:
4026
+ if isinstance(cell, RichTableCell) and (
4027
+ (par_ref := cell.ref.resolve(self).parent) is None
4028
+ or par_ref.resolve(self) != root
4029
+ ):
4030
+ return False
4031
+
4032
+ return True
3958
4033
 
3959
4034
  def iterate_items(
3960
4035
  self,
@@ -3963,7 +4038,7 @@ class DoclingDocument(BaseModel):
3963
4038
  traverse_pictures: bool = False,
3964
4039
  page_no: Optional[int] = None,
3965
4040
  included_content_layers: Optional[set[ContentLayer]] = None,
3966
- _level: int = 0, # fixed parameter, carries through the node nesting level
4041
+ _level: int = 0, # deprecated
3967
4042
  ) -> typing.Iterable[Tuple[NodeItem, int]]: # tuple of node and level
3968
4043
  """Iterate elements with level."""
3969
4044
  for item, stack in self._iterate_items_with_stack(
@@ -5324,7 +5399,9 @@ class DoclingDocument(BaseModel):
5324
5399
  grid.append([])
5325
5400
  for j, cell in enumerate(row):
5326
5401
  if j < 10:
5327
- text = get_text(text=cell.text, max_text_len=16)
5402
+ text = get_text(
5403
+ cell._get_text(doc=self), max_text_len=16
5404
+ )
5328
5405
  grid[-1].append(text)
5329
5406
 
5330
5407
  result.append("\n" + tabulate(grid) + "\n")
@@ -5588,6 +5665,16 @@ class DoclingDocument(BaseModel):
5588
5665
  )
5589
5666
  break
5590
5667
 
5668
+ # update rich table cells references:
5669
+ if isinstance(parent_item, TableItem):
5670
+ for cell in parent_item.data.table_cells:
5671
+ if (
5672
+ isinstance(cell, RichTableCell)
5673
+ and cell.ref.cref == item.self_ref
5674
+ ):
5675
+ cell.ref.cref = new_cref
5676
+ break
5677
+
5591
5678
  elif num_components == 2 and path_components[1] == "body":
5592
5679
  parent_item = self._body
5593
5680
  else:
@@ -5676,6 +5763,18 @@ class DoclingDocument(BaseModel):
5676
5763
  elif isinstance(item, ListItem):
5677
5764
  validate_list_item(self, item)
5678
5765
 
5766
+ def add_table_cell(self, table_item: TableItem, cell: TableCell) -> None:
5767
+ """Add a table cell to the table."""
5768
+ if isinstance(cell, RichTableCell):
5769
+ item = cell.ref.resolve(doc=self)
5770
+ if isinstance(item, NodeItem) and (
5771
+ (not item.parent) or item.parent.cref != table_item.self_ref
5772
+ ):
5773
+ raise ValueError(
5774
+ f"Trying to add cell with another parent {item.parent} to {table_item.self_ref}"
5775
+ )
5776
+ table_item.data.table_cells.append(cell)
5777
+
5679
5778
 
5680
5779
  # deprecated aliases (kept for backwards compatibility):
5681
5780
  BasePictureData = BaseAnnotation
@@ -252,7 +252,7 @@ def docling_document_to_legacy(doc: DoclingDocument, fallback_filaname: str = "f
252
252
 
253
253
  spans = list(_make_spans(cell, item))
254
254
  table_data[i][j] = GlmTableCell(
255
- text=cell.text,
255
+ text=cell._get_text(doc=doc),
256
256
  bbox=(
257
257
  cell.bbox.as_tuple()
258
258
  if cell.bbox is not None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.45.0
3
+ Version: 2.46.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -28,11 +28,11 @@ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZ
28
28
  docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
29
29
  docling_core/transforms/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
30
30
  docling_core/transforms/serializer/base.py,sha256=TI8Epj7gyxdTet9j-Rs4o5U09gfACfAIVoirlschviM,7266
31
- docling_core/transforms/serializer/common.py,sha256=0TNEGoA_rJ-qkVYp-X8SMUr3jTrbf6TRzPzwufYh5JM,19114
32
- docling_core/transforms/serializer/doctags.py,sha256=TD0yAm1qSVy-GsE6svpUAI-Yqjcf2rrTZ3ac9YU3gbE,19858
33
- docling_core/transforms/serializer/html.py,sha256=PPlHVu3_wnc0cD-n6n8v9clCmeY_LPqII7euVYqi6Kk,38089
31
+ docling_core/transforms/serializer/common.py,sha256=RwfdzZ9FRSHQjKM0vskg1CVqar0Z_ms38arSlLAgITc,19150
32
+ docling_core/transforms/serializer/doctags.py,sha256=VXPjAZPhBur7LaEeuqH9k31TgZWSN32lK8z8rJXzFwY,19935
33
+ docling_core/transforms/serializer/html.py,sha256=GRfRaqFIb4FXRMplB4Agl4fSNa5jsHV7P4tBtFMro9I,38453
34
34
  docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
35
- docling_core/transforms/serializer/markdown.py,sha256=VwonuAkuOPmQM7ibDIGvQBHOqhTcTJ_t187fLQQiNPo,23951
35
+ docling_core/transforms/serializer/markdown.py,sha256=hilGM1yWpbbRTjuEjfBRrhavspD5vFF_6SDvlKx8BrM,24230
36
36
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
37
37
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
38
38
  docling_core/transforms/visualizer/key_value_visualizer.py,sha256=fp7nFLy4flOSiavdRgg5y1Mu7WVLIDGh1zEHsq8kgVM,8979
@@ -41,9 +41,9 @@ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao
41
41
  docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
42
42
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
43
43
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
44
- docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
44
+ docling_core/types/doc/__init__.py,sha256=Vsl3oJV3_BLpS7rIwvahhcWOwmEBvj7ZbQzQCCl-IQk,1678
45
45
  docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
46
- docling_core/types/doc/document.py,sha256=gMruWRH1ELYepSMaGA5b8_l9bIjIz7JU4Yh78W_sb00,198193
46
+ docling_core/types/doc/document.py,sha256=Ab-JOc6fkzocXP3PcxPRXJPjLOhOTYo_0571vSr6VXo,202093
47
47
  docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
48
48
  docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
49
49
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -73,12 +73,12 @@ docling_core/utils/alias.py,sha256=B6Lqvss8CbaNARHLR4qSmNh9OkB6LvqTpxfsFmkLAFo,8
73
73
  docling_core/utils/file.py,sha256=CSNclJGL2OwLIc8DQFdoLxr22FUc4_UC7zS6pNrFfkQ,6858
74
74
  docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6LZDtC8,2290
75
75
  docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
76
- docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
76
+ docling_core/utils/legacy.py,sha256=G7ed8fkBpIO8hG3DKEY83cHsrKJHyvDst_1jSdgBXMI,24406
77
77
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
78
78
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
79
- docling_core-2.45.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
- docling_core-2.45.0.dist-info/METADATA,sha256=VX5jfhqswstEumhmLi6VrGD9crC8RKy52z835nCvORw,6453
81
- docling_core-2.45.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
- docling_core-2.45.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
- docling_core-2.45.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
- docling_core-2.45.0.dist-info/RECORD,,
79
+ docling_core-2.46.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
+ docling_core-2.46.0.dist-info/METADATA,sha256=txMHh-7y8N3RiJ_M_HbrsvzRyGPJVXv8UcA6_DpAfok,6453
81
+ docling_core-2.46.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ docling_core-2.46.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
+ docling_core-2.46.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
+ docling_core-2.46.0.dist-info/RECORD,,