docling-core 2.36.0__tar.gz → 2.37.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. (The original page links to more details; the link target is not preserved in this text.)

Files changed (107)
  1. {docling_core-2.36.0 → docling_core-2.37.0}/PKG-INFO +1 -1
  2. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/html.py +1 -1
  3. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/visualizer/table_visualizer.py +109 -4
  4. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/document.py +114 -1
  5. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/PKG-INFO +1 -1
  6. {docling_core-2.36.0 → docling_core-2.37.0}/pyproject.toml +1 -1
  7. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_visualization.py +17 -1
  8. {docling_core-2.36.0 → docling_core-2.37.0}/LICENSE +0 -0
  9. {docling_core-2.36.0 → docling_core-2.37.0}/README.md +0 -0
  10. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/__init__.py +0 -0
  11. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/cli/__init__.py +0 -0
  12. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/cli/view.py +0 -0
  13. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/experimental/__init__.py +0 -0
  14. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/py.typed +0 -0
  15. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
  16. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
  17. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  18. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
  19. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  20. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  21. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  22. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  23. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/search/__init__.py +0 -0
  24. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  25. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/search/mapping.py +0 -0
  26. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/search/meta.py +0 -0
  27. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/search/package.py +0 -0
  28. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/__init__.py +0 -0
  29. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/__init__.py +0 -0
  30. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/base.py +0 -0
  31. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  32. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  33. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  34. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  35. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  36. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  37. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/__init__.py +0 -0
  38. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/base.py +0 -0
  39. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/common.py +0 -0
  40. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/doctags.py +0 -0
  41. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/html_styles.py +0 -0
  42. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/serializer/markdown.py +0 -0
  43. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/visualizer/__init__.py +0 -0
  44. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/visualizer/base.py +0 -0
  45. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  46. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  47. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/__init__.py +0 -0
  48. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/base.py +0 -0
  49. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/__init__.py +0 -0
  50. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/base.py +0 -0
  51. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/labels.py +0 -0
  52. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/page.py +0 -0
  53. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/tokens.py +0 -0
  54. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/doc/utils.py +0 -0
  55. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/gen/__init__.py +0 -0
  56. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/gen/generic.py +0 -0
  57. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/io/__init__.py +0 -0
  58. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/__init__.py +0 -0
  59. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/base.py +0 -0
  60. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  61. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  62. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  63. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/document.py +0 -0
  64. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/legacy_doc/tokens.py +0 -0
  65. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/nlp/__init__.py +0 -0
  66. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/nlp/qa.py +0 -0
  67. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/nlp/qa_labels.py +0 -0
  68. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/__init__.py +0 -0
  69. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/attribute.py +0 -0
  70. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/base.py +0 -0
  71. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/predicate.py +0 -0
  72. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/record.py +0 -0
  73. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/statement.py +0 -0
  74. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/types/rec/subject.py +0 -0
  75. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/__init__.py +0 -0
  76. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/alias.py +0 -0
  77. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/file.py +0 -0
  78. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/generate_docs.py +0 -0
  79. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/generate_jsonschema.py +0 -0
  80. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/legacy.py +0 -0
  81. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/validate.py +0 -0
  82. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core/utils/validators.py +0 -0
  83. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/SOURCES.txt +0 -0
  84. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/dependency_links.txt +0 -0
  85. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/entry_points.txt +0 -0
  86. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/requires.txt +0 -0
  87. {docling_core-2.36.0 → docling_core-2.37.0}/docling_core.egg-info/top_level.txt +0 -0
  88. {docling_core-2.36.0 → docling_core-2.37.0}/setup.cfg +0 -0
  89. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_base.py +0 -0
  90. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_collection.py +0 -0
  91. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_data_gen_flag.py +0 -0
  92. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_doc_base.py +0 -0
  93. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_doc_legacy_convert.py +0 -0
  94. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_doc_schema.py +0 -0
  95. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_doc_schema_extractor.py +0 -0
  96. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_docling_doc.py +0 -0
  97. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_doctags_load.py +0 -0
  98. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_hierarchical_chunker.py +0 -0
  99. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_hybrid_chunker.py +0 -0
  100. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_json_schema_to_search_mapper.py +0 -0
  101. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_nlp_qa.py +0 -0
  102. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_otsl_table_export.py +0 -0
  103. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_page.py +0 -0
  104. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_rec_schema.py +0 -0
  105. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_search_meta.py +0 -0
  106. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_serialization.py +0 -0
  107. {docling_core-2.36.0 → docling_core-2.37.0}/test/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.36.0
3
+ Version: 2.37.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -340,7 +340,7 @@ class HTMLTableSerializer(BaseTableSerializer):
340
340
 
341
341
  content = html.escape(cell.text.strip())
342
342
  celltag = "td"
343
- if cell.column_header:
343
+ if cell.column_header or cell.row_header or cell.row_section:
344
344
  celltag = "th"
345
345
 
346
346
  opening_tag = f"{celltag}"
@@ -23,8 +23,23 @@ class TableVisualizer(BaseVisualizer):
23
23
 
24
24
  # show_Label: bool = False
25
25
  show_cells: bool = True
26
- # show_rows: bool = False
27
- # show_cols: bool = False
26
+ show_rows: bool = False
27
+ show_cols: bool = False
28
+
29
+ cell_color: tuple[int, int, int, int] = (256, 0, 0, 32)
30
+ cell_outline: tuple[int, int, int, int] = (256, 0, 0, 128)
31
+
32
+ row_color: tuple[int, int, int, int] = (256, 0, 0, 32)
33
+ row_outline: tuple[int, int, int, int] = (256, 0, 0, 128)
34
+
35
+ row_header_color: tuple[int, int, int, int] = (0, 256, 0, 32)
36
+ row_header_outline: tuple[int, int, int, int] = (0, 256, 0, 128)
37
+
38
+ col_color: tuple[int, int, int, int] = (0, 256, 0, 32)
39
+ col_outline: tuple[int, int, int, int] = (0, 256, 0, 128)
40
+
41
+ col_header_color: tuple[int, int, int, int] = (0, 0, 256, 32)
42
+ col_header_outline: tuple[int, int, int, int] = (0, 0, 256, 128)
28
43
 
29
44
  base_visualizer: Optional[BaseVisualizer] = None
30
45
  params: Params = Params()
@@ -45,7 +60,21 @@ class TableVisualizer(BaseVisualizer):
45
60
 
46
61
  tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height)
47
62
 
48
- cell_color = (256, 0, 0, 32) # Transparent black for cells
63
+ cell_color = self.params.cell_color # Transparent black for cells
64
+ cell_outline = self.params.cell_outline
65
+ if cell.column_header:
66
+ cell_color = (
67
+ self.params.col_header_color
68
+ ) # Transparent black for cells
69
+ cell_outline = self.params.col_header_outline
70
+ if cell.row_header:
71
+ cell_color = (
72
+ self.params.row_header_color
73
+ ) # Transparent black for cells
74
+ cell_outline = self.params.row_header_outline
75
+ if cell.row_section:
76
+ cell_color = self.params.row_header_color
77
+ cell_outline = self.params.row_header_outline
49
78
 
50
79
  cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
51
80
  cx0 *= scale_x
@@ -55,10 +84,68 @@ class TableVisualizer(BaseVisualizer):
55
84
 
56
85
  draw.rectangle(
57
86
  [(cx0, cy0), (cx1, cy1)],
58
- outline=(256, 0, 0, 128),
87
+ outline=cell_outline,
59
88
  fill=cell_color,
60
89
  )
61
90
 
91
+ def _draw_table_rows(
92
+ self,
93
+ table: TableItem,
94
+ page_image: Image,
95
+ page_height: float,
96
+ scale_x: float,
97
+ scale_y: float,
98
+ ):
99
+ """Draw individual table cells."""
100
+ draw = ImageDraw.Draw(page_image, "RGBA")
101
+
102
+ rows = table.data.get_row_bounding_boxes()
103
+
104
+ for rid, bbox in rows.items():
105
+
106
+ tl_bbox = bbox.to_top_left_origin(page_height=page_height)
107
+
108
+ cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
109
+ cx0 *= scale_x
110
+ cx1 *= scale_x
111
+ cy0 *= scale_y
112
+ cy1 *= scale_y
113
+
114
+ draw.rectangle(
115
+ [(cx0, cy0), (cx1, cy1)],
116
+ outline=self.params.row_outline,
117
+ fill=self.params.row_color,
118
+ )
119
+
120
+ def _draw_table_cols(
121
+ self,
122
+ table: TableItem,
123
+ page_image: Image,
124
+ page_height: float,
125
+ scale_x: float,
126
+ scale_y: float,
127
+ ):
128
+ """Draw individual table cells."""
129
+ draw = ImageDraw.Draw(page_image, "RGBA")
130
+
131
+ cols = table.data.get_column_bounding_boxes()
132
+
133
+ for cid, bbox in cols.items():
134
+
135
+ tl_bbox = bbox.to_top_left_origin(page_height=page_height)
136
+
137
+ cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
138
+ cx0 *= scale_x
139
+ cx1 *= scale_x
140
+ cy0 *= scale_y
141
+ cy1 *= scale_y
142
+
143
+ draw.rectangle(
144
+ [(cx0, cy0), (cx1, cy1)],
145
+ outline=self.params.col_outline,
146
+ fill=self.params.col_color,
147
+ )
148
+
62
149
  def _draw_doc_tables(
63
150
  self,
64
151
  doc: DoclingDocument,
@@ -108,6 +195,24 @@ class TableVisualizer(BaseVisualizer):
108
195
  scale_y=image.height / doc.pages[page_nr].size.height,
109
196
  )
110
197
 
198
+ if self.params.show_rows:
199
+ self._draw_table_rows(
200
+ table=elem,
201
+ page_height=doc.pages[page_nr].size.height,
202
+ page_image=image,
203
+ scale_x=image.width / doc.pages[page_nr].size.width,
204
+ scale_y=image.height / doc.pages[page_nr].size.height,
205
+ )
206
+
207
+ if self.params.show_cols:
208
+ self._draw_table_cols(
209
+ table=elem,
210
+ page_height=doc.pages[page_nr].size.height,
211
+ page_image=image,
212
+ scale_x=image.width / doc.pages[page_nr].size.width,
213
+ scale_y=image.height / doc.pages[page_nr].size.height,
214
+ )
215
+
111
216
  else:
112
217
  raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
113
218
 
@@ -38,7 +38,7 @@ from typing_extensions import Annotated, Self, deprecated
38
38
  from docling_core.search.package import VERSION_PATTERN
39
39
  from docling_core.types.base import _JSON_POINTER_REGEX
40
40
  from docling_core.types.doc import BoundingBox, Size
41
- from docling_core.types.doc.base import ImageRefMode
41
+ from docling_core.types.doc.base import CoordOrigin, ImageRefMode
42
42
  from docling_core.types.doc.labels import (
43
43
  CodeLanguageLabel,
44
44
  DocItemLabel,
@@ -372,6 +372,119 @@ class TableData(BaseModel): # TBD
372
372
 
373
373
  return table_data
374
374
 
375
+ def get_row_bounding_boxes(self) -> dict[int, BoundingBox]:
376
+ """Get the minimal bounding box for each row in the table.
377
+
378
+ Returns:
379
+ List[Optional[BoundingBox]]: A list where each element is the minimal
380
+ bounding box that encompasses all cells in that row, or None if no
381
+ cells in the row have bounding boxes.
382
+ """
383
+ coords = []
384
+ for cell in self.table_cells:
385
+ if cell.bbox is not None:
386
+ coords.append(cell.bbox.coord_origin)
387
+
388
+ if len(set(coords)) > 1:
389
+ raise ValueError(
390
+ "All bounding boxes must have the same \
391
+ CoordOrigin to compute their union."
392
+ )
393
+
394
+ row_bboxes: dict[int, BoundingBox] = {}
395
+
396
+ for row_idx in range(self.num_rows):
397
+ row_cells_with_bbox: dict[int, list[BoundingBox]] = {}
398
+
399
+ # Collect all cells in this row that have bounding boxes
400
+ for cell in self.table_cells:
401
+
402
+ if (
403
+ cell.bbox is not None
404
+ and cell.start_row_offset_idx <= row_idx < cell.end_row_offset_idx
405
+ ):
406
+
407
+ row_span = cell.end_row_offset_idx - cell.start_row_offset_idx
408
+ if row_span in row_cells_with_bbox:
409
+ row_cells_with_bbox[row_span].append(cell.bbox)
410
+ else:
411
+ row_cells_with_bbox[row_span] = [cell.bbox]
412
+
413
+ # Calculate the enclosing bounding box for this row
414
+ if len(row_cells_with_bbox) > 0:
415
+ min_row_span = min(row_cells_with_bbox.keys())
416
+ row_bbox: BoundingBox = BoundingBox.enclosing_bbox(
417
+ row_cells_with_bbox[min_row_span]
418
+ )
419
+
420
+ for rspan, bboxs in row_cells_with_bbox.items():
421
+ for bbox in bboxs:
422
+ row_bbox.l = min(row_bbox.l, bbox.l)
423
+ row_bbox.r = max(row_bbox.r, bbox.r)
424
+
425
+ row_bboxes[row_idx] = row_bbox
426
+
427
+ return row_bboxes
428
+
429
+ def get_column_bounding_boxes(self) -> dict[int, BoundingBox]:
430
+ """Get the minimal bounding box for each column in the table.
431
+
432
+ Returns:
433
+ List[Optional[BoundingBox]]: A list where each element is the minimal
434
+ bounding box that encompasses all cells in that column, or None if no
435
+ cells in the column have bounding boxes.
436
+ """
437
+ coords = []
438
+ for cell in self.table_cells:
439
+ if cell.bbox is not None:
440
+ coords.append(cell.bbox.coord_origin)
441
+
442
+ if len(set(coords)) > 1:
443
+ raise ValueError(
444
+ "All bounding boxes must have the same \
445
+ CoordOrigin to compute their union."
446
+ )
447
+
448
+ col_bboxes: dict[int, BoundingBox] = {}
449
+
450
+ for col_idx in range(self.num_cols):
451
+ col_cells_with_bbox: dict[int, list[BoundingBox]] = {}
452
+
453
+ # Collect all cells in this row that have bounding boxes
454
+ for cell in self.table_cells:
455
+
456
+ if (
457
+ cell.bbox is not None
458
+ and cell.start_col_offset_idx <= col_idx < cell.end_col_offset_idx
459
+ ):
460
+
461
+ col_span = cell.end_col_offset_idx - cell.start_col_offset_idx
462
+ if col_span in col_cells_with_bbox:
463
+ col_cells_with_bbox[col_span].append(cell.bbox)
464
+ else:
465
+ col_cells_with_bbox[col_span] = [cell.bbox]
466
+
467
+ # Calculate the enclosing bounding box for this row
468
+ if len(col_cells_with_bbox) > 0:
469
+ min_col_span = min(col_cells_with_bbox.keys())
470
+ col_bbox: BoundingBox = BoundingBox.enclosing_bbox(
471
+ col_cells_with_bbox[min_col_span]
472
+ )
473
+
474
+ for rspan, bboxs in col_cells_with_bbox.items():
475
+ for bbox in bboxs:
476
+ if bbox.coord_origin == CoordOrigin.TOPLEFT:
477
+ col_bbox.b = max(col_bbox.b, bbox.b)
478
+ col_bbox.t = min(col_bbox.t, bbox.t)
479
+
480
+ elif bbox.coord_origin == CoordOrigin.BOTTOMLEFT:
481
+ col_bbox.b = min(col_bbox.b, bbox.b)
482
+ col_bbox.t = max(col_bbox.t, bbox.t)
483
+
484
+ col_bboxes[col_idx] = col_bbox
485
+
486
+ return col_bboxes
487
+
375
488
 
376
489
  class PictureTabularChartData(PictureChartData):
377
490
  """Base class for picture chart data.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.36.0
3
+ Version: 2.37.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.36.0" # DO NOT EDIT, updated automatically
3
+ version = "2.37.0" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -55,7 +55,7 @@ def test_doc_visualization_no_label():
55
55
  )
56
56
 
57
57
 
58
- def test_table_visualization_no_label():
58
+ def test_table_visualization_for_cells():
59
59
  src = Path("./test/data/doc/2408.09869v3_enriched.json")
60
60
  doc = DoclingDocument.load_from_json(src)
61
61
 
@@ -66,3 +66,19 @@ def test_table_visualization_no_label():
66
66
  exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_table_viz_wout_lbl_p5.png",
67
67
  actual=viz_pages[5],
68
68
  )
69
+
70
+
71
+ def test_table_visualization_for_rows_and_cols():
72
+ src = Path("./test/data/doc/2408.09869v3_enriched.json")
73
+ doc = DoclingDocument.load_from_json(src)
74
+
75
+ visualizer = TableVisualizer(
76
+ params=TableVisualizer.Params(show_cells=False, show_rows=True, show_cols=True)
77
+ )
78
+ viz_pages = visualizer.get_visualization(doc=doc)
79
+
80
+ verify(
81
+ exp_file=VIZ_TEST_DATA_PATH
82
+ / f"{src.stem}_table_viz_wout_lbl_p5_rows_and_cols.png",
83
+ actual=viz_pages[5],
84
+ )
File without changes
File without changes
File without changes