docling-core 2.34.2__py3-none-any.whl → 2.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -119,7 +119,10 @@ class LayoutVisualizer(BaseVisualizer):
119
119
  )
120
120
 
121
121
  def _draw_doc_layout(
122
- self, doc: DoclingDocument, images: Optional[dict[Optional[int], Image]] = None
122
+ self,
123
+ doc: DoclingDocument,
124
+ images: Optional[dict[Optional[int], Image]] = None,
125
+ included_content_layers: Optional[set[ContentLayer]] = None,
123
126
  ):
124
127
  """Draw the document clusters and optionaly the reading order."""
125
128
  clusters = []
@@ -128,6 +131,9 @@ class LayoutVisualizer(BaseVisualizer):
128
131
  if images is not None:
129
132
  my_images = images
130
133
 
134
+ if included_content_layers is None:
135
+ included_content_layers = {c for c in ContentLayer}
136
+
131
137
  # Initialise `my_images` beforehand: sometimes, you have the
132
138
  # page-images but no DocItems!
133
139
  for page_nr, page in doc.pages.items():
@@ -141,9 +147,7 @@ class LayoutVisualizer(BaseVisualizer):
141
147
  prev_image = None
142
148
  prev_page_nr = None
143
149
  for idx, (elem, _) in enumerate(
144
- doc.iterate_items(
145
- included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
146
- )
150
+ doc.iterate_items(included_content_layers=included_content_layers)
147
151
  ):
148
152
  if not isinstance(elem, DocItem):
149
153
  continue
@@ -0,0 +1,135 @@
1
+ """Define classes for layout visualization."""
2
+
3
+ import logging
4
+ from copy import deepcopy
5
+ from typing import Optional
6
+
7
+ from PIL import ImageDraw
8
+ from PIL.Image import Image
9
+ from pydantic import BaseModel
10
+ from typing_extensions import override
11
+
12
+ from docling_core.transforms.visualizer.base import BaseVisualizer
13
+ from docling_core.types.doc.document import ContentLayer, DoclingDocument, TableItem
14
+
15
+ _log = logging.getLogger(__name__)
16
+
17
+
18
+ class TableVisualizer(BaseVisualizer):
19
+ """Table visualizer."""
20
+
21
+ class Params(BaseModel):
22
+ """Table visualization parameters."""
23
+
24
+ # show_Label: bool = False
25
+ show_cells: bool = True
26
+ # show_rows: bool = False
27
+ # show_cols: bool = False
28
+
29
+ base_visualizer: Optional[BaseVisualizer] = None
30
+ params: Params = Params()
31
+
32
+ def _draw_table_cells(
33
+ self,
34
+ table: TableItem,
35
+ page_image: Image,
36
+ page_height: float,
37
+ scale_x: float,
38
+ scale_y: float,
39
+ ):
40
+ """Draw individual table cells."""
41
+ draw = ImageDraw.Draw(page_image, "RGBA")
42
+
43
+ for cell in table.data.table_cells:
44
+ if cell.bbox is not None:
45
+
46
+ tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height)
47
+
48
+ cell_color = (256, 0, 0, 32) # Transparent black for cells
49
+
50
+ cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
51
+ cx0 *= scale_x
52
+ cx1 *= scale_x
53
+ cy0 *= scale_y
54
+ cy1 *= scale_y
55
+
56
+ draw.rectangle(
57
+ [(cx0, cy0), (cx1, cy1)],
58
+ outline=(256, 0, 0, 128),
59
+ fill=cell_color,
60
+ )
61
+
62
+ def _draw_doc_tables(
63
+ self,
64
+ doc: DoclingDocument,
65
+ images: Optional[dict[Optional[int], Image]] = None,
66
+ included_content_layers: Optional[set[ContentLayer]] = None,
67
+ ):
68
+ """Draw the document tables."""
69
+ my_images: dict[Optional[int], Image] = {}
70
+
71
+ if images is not None:
72
+ my_images = images
73
+
74
+ if included_content_layers is None:
75
+ included_content_layers = {c for c in ContentLayer}
76
+
77
+ # Initialise `my_images` beforehand: sometimes, you have the
78
+ # page-images but no DocItems!
79
+ for page_nr, page in doc.pages.items():
80
+ page_image = doc.pages[page_nr].image
81
+ if page_image is None or (pil_img := page_image.pil_image) is None:
82
+ raise RuntimeError("Cannot visualize document without images")
83
+ elif page_nr not in my_images:
84
+ image = deepcopy(pil_img)
85
+ my_images[page_nr] = image
86
+
87
+ for idx, (elem, _) in enumerate(
88
+ doc.iterate_items(included_content_layers=included_content_layers)
89
+ ):
90
+ if not isinstance(elem, TableItem):
91
+ continue
92
+ if len(elem.prov) == 0:
93
+ continue # Skip elements without provenances
94
+
95
+ if len(elem.prov) == 1:
96
+
97
+ page_nr = elem.prov[0].page_no
98
+
99
+ if page_nr in my_images:
100
+ image = my_images[page_nr]
101
+
102
+ if self.params.show_cells:
103
+ self._draw_table_cells(
104
+ table=elem,
105
+ page_height=doc.pages[page_nr].size.height,
106
+ page_image=image,
107
+ scale_x=image.width / doc.pages[page_nr].size.width,
108
+ scale_y=image.height / doc.pages[page_nr].size.height,
109
+ )
110
+
111
+ else:
112
+ raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
113
+
114
+ else:
115
+ _log.error("Can not yet visualise tables with multiple provenances")
116
+
117
+ return my_images
118
+
119
+ @override
120
+ def get_visualization(
121
+ self,
122
+ *,
123
+ doc: DoclingDocument,
124
+ **kwargs,
125
+ ) -> dict[Optional[int], Image]:
126
+ """Get visualization of the document as images by page."""
127
+ base_images = (
128
+ self.base_visualizer.get_visualization(doc=doc, **kwargs)
129
+ if self.base_visualizer
130
+ else None
131
+ )
132
+ return self._draw_doc_tables(
133
+ doc=doc,
134
+ images=base_images,
135
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.34.2
3
+ Version: 2.35.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -34,8 +34,9 @@ docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx9
34
34
  docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
35
35
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
36
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
- docling_core/transforms/visualizer/layout_visualizer.py,sha256=ulXxWGIl69-HMKDPFk_XKgNCgQeDNc969PVt_X0-drA,7823
37
+ docling_core/transforms/visualizer/layout_visualizer.py,sha256=N3SA9sMkg2bEZ_2r52FpwRXcI3EJ2M5P9LYK4Az4jqQ,7968
38
38
  docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=-ej5uLriNtr9C7YBHXMg8sZfB9Uc8cSRr1bJ8FVjpY8,5320
39
+ docling_core/transforms/visualizer/table_visualizer.py,sha256=XlLMSROyRW2UtAjKTltcESSs_rdQNKjO3QvO7ET7uc0,4275
39
40
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
40
41
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
41
42
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
@@ -73,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
73
74
  docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
74
75
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
75
76
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
76
- docling_core-2.34.2.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
- docling_core-2.34.2.dist-info/METADATA,sha256=SZQWH-WCvFQCz39nJqxUjv_t14TaI_WDThYzOki7EHc,6453
78
- docling_core-2.34.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- docling_core-2.34.2.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
80
- docling_core-2.34.2.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
81
- docling_core-2.34.2.dist-info/RECORD,,
77
+ docling_core-2.35.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
+ docling_core-2.35.0.dist-info/METADATA,sha256=Gube58hbnoDQGoeGmaK-yrMulAuKHiw7lUeGKxzSDsc,6453
79
+ docling_core-2.35.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
+ docling_core-2.35.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
+ docling_core-2.35.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
+ docling_core-2.35.0.dist-info/RECORD,,