docling-core 2.43.1__py3-none-any.whl → 2.44.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -0,0 +1,217 @@
1
+ """Key‑value visualizer overlaying key/value cells and their links on page images.
2
+
3
+ This module complements :py:class:`layout_visualizer.LayoutVisualizer` by drawing
4
+ *key* and *value* cells plus the directed links between them. It can be stacked
5
+ on top of any other :py:class:`BaseVisualizer` – e.g. first draw the general
6
+ layout, then add the key‑value layer.
7
+ """
8
+
9
+ from copy import deepcopy
10
+ from typing import Optional, Union
11
+
12
+ from PIL import ImageDraw, ImageFont
13
+ from PIL.Image import Image
14
+ from PIL.ImageFont import FreeTypeFont
15
+ from pydantic import BaseModel
16
+ from typing_extensions import override
17
+
18
+ from docling_core.transforms.visualizer.base import BaseVisualizer
19
+ from docling_core.types.doc.document import ContentLayer, DoclingDocument
20
+ from docling_core.types.doc.labels import GraphCellLabel, GraphLinkLabel
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Helper functions / constants
24
+ # ---------------------------------------------------------------------------
25
+
26
+ # Semi‑transparent RGBA colours for key / value cells and their connecting link
27
+ _KEY_FILL = (0, 170, 0, 70) # greenish
28
+ _VALUE_FILL = (0, 0, 200, 70) # bluish
29
+ _LINK_COLOUR = (255, 0, 0, 255) # red line (solid)
30
+
31
+ _LABEL_TXT_COLOUR = (0, 0, 0, 255)
32
+ _LABEL_BG_COLOUR = (255, 255, 255, 180) # semi‑transparent white
33
+
34
+
35
+ class KeyValueVisualizer(BaseVisualizer):
36
+ """Draw key/value graphs stored in :py:attr:`DoclingDocument.key_value_items`."""
37
+
38
+ class Params(BaseModel):
39
+ """Parameters for KeyValueVisualizer controlling label and cell id display, and content layers to visualize."""
40
+
41
+ show_label: bool = True # draw cell text close to bbox
42
+ show_cell_id: bool = False # annotate each rectangle with its cell_id
43
+ content_layers: set[ContentLayer] = {cl for cl in ContentLayer}
44
+
45
+ base_visualizer: Optional[BaseVisualizer] = None
46
+ params: Params = Params()
47
+
48
+ # ---------------------------------------------------------------------
49
+ # Internal helpers
50
+ # ---------------------------------------------------------------------
51
+
52
+ def _cell_fill(self, label: GraphCellLabel) -> tuple[int, int, int, int]:
53
+ """Return RGBA fill colour depending on *label*."""
54
+ return _KEY_FILL if label == GraphCellLabel.KEY else _VALUE_FILL
55
+
56
+ def _draw_key_value_layer(
57
+ self,
58
+ *,
59
+ image: Image,
60
+ doc: DoclingDocument,
61
+ page_no: int,
62
+ scale_x: float,
63
+ scale_y: float,
64
+ ) -> None:
65
+ """Draw every key‑value graph that has cells on *page_no* onto *image*."""
66
+ draw = ImageDraw.Draw(image, "RGBA")
67
+ # Choose a small truetype font if available, otherwise default bitmap font
68
+ font: Union[ImageFont.ImageFont, FreeTypeFont]
69
+ try:
70
+ font = ImageFont.truetype("arial.ttf", 12)
71
+ except OSError:
72
+ font = ImageFont.load_default()
73
+
74
+ for kv_item in doc.key_value_items:
75
+ cell_dict = {cell.cell_id: cell for cell in kv_item.graph.cells}
76
+
77
+ # ------------------------------------------------------------------
78
+ # First draw cells (rectangles + optional labels)
79
+ # ------------------------------------------------------------------
80
+ for cell in cell_dict.values():
81
+ if cell.prov is None or cell.prov.page_no != page_no:
82
+ continue # skip cells not on this page or without bbox
83
+
84
+ tl_bbox = cell.prov.bbox.to_top_left_origin(
85
+ page_height=doc.pages[page_no].size.height
86
+ )
87
+ x0, y0, x1, y1 = tl_bbox.as_tuple()
88
+ x0 *= scale_x
89
+ x1 *= scale_x
90
+ y0 *= scale_y
91
+ y1 *= scale_y
92
+ fill_rgba = self._cell_fill(cell.label)
93
+
94
+ draw.rectangle(
95
+ [(x0, y0), (x1, y1)],
96
+ outline=fill_rgba[:-1] + (255,),
97
+ fill=fill_rgba,
98
+ )
99
+
100
+ if self.params.show_label:
101
+ txt_parts = []
102
+ if self.params.show_cell_id:
103
+ txt_parts.append(str(cell.cell_id))
104
+ txt_parts.append(cell.text)
105
+ label_text = " | ".join(txt_parts)
106
+
107
+ tbx = draw.textbbox((x0, y0), label_text, font=font)
108
+ pad = 2
109
+ draw.rectangle(
110
+ [(tbx[0] - pad, tbx[1] - pad), (tbx[2] + pad, tbx[3] + pad)],
111
+ fill=_LABEL_BG_COLOUR,
112
+ )
113
+ draw.text((x0, y0), label_text, font=font, fill=_LABEL_TXT_COLOUR)
114
+
115
+ # ------------------------------------------------------------------
116
+ # Then draw links (after rectangles so they appear on top)
117
+ # ------------------------------------------------------------------
118
+ for link in kv_item.graph.links:
119
+ if link.label != GraphLinkLabel.TO_VALUE:
120
+ # Future‑proof: ignore other link types silently
121
+ continue
122
+
123
+ src_cell = cell_dict.get(link.source_cell_id)
124
+ tgt_cell = cell_dict.get(link.target_cell_id)
125
+ if src_cell is None or tgt_cell is None:
126
+ continue
127
+ if (
128
+ src_cell.prov is None
129
+ or tgt_cell.prov is None
130
+ or src_cell.prov.page_no != page_no
131
+ or tgt_cell.prov.page_no != page_no
132
+ ):
133
+ continue # only draw if both ends are on this page
134
+
135
+ def _centre(bbox):
136
+ tl = bbox.to_top_left_origin(
137
+ page_height=doc.pages[page_no].size.height
138
+ )
139
+ l, t, r, b = tl.as_tuple()
140
+ return ((l + r) / 2 * scale_x, (t + b) / 2 * scale_y)
141
+
142
+ src_xy = _centre(src_cell.prov.bbox)
143
+ tgt_xy = _centre(tgt_cell.prov.bbox)
144
+
145
+ draw.line([src_xy, tgt_xy], fill=_LINK_COLOUR, width=2)
146
+
147
+ # draw a small arrow‑head by rendering a short orthogonal line
148
+ # segment; exact geometry is not critical for visual inspection
149
+ arrow_len = 6
150
+ dx = tgt_xy[0] - src_xy[0]
151
+ dy = tgt_xy[1] - src_xy[1]
152
+ length = (dx**2 + dy**2) ** 0.5 or 1.0
153
+ ux, uy = dx / length, dy / length
154
+ # perpendicular vector
155
+ px, py = -uy, ux
156
+ # two points forming the arrow head triangle base
157
+ head_base_left = (
158
+ tgt_xy[0] - ux * arrow_len - px * arrow_len / 2,
159
+ tgt_xy[1] - uy * arrow_len - py * arrow_len / 2,
160
+ )
161
+ head_base_right = (
162
+ tgt_xy[0] - ux * arrow_len + px * arrow_len / 2,
163
+ tgt_xy[1] - uy * arrow_len + py * arrow_len / 2,
164
+ )
165
+ draw.polygon(
166
+ [tgt_xy, head_base_left, head_base_right], fill=_LINK_COLOUR
167
+ )
168
+
169
+ # ---------------------------------------------------------------------
170
+ # Public API – BaseVisualizer implementation
171
+ # ---------------------------------------------------------------------
172
+
173
+ @override
174
+ def get_visualization(
175
+ self,
176
+ *,
177
+ doc: DoclingDocument,
178
+ included_content_layers: Optional[set[ContentLayer]] = None,
179
+ **kwargs,
180
+ ) -> dict[Optional[int], Image]:
181
+ """Return page‑wise images with key/value overlay (incl. base layer)."""
182
+ base_images = (
183
+ self.base_visualizer.get_visualization(
184
+ doc=doc, included_content_layers=included_content_layers, **kwargs
185
+ )
186
+ if self.base_visualizer
187
+ else None
188
+ )
189
+
190
+ if included_content_layers is None:
191
+ included_content_layers = {cl for cl in ContentLayer}
192
+
193
+ images: dict[Optional[int], Image] = {}
194
+
195
+ # Ensure we have page images to draw on
196
+ for page_nr, page in doc.pages.items():
197
+ base_img = (base_images or {}).get(page_nr)
198
+ if base_img is None:
199
+ if page.image is None or (pil_img := page.image.pil_image) is None:
200
+ raise RuntimeError("Cannot visualize document without page images")
201
+ base_img = deepcopy(pil_img)
202
+ images[page_nr] = base_img
203
+
204
+ # Overlay key‑value content
205
+ for page_nr, img in images.items(): # type: ignore
206
+ assert isinstance(page_nr, int)
207
+ scale_x = img.width / doc.pages[page_nr].size.width
208
+ scale_y = img.height / doc.pages[page_nr].size.height
209
+ self._draw_key_value_layer(
210
+ image=img,
211
+ doc=doc,
212
+ page_no=page_nr,
213
+ scale_x=scale_x,
214
+ scale_y=scale_y,
215
+ )
216
+
217
+ return images
@@ -1373,11 +1373,12 @@ class PictureItem(FloatingItem):
1373
1373
  ) # Encode to Base64 and decode to string
1374
1374
  return img_base64
1375
1375
 
1376
- def _image_to_hexhash(self) -> Optional[str]:
1376
+ @staticmethod
1377
+ def _image_to_hexhash(img: Optional[PILImage.Image]) -> Optional[str]:
1377
1378
  """Hexash from the image."""
1378
- if self.image is not None and self.image._pil is not None:
1379
+ if img is not None:
1379
1380
  # Convert the image to raw bytes
1380
- image_bytes = self.image._pil.tobytes()
1381
+ image_bytes = img.tobytes()
1381
1382
 
1382
1383
  # Create a hash object (e.g., SHA-256)
1383
1384
  hasher = hashlib.sha256(usedforsecurity=False)
@@ -4116,16 +4117,10 @@ class DoclingDocument(BaseModel):
4116
4117
  if image_dir.is_dir():
4117
4118
  for item, level in result.iterate_items(page_no=page_no, with_groups=False):
4118
4119
  if isinstance(item, PictureItem):
4120
+ img = item.get_image(doc=self)
4121
+ if img is not None:
4119
4122
 
4120
- if (
4121
- item.image is not None
4122
- and isinstance(item.image.uri, AnyUrl)
4123
- and item.image.uri.scheme == "data"
4124
- and item.image.pil_image is not None
4125
- ):
4126
- img = item.image.pil_image
4127
-
4128
- hexhash = item._image_to_hexhash()
4123
+ hexhash = PictureItem._image_to_hexhash(img)
4129
4124
 
4130
4125
  # loc_path = image_dir / f"image_{img_count:06}.png"
4131
4126
  if hexhash is not None:
@@ -4140,6 +4135,11 @@ class DoclingDocument(BaseModel):
4140
4135
  else:
4141
4136
  obj_path = loc_path
4142
4137
 
4138
+ if item.image is None:
4139
+ scale = img.size[0] / item.prov[0].bbox.width
4140
+ item.image = ImageRef.from_pil(
4141
+ image=img, dpi=round(72 * scale)
4142
+ )
4143
4143
  item.image.uri = Path(obj_path)
4144
4144
 
4145
4145
  # if item.image._pil is not None:
@@ -4539,6 +4539,8 @@ class DoclingDocument(BaseModel):
4539
4539
  reference_path = None
4540
4540
  else:
4541
4541
  reference_path = filename.parent
4542
+ artifacts_dir = reference_path / artifacts_dir
4543
+
4542
4544
  return artifacts_dir, reference_path
4543
4545
 
4544
4546
  def _make_copy_with_refmode(
@@ -5543,8 +5545,27 @@ class DoclingDocument(BaseModel):
5543
5545
  self,
5544
5546
  show_label: bool = True,
5545
5547
  show_branch_numbering: bool = False,
5548
+ viz_mode: Literal["reading_order", "key_value"] = "reading_order",
5549
+ show_cell_id: bool = False,
5546
5550
  ) -> dict[Optional[int], PILImage.Image]:
5547
- """Get visualization of the document as images by page."""
5551
+ """Get visualization of the document as images by page.
5552
+
5553
+ :param show_label: Show labels on elements (applies to all visualizers).
5554
+ :type show_label: bool
5555
+ :param show_branch_numbering: Show branch numbering (reading order visualizer only).
5556
+ :type show_branch_numbering: bool
5557
+ :param visualizer: Which visualizer to use. One of 'reading_order' (default), 'key_value'.
5558
+ :type visualizer: str
5559
+ :param show_cell_id: Show cell IDs (key value visualizer only).
5560
+ :type show_cell_id: bool
5561
+
5562
+ :returns: Dictionary mapping page numbers to PIL images.
5563
+ :rtype: dict[Optional[int], PILImage.Image]
5564
+ """
5565
+ from docling_core.transforms.visualizer.base import BaseVisualizer
5566
+ from docling_core.transforms.visualizer.key_value_visualizer import (
5567
+ KeyValueVisualizer,
5568
+ )
5548
5569
  from docling_core.transforms.visualizer.layout_visualizer import (
5549
5570
  LayoutVisualizer,
5550
5571
  )
@@ -5552,18 +5573,34 @@ class DoclingDocument(BaseModel):
5552
5573
  ReadingOrderVisualizer,
5553
5574
  )
5554
5575
 
5555
- visualizer = ReadingOrderVisualizer(
5556
- base_visualizer=LayoutVisualizer(
5557
- params=LayoutVisualizer.Params(
5576
+ visualizer_obj: BaseVisualizer
5577
+ if viz_mode == "reading_order":
5578
+ visualizer_obj = ReadingOrderVisualizer(
5579
+ base_visualizer=LayoutVisualizer(
5580
+ params=LayoutVisualizer.Params(
5581
+ show_label=show_label,
5582
+ ),
5583
+ ),
5584
+ params=ReadingOrderVisualizer.Params(
5585
+ show_branch_numbering=show_branch_numbering,
5586
+ ),
5587
+ )
5588
+ elif viz_mode == "key_value":
5589
+ visualizer_obj = KeyValueVisualizer(
5590
+ base_visualizer=LayoutVisualizer(
5591
+ params=LayoutVisualizer.Params(
5592
+ show_label=show_label,
5593
+ ),
5594
+ ),
5595
+ params=KeyValueVisualizer.Params(
5558
5596
  show_label=show_label,
5597
+ show_cell_id=show_cell_id,
5559
5598
  ),
5560
- ),
5561
- params=ReadingOrderVisualizer.Params(
5562
- show_branch_numbering=show_branch_numbering,
5563
- ),
5564
- )
5565
- images = visualizer.get_visualization(doc=self)
5599
+ )
5600
+ else:
5601
+ raise ValueError(f"Unknown visualization mode: {viz_mode}")
5566
5602
 
5603
+ images = visualizer_obj.get_visualization(doc=self)
5567
5604
  return images
5568
5605
 
5569
5606
  @field_validator("version")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.43.1
3
+ Version: 2.44.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -35,6 +35,7 @@ docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx9
35
35
  docling_core/transforms/serializer/markdown.py,sha256=VwonuAkuOPmQM7ibDIGvQBHOqhTcTJ_t187fLQQiNPo,23951
36
36
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
37
37
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
38
+ docling_core/transforms/visualizer/key_value_visualizer.py,sha256=fp7nFLy4flOSiavdRgg5y1Mu7WVLIDGh1zEHsq8kgVM,8979
38
39
  docling_core/transforms/visualizer/layout_visualizer.py,sha256=k93ORWxA1oTDNkNxyqlzqB2aunxBX3aq4e4ihdJzRkE,8089
39
40
  docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao39X3Dut0934NAjU3I4v3JN5VzzdjmoGRY,7776
40
41
  docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
@@ -42,7 +43,7 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
42
43
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
43
44
  docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
44
45
  docling_core/types/doc/base.py,sha256=i98y4IF250adR-8BSS374K90fwfwG-vBfWh14tLC5Cs,15906
45
- docling_core/types/doc/document.py,sha256=SUqIJ-huO3ELLRdCMUYjkkXHeGGkeY2oaOWFQ1nq5lg,199315
46
+ docling_core/types/doc/document.py,sha256=-cL4eGFRbQHgXAsCG8zALxAx-IoanvkqG5E1zvKOMxI,201012
46
47
  docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
47
48
  docling_core/types/doc/page.py,sha256=35h1xdtCM3-AaN8Dim9jDseZIiw-3GxpB-ofF-H2rQQ,41878
48
49
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
@@ -75,9 +76,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
75
76
  docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
76
77
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
77
78
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
78
- docling_core-2.43.1.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
79
- docling_core-2.43.1.dist-info/METADATA,sha256=uVJIJpT7DuKgWIYmdqxgTf2Av_5dB2Da9ZfcPiHqbPc,6453
80
- docling_core-2.43.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
81
- docling_core-2.43.1.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
82
- docling_core-2.43.1.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
83
- docling_core-2.43.1.dist-info/RECORD,,
79
+ docling_core-2.44.1.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
80
+ docling_core-2.44.1.dist-info/METADATA,sha256=NtybqGuK3bjSWq_AadW7B-pydco80WLkFYwWaWyaTb0,6453
81
+ docling_core-2.44.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
82
+ docling_core-2.44.1.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
83
+ docling_core-2.44.1.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
84
+ docling_core-2.44.1.dist-info/RECORD,,