docling-core 2.35.0__py3-none-any.whl → 2.36.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -40,6 +40,7 @@ class LayoutVisualizer(BaseVisualizer):
40
40
  """Layout visualization parameters."""
41
41
 
42
42
  show_label: bool = True
43
+ content_layers: set[ContentLayer] = {cl for cl in ContentLayer}
43
44
 
44
45
  base_visualizer: Optional[BaseVisualizer] = None
45
46
  params: Params = Params()
@@ -5,6 +5,7 @@ from typing import Optional
5
5
 
6
6
  from PIL import ImageDraw
7
7
  from PIL.Image import Image
8
+ from pydantic import BaseModel
8
9
  from typing_extensions import override
9
10
 
10
11
  from docling_core.transforms.visualizer.base import BaseVisualizer
@@ -14,7 +15,16 @@ from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocume
14
15
  class ReadingOrderVisualizer(BaseVisualizer):
15
16
  """Reading order visualizer."""
16
17
 
18
+ class Params(BaseModel):
19
+ """Layout visualization parameters."""
20
+
21
+ show_label: bool = True
22
+ content_layers: set[ContentLayer] = {
23
+ cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
24
+ }
25
+
17
26
  base_visualizer: Optional[BaseVisualizer] = None
27
+ params: Params = Params()
18
28
 
19
29
  def _draw_arrow(
20
30
  self,
@@ -71,7 +81,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
71
81
  my_images: dict[Optional[int], Image] = images or {}
72
82
  prev_page = None
73
83
  for elem, _ in doc.iterate_items(
74
- included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE},
84
+ included_content_layers=self.params.content_layers,
75
85
  ):
76
86
  if not isinstance(elem, DocItem):
77
87
  continue
@@ -623,6 +623,7 @@ class ContentLayer(str, Enum):
623
623
 
624
624
  BODY = "body"
625
625
  FURNITURE = "furniture"
626
+ BACKGROUND = "background"
626
627
 
627
628
 
628
629
  DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
@@ -860,6 +861,7 @@ class TextItem(DocItem):
860
861
  DocItemLabel.PARAGRAPH,
861
862
  DocItemLabel.REFERENCE,
862
863
  DocItemLabel.TEXT,
864
+ DocItemLabel.EMPTY_VALUE,
863
865
  ]
864
866
 
865
867
  orig: str # untreated representation
@@ -2867,23 +2869,46 @@ class DoclingDocument(BaseModel):
2867
2869
 
2868
2870
  def print_element_tree(self):
2869
2871
  """Print_element_tree."""
2870
- for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2872
+ for ix, (item, level) in enumerate(
2873
+ self.iterate_items(
2874
+ with_groups=True,
2875
+ traverse_pictures=True,
2876
+ included_content_layers={cl for cl in ContentLayer},
2877
+ )
2878
+ ):
2871
2879
  if isinstance(item, GroupItem):
2872
2880
  print(
2873
2881
  " " * level,
2874
2882
  f"{ix}: {item.label.value} with name={item.name}",
2875
2883
  )
2884
+ elif isinstance(item, TextItem):
2885
+ print(
2886
+ " " * level,
2887
+ f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}",
2888
+ )
2889
+
2876
2890
  elif isinstance(item, DocItem):
2877
2891
  print(" " * level, f"{ix}: {item.label.value}")
2878
2892
 
2879
2893
  def export_to_element_tree(self) -> str:
2880
2894
  """Export_to_element_tree."""
2881
2895
  texts = []
2882
- for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2896
+ for ix, (item, level) in enumerate(
2897
+ self.iterate_items(
2898
+ with_groups=True,
2899
+ traverse_pictures=True,
2900
+ included_content_layers={cl for cl in ContentLayer},
2901
+ )
2902
+ ):
2883
2903
  if isinstance(item, GroupItem):
2884
2904
  texts.append(
2885
2905
  " " * level + f"{ix}: {item.label.value} with name={item.name}"
2886
2906
  )
2907
+ elif isinstance(item, TextItem):
2908
+ texts.append(
2909
+ " " * level
2910
+ + f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}"
2911
+ )
2887
2912
  elif isinstance(item, DocItem):
2888
2913
  texts.append(" " * level + f"{ix}: {item.label.value}")
2889
2914
 
@@ -27,6 +27,9 @@ class DocItemLabel(str, Enum):
27
27
  KEY_VALUE_REGION = "key_value_region"
28
28
  GRADING_SCALE = "grading_scale" # for elements in forms, questionaires representing a grading scale
29
29
  # e.g. [strongly disagree | ... | ... | strongly agree]
30
+ # e.g. ★★☆☆☆
31
+ HANDWRITTEN_TEXT = "handwritten_text"
32
+ EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
30
33
 
31
34
  # Additional labels for markup-based formats (e.g. HTML, Word)
32
35
  PARAGRAPH = "paragraph"
@@ -60,6 +63,9 @@ class DocItemLabel(str, Enum):
60
63
  DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),
61
64
  DocItemLabel.PARAGRAPH: (255, 255, 153),
62
65
  DocItemLabel.REFERENCE: (176, 224, 230),
66
+ DocItemLabel.GRADING_SCALE: (255, 204, 204),
67
+ DocItemLabel.HANDWRITTEN_TEXT: (204, 255, 204),
68
+ DocItemLabel.EMPTY_VALUE: (220, 220, 220),
63
69
  }
64
70
  return color_map.get(label, (0, 0, 0))
65
71
 
@@ -166,7 +172,6 @@ class GraphCellLabel(str, Enum):
166
172
  KEY = "key" # used to designate a key (label) of a key-value element
167
173
  VALUE = "value" # Data value with or without explicit Key, but filled in,
168
174
  # e.g. telephone number, address, quantity, name, date
169
- EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
170
175
  CHECKBOX = "checkbox"
171
176
 
172
177
  def __str__(self):
@@ -179,7 +184,6 @@ class GraphCellLabel(str, Enum):
179
184
  color_map = {
180
185
  GraphCellLabel.KEY: (255, 0, 0),
181
186
  GraphCellLabel.VALUE: (0, 255, 0),
182
- GraphCellLabel.EMPTY_VALUE: (0, 0, 255),
183
187
  }
184
188
  return color_map.get(label, (0, 0, 0))
185
189
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.35.0
3
+ Version: 2.36.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -34,15 +34,15 @@ docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx9
34
34
  docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
35
35
  docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
36
  docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
- docling_core/transforms/visualizer/layout_visualizer.py,sha256=N3SA9sMkg2bEZ_2r52FpwRXcI3EJ2M5P9LYK4Az4jqQ,7968
38
- docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=-ej5uLriNtr9C7YBHXMg8sZfB9Uc8cSRr1bJ8FVjpY8,5320
37
+ docling_core/transforms/visualizer/layout_visualizer.py,sha256=hpq7OnyBgGxt3iW3_aNy9KH_0kmKdgoiJIFPcA2SSHU,8040
38
+ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=yBra_W33bb16BxrTqP-ABu5NfRplTEJgu3dKdew3zKA,5601
39
39
  docling_core/transforms/visualizer/table_visualizer.py,sha256=XlLMSROyRW2UtAjKTltcESSs_rdQNKjO3QvO7ET7uc0,4275
40
40
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
41
41
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
42
42
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
43
43
  docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
44
- docling_core/types/doc/document.py,sha256=UMQ7eSfXM0l2QPBpc4fg67OlYaWEJsgWqke_fk9jCUw,148549
45
- docling_core/types/doc/labels.py,sha256=vp4h3e7AmBvezRmgrfuPehjAHTZOufphErLB4ENhdME,7171
44
+ docling_core/types/doc/document.py,sha256=elFR5J7O9FUWXiweNK2W7S-cPvAakdzkMls0Uh4ViU8,149361
45
+ docling_core/types/doc/labels.py,sha256=JiciRK7_DOkebsrfQ6PVCvS__TsKgWn1ANk84BeB14k,7359
46
46
  docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
47
47
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
48
48
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
74
74
  docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
75
75
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
76
76
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
77
- docling_core-2.35.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
- docling_core-2.35.0.dist-info/METADATA,sha256=Gube58hbnoDQGoeGmaK-yrMulAuKHiw7lUeGKxzSDsc,6453
79
- docling_core-2.35.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
- docling_core-2.35.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
- docling_core-2.35.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
- docling_core-2.35.0.dist-info/RECORD,,
77
+ docling_core-2.36.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
+ docling_core-2.36.0.dist-info/METADATA,sha256=8CnZkQHylNT1mgEEs_lIB18f2NL96R3kFAl-rBYVR0U,6453
79
+ docling_core-2.36.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
+ docling_core-2.36.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
+ docling_core-2.36.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
+ docling_core-2.36.0.dist-info/RECORD,,