docling-core 2.34.2__tar.gz → 2.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (107)
  1. {docling_core-2.34.2 → docling_core-2.36.0}/PKG-INFO +1 -1
  2. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/visualizer/layout_visualizer.py +9 -4
  3. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/visualizer/reading_order_visualizer.py +11 -1
  4. docling_core-2.36.0/docling_core/transforms/visualizer/table_visualizer.py +135 -0
  5. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/document.py +27 -2
  6. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/labels.py +6 -2
  7. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/PKG-INFO +1 -1
  8. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/SOURCES.txt +1 -0
  9. {docling_core-2.34.2 → docling_core-2.36.0}/pyproject.toml +1 -1
  10. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_visualization.py +14 -0
  11. {docling_core-2.34.2 → docling_core-2.36.0}/LICENSE +0 -0
  12. {docling_core-2.34.2 → docling_core-2.36.0}/README.md +0 -0
  13. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/__init__.py +0 -0
  14. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/cli/__init__.py +0 -0
  15. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/cli/view.py +0 -0
  16. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/experimental/__init__.py +0 -0
  17. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/py.typed +0 -0
  18. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
  19. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
  20. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  21. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
  22. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  23. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  24. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  25. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  26. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/search/__init__.py +0 -0
  27. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  28. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/search/mapping.py +0 -0
  29. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/search/meta.py +0 -0
  30. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/search/package.py +0 -0
  31. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/__init__.py +0 -0
  32. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/__init__.py +0 -0
  33. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/base.py +0 -0
  34. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  35. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  36. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  37. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  38. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  39. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  40. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/__init__.py +0 -0
  41. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/base.py +0 -0
  42. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/common.py +0 -0
  43. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/doctags.py +0 -0
  44. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/html.py +0 -0
  45. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/html_styles.py +0 -0
  46. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/serializer/markdown.py +0 -0
  47. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/visualizer/__init__.py +0 -0
  48. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/transforms/visualizer/base.py +0 -0
  49. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/__init__.py +0 -0
  50. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/base.py +0 -0
  51. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/__init__.py +0 -0
  52. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/base.py +0 -0
  53. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/page.py +0 -0
  54. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/tokens.py +0 -0
  55. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/doc/utils.py +0 -0
  56. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/gen/__init__.py +0 -0
  57. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/gen/generic.py +0 -0
  58. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/io/__init__.py +0 -0
  59. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/__init__.py +0 -0
  60. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/base.py +0 -0
  61. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  62. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  63. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  64. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/document.py +0 -0
  65. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/legacy_doc/tokens.py +0 -0
  66. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/nlp/__init__.py +0 -0
  67. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/nlp/qa.py +0 -0
  68. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/nlp/qa_labels.py +0 -0
  69. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/__init__.py +0 -0
  70. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/attribute.py +0 -0
  71. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/base.py +0 -0
  72. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/predicate.py +0 -0
  73. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/record.py +0 -0
  74. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/statement.py +0 -0
  75. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/types/rec/subject.py +0 -0
  76. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/__init__.py +0 -0
  77. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/alias.py +0 -0
  78. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/file.py +0 -0
  79. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/generate_docs.py +0 -0
  80. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/generate_jsonschema.py +0 -0
  81. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/legacy.py +0 -0
  82. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/validate.py +0 -0
  83. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core/utils/validators.py +0 -0
  84. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/dependency_links.txt +0 -0
  85. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/entry_points.txt +0 -0
  86. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/requires.txt +0 -0
  87. {docling_core-2.34.2 → docling_core-2.36.0}/docling_core.egg-info/top_level.txt +0 -0
  88. {docling_core-2.34.2 → docling_core-2.36.0}/setup.cfg +0 -0
  89. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_base.py +0 -0
  90. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_collection.py +0 -0
  91. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_data_gen_flag.py +0 -0
  92. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_doc_base.py +0 -0
  93. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_doc_legacy_convert.py +0 -0
  94. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_doc_schema.py +0 -0
  95. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_doc_schema_extractor.py +0 -0
  96. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_docling_doc.py +0 -0
  97. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_doctags_load.py +0 -0
  98. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_hierarchical_chunker.py +0 -0
  99. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_hybrid_chunker.py +0 -0
  100. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_json_schema_to_search_mapper.py +0 -0
  101. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_nlp_qa.py +0 -0
  102. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_otsl_table_export.py +0 -0
  103. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_page.py +0 -0
  104. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_rec_schema.py +0 -0
  105. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_search_meta.py +0 -0
  106. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_serialization.py +0 -0
  107. {docling_core-2.34.2 → docling_core-2.36.0}/test/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.34.2
3
+ Version: 2.36.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -40,6 +40,7 @@ class LayoutVisualizer(BaseVisualizer):
40
40
  """Layout visualization parameters."""
41
41
 
42
42
  show_label: bool = True
43
+ content_layers: set[ContentLayer] = {cl for cl in ContentLayer}
43
44
 
44
45
  base_visualizer: Optional[BaseVisualizer] = None
45
46
  params: Params = Params()
@@ -119,7 +120,10 @@ class LayoutVisualizer(BaseVisualizer):
119
120
  )
120
121
 
121
122
  def _draw_doc_layout(
122
- self, doc: DoclingDocument, images: Optional[dict[Optional[int], Image]] = None
123
+ self,
124
+ doc: DoclingDocument,
125
+ images: Optional[dict[Optional[int], Image]] = None,
126
+ included_content_layers: Optional[set[ContentLayer]] = None,
123
127
  ):
124
128
  """Draw the document clusters and optionaly the reading order."""
125
129
  clusters = []
@@ -128,6 +132,9 @@ class LayoutVisualizer(BaseVisualizer):
128
132
  if images is not None:
129
133
  my_images = images
130
134
 
135
+ if included_content_layers is None:
136
+ included_content_layers = {c for c in ContentLayer}
137
+
131
138
  # Initialise `my_images` beforehand: sometimes, you have the
132
139
  # page-images but no DocItems!
133
140
  for page_nr, page in doc.pages.items():
@@ -141,9 +148,7 @@ class LayoutVisualizer(BaseVisualizer):
141
148
  prev_image = None
142
149
  prev_page_nr = None
143
150
  for idx, (elem, _) in enumerate(
144
- doc.iterate_items(
145
- included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
146
- )
151
+ doc.iterate_items(included_content_layers=included_content_layers)
147
152
  ):
148
153
  if not isinstance(elem, DocItem):
149
154
  continue
@@ -5,6 +5,7 @@ from typing import Optional
5
5
 
6
6
  from PIL import ImageDraw
7
7
  from PIL.Image import Image
8
+ from pydantic import BaseModel
8
9
  from typing_extensions import override
9
10
 
10
11
  from docling_core.transforms.visualizer.base import BaseVisualizer
@@ -14,7 +15,16 @@ from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocume
14
15
  class ReadingOrderVisualizer(BaseVisualizer):
15
16
  """Reading order visualizer."""
16
17
 
18
+ class Params(BaseModel):
19
+ """Layout visualization parameters."""
20
+
21
+ show_label: bool = True
22
+ content_layers: set[ContentLayer] = {
23
+ cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
24
+ }
25
+
17
26
  base_visualizer: Optional[BaseVisualizer] = None
27
+ params: Params = Params()
18
28
 
19
29
  def _draw_arrow(
20
30
  self,
@@ -71,7 +81,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
71
81
  my_images: dict[Optional[int], Image] = images or {}
72
82
  prev_page = None
73
83
  for elem, _ in doc.iterate_items(
74
- included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE},
84
+ included_content_layers=self.params.content_layers,
75
85
  ):
76
86
  if not isinstance(elem, DocItem):
77
87
  continue
@@ -0,0 +1,135 @@
1
+ """Define classes for layout visualization."""
2
+
3
+ import logging
4
+ from copy import deepcopy
5
+ from typing import Optional
6
+
7
+ from PIL import ImageDraw
8
+ from PIL.Image import Image
9
+ from pydantic import BaseModel
10
+ from typing_extensions import override
11
+
12
+ from docling_core.transforms.visualizer.base import BaseVisualizer
13
+ from docling_core.types.doc.document import ContentLayer, DoclingDocument, TableItem
14
+
15
+ _log = logging.getLogger(__name__)
16
+
17
+
18
+ class TableVisualizer(BaseVisualizer):
19
+ """Table visualizer."""
20
+
21
+ class Params(BaseModel):
22
+ """Table visualization parameters."""
23
+
24
+ # show_Label: bool = False
25
+ show_cells: bool = True
26
+ # show_rows: bool = False
27
+ # show_cols: bool = False
28
+
29
+ base_visualizer: Optional[BaseVisualizer] = None
30
+ params: Params = Params()
31
+
32
+ def _draw_table_cells(
33
+ self,
34
+ table: TableItem,
35
+ page_image: Image,
36
+ page_height: float,
37
+ scale_x: float,
38
+ scale_y: float,
39
+ ):
40
+ """Draw individual table cells."""
41
+ draw = ImageDraw.Draw(page_image, "RGBA")
42
+
43
+ for cell in table.data.table_cells:
44
+ if cell.bbox is not None:
45
+
46
+ tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height)
47
+
48
+ cell_color = (256, 0, 0, 32) # Transparent black for cells
49
+
50
+ cx0, cy0, cx1, cy1 = tl_bbox.as_tuple()
51
+ cx0 *= scale_x
52
+ cx1 *= scale_x
53
+ cy0 *= scale_y
54
+ cy1 *= scale_y
55
+
56
+ draw.rectangle(
57
+ [(cx0, cy0), (cx1, cy1)],
58
+ outline=(256, 0, 0, 128),
59
+ fill=cell_color,
60
+ )
61
+
62
+ def _draw_doc_tables(
63
+ self,
64
+ doc: DoclingDocument,
65
+ images: Optional[dict[Optional[int], Image]] = None,
66
+ included_content_layers: Optional[set[ContentLayer]] = None,
67
+ ):
68
+ """Draw the document tables."""
69
+ my_images: dict[Optional[int], Image] = {}
70
+
71
+ if images is not None:
72
+ my_images = images
73
+
74
+ if included_content_layers is None:
75
+ included_content_layers = {c for c in ContentLayer}
76
+
77
+ # Initialise `my_images` beforehand: sometimes, you have the
78
+ # page-images but no DocItems!
79
+ for page_nr, page in doc.pages.items():
80
+ page_image = doc.pages[page_nr].image
81
+ if page_image is None or (pil_img := page_image.pil_image) is None:
82
+ raise RuntimeError("Cannot visualize document without images")
83
+ elif page_nr not in my_images:
84
+ image = deepcopy(pil_img)
85
+ my_images[page_nr] = image
86
+
87
+ for idx, (elem, _) in enumerate(
88
+ doc.iterate_items(included_content_layers=included_content_layers)
89
+ ):
90
+ if not isinstance(elem, TableItem):
91
+ continue
92
+ if len(elem.prov) == 0:
93
+ continue # Skip elements without provenances
94
+
95
+ if len(elem.prov) == 1:
96
+
97
+ page_nr = elem.prov[0].page_no
98
+
99
+ if page_nr in my_images:
100
+ image = my_images[page_nr]
101
+
102
+ if self.params.show_cells:
103
+ self._draw_table_cells(
104
+ table=elem,
105
+ page_height=doc.pages[page_nr].size.height,
106
+ page_image=image,
107
+ scale_x=image.width / doc.pages[page_nr].size.width,
108
+ scale_y=image.height / doc.pages[page_nr].size.height,
109
+ )
110
+
111
+ else:
112
+ raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
113
+
114
+ else:
115
+ _log.error("Can not yet visualise tables with multiple provenances")
116
+
117
+ return my_images
118
+
119
+ @override
120
+ def get_visualization(
121
+ self,
122
+ *,
123
+ doc: DoclingDocument,
124
+ **kwargs,
125
+ ) -> dict[Optional[int], Image]:
126
+ """Get visualization of the document as images by page."""
127
+ base_images = (
128
+ self.base_visualizer.get_visualization(doc=doc, **kwargs)
129
+ if self.base_visualizer
130
+ else None
131
+ )
132
+ return self._draw_doc_tables(
133
+ doc=doc,
134
+ images=base_images,
135
+ )
@@ -623,6 +623,7 @@ class ContentLayer(str, Enum):
623
623
 
624
624
  BODY = "body"
625
625
  FURNITURE = "furniture"
626
+ BACKGROUND = "background"
626
627
 
627
628
 
628
629
  DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
@@ -860,6 +861,7 @@ class TextItem(DocItem):
860
861
  DocItemLabel.PARAGRAPH,
861
862
  DocItemLabel.REFERENCE,
862
863
  DocItemLabel.TEXT,
864
+ DocItemLabel.EMPTY_VALUE,
863
865
  ]
864
866
 
865
867
  orig: str # untreated representation
@@ -2867,23 +2869,46 @@ class DoclingDocument(BaseModel):
2867
2869
 
2868
2870
  def print_element_tree(self):
2869
2871
  """Print_element_tree."""
2870
- for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2872
+ for ix, (item, level) in enumerate(
2873
+ self.iterate_items(
2874
+ with_groups=True,
2875
+ traverse_pictures=True,
2876
+ included_content_layers={cl for cl in ContentLayer},
2877
+ )
2878
+ ):
2871
2879
  if isinstance(item, GroupItem):
2872
2880
  print(
2873
2881
  " " * level,
2874
2882
  f"{ix}: {item.label.value} with name={item.name}",
2875
2883
  )
2884
+ elif isinstance(item, TextItem):
2885
+ print(
2886
+ " " * level,
2887
+ f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}",
2888
+ )
2889
+
2876
2890
  elif isinstance(item, DocItem):
2877
2891
  print(" " * level, f"{ix}: {item.label.value}")
2878
2892
 
2879
2893
  def export_to_element_tree(self) -> str:
2880
2894
  """Export_to_element_tree."""
2881
2895
  texts = []
2882
- for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2896
+ for ix, (item, level) in enumerate(
2897
+ self.iterate_items(
2898
+ with_groups=True,
2899
+ traverse_pictures=True,
2900
+ included_content_layers={cl for cl in ContentLayer},
2901
+ )
2902
+ ):
2883
2903
  if isinstance(item, GroupItem):
2884
2904
  texts.append(
2885
2905
  " " * level + f"{ix}: {item.label.value} with name={item.name}"
2886
2906
  )
2907
+ elif isinstance(item, TextItem):
2908
+ texts.append(
2909
+ " " * level
2910
+ + f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}"
2911
+ )
2887
2912
  elif isinstance(item, DocItem):
2888
2913
  texts.append(" " * level + f"{ix}: {item.label.value}")
2889
2914
 
@@ -27,6 +27,9 @@ class DocItemLabel(str, Enum):
27
27
  KEY_VALUE_REGION = "key_value_region"
28
28
  GRADING_SCALE = "grading_scale" # for elements in forms, questionaires representing a grading scale
29
29
  # e.g. [strongly disagree | ... | ... | strongly agree]
30
+ # e.g. ★★☆☆☆
31
+ HANDWRITTEN_TEXT = "handwritten_text"
32
+ EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
30
33
 
31
34
  # Additional labels for markup-based formats (e.g. HTML, Word)
32
35
  PARAGRAPH = "paragraph"
@@ -60,6 +63,9 @@ class DocItemLabel(str, Enum):
60
63
  DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),
61
64
  DocItemLabel.PARAGRAPH: (255, 255, 153),
62
65
  DocItemLabel.REFERENCE: (176, 224, 230),
66
+ DocItemLabel.GRADING_SCALE: (255, 204, 204),
67
+ DocItemLabel.HANDWRITTEN_TEXT: (204, 255, 204),
68
+ DocItemLabel.EMPTY_VALUE: (220, 220, 220),
63
69
  }
64
70
  return color_map.get(label, (0, 0, 0))
65
71
 
@@ -166,7 +172,6 @@ class GraphCellLabel(str, Enum):
166
172
  KEY = "key" # used to designate a key (label) of a key-value element
167
173
  VALUE = "value" # Data value with or without explicit Key, but filled in,
168
174
  # e.g. telephone number, address, quantity, name, date
169
- EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
170
175
  CHECKBOX = "checkbox"
171
176
 
172
177
  def __str__(self):
@@ -179,7 +184,6 @@ class GraphCellLabel(str, Enum):
179
184
  color_map = {
180
185
  GraphCellLabel.KEY: (255, 0, 0),
181
186
  GraphCellLabel.VALUE: (0, 255, 0),
182
- GraphCellLabel.EMPTY_VALUE: (0, 0, 255),
183
187
  }
184
188
  return color_map.get(label, (0, 0, 0))
185
189
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.34.2
3
+ Version: 2.36.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -45,6 +45,7 @@ docling_core/transforms/visualizer/__init__.py
45
45
  docling_core/transforms/visualizer/base.py
46
46
  docling_core/transforms/visualizer/layout_visualizer.py
47
47
  docling_core/transforms/visualizer/reading_order_visualizer.py
48
+ docling_core/transforms/visualizer/table_visualizer.py
48
49
  docling_core/types/__init__.py
49
50
  docling_core/types/base.py
50
51
  docling_core/types/doc/__init__.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.34.2" # DO NOT EDIT, updated automatically
3
+ version = "2.36.0" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -2,6 +2,7 @@ from pathlib import Path
2
2
 
3
3
  import PIL.Image
4
4
 
5
+ from docling_core.transforms.visualizer.table_visualizer import TableVisualizer
5
6
  from docling_core.types.doc.document import DoclingDocument
6
7
 
7
8
  from .test_data_gen_flag import GEN_TEST_DATA
@@ -52,3 +53,16 @@ def test_doc_visualization_no_label():
52
53
  exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_viz_wout_lbl_p{k}.png",
53
54
  actual=viz_pages[k],
54
55
  )
56
+
57
+
58
+ def test_table_visualization_no_label():
59
+ src = Path("./test/data/doc/2408.09869v3_enriched.json")
60
+ doc = DoclingDocument.load_from_json(src)
61
+
62
+ visualizer = TableVisualizer()
63
+ viz_pages = visualizer.get_visualization(doc=doc)
64
+
65
+ verify(
66
+ exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_table_viz_wout_lbl_p5.png",
67
+ actual=viz_pages[5],
68
+ )
File without changes
File without changes
File without changes