docling-core 2.35.0__py3-none-any.whl → 2.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/transforms/visualizer/layout_visualizer.py +1 -0
- docling_core/transforms/visualizer/reading_order_visualizer.py +11 -1
- docling_core/types/doc/document.py +27 -2
- docling_core/types/doc/labels.py +6 -2
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/METADATA +1 -1
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/RECORD +10 -10
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/WHEEL +0 -0
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/entry_points.txt +0 -0
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.35.0.dist-info → docling_core-2.36.0.dist-info}/top_level.txt +0 -0
|
@@ -40,6 +40,7 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
40
40
|
"""Layout visualization parameters."""
|
|
41
41
|
|
|
42
42
|
show_label: bool = True
|
|
43
|
+
content_layers: set[ContentLayer] = {cl for cl in ContentLayer}
|
|
43
44
|
|
|
44
45
|
base_visualizer: Optional[BaseVisualizer] = None
|
|
45
46
|
params: Params = Params()
|
|
@@ -5,6 +5,7 @@ from typing import Optional
|
|
|
5
5
|
|
|
6
6
|
from PIL import ImageDraw
|
|
7
7
|
from PIL.Image import Image
|
|
8
|
+
from pydantic import BaseModel
|
|
8
9
|
from typing_extensions import override
|
|
9
10
|
|
|
10
11
|
from docling_core.transforms.visualizer.base import BaseVisualizer
|
|
@@ -14,7 +15,16 @@ from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocume
|
|
|
14
15
|
class ReadingOrderVisualizer(BaseVisualizer):
|
|
15
16
|
"""Reading order visualizer."""
|
|
16
17
|
|
|
18
|
+
class Params(BaseModel):
|
|
19
|
+
"""Layout visualization parameters."""
|
|
20
|
+
|
|
21
|
+
show_label: bool = True
|
|
22
|
+
content_layers: set[ContentLayer] = {
|
|
23
|
+
cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
|
|
24
|
+
}
|
|
25
|
+
|
|
17
26
|
base_visualizer: Optional[BaseVisualizer] = None
|
|
27
|
+
params: Params = Params()
|
|
18
28
|
|
|
19
29
|
def _draw_arrow(
|
|
20
30
|
self,
|
|
@@ -71,7 +81,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
71
81
|
my_images: dict[Optional[int], Image] = images or {}
|
|
72
82
|
prev_page = None
|
|
73
83
|
for elem, _ in doc.iterate_items(
|
|
74
|
-
included_content_layers=
|
|
84
|
+
included_content_layers=self.params.content_layers,
|
|
75
85
|
):
|
|
76
86
|
if not isinstance(elem, DocItem):
|
|
77
87
|
continue
|
|
@@ -623,6 +623,7 @@ class ContentLayer(str, Enum):
|
|
|
623
623
|
|
|
624
624
|
BODY = "body"
|
|
625
625
|
FURNITURE = "furniture"
|
|
626
|
+
BACKGROUND = "background"
|
|
626
627
|
|
|
627
628
|
|
|
628
629
|
DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
|
|
@@ -860,6 +861,7 @@ class TextItem(DocItem):
|
|
|
860
861
|
DocItemLabel.PARAGRAPH,
|
|
861
862
|
DocItemLabel.REFERENCE,
|
|
862
863
|
DocItemLabel.TEXT,
|
|
864
|
+
DocItemLabel.EMPTY_VALUE,
|
|
863
865
|
]
|
|
864
866
|
|
|
865
867
|
orig: str # untreated representation
|
|
@@ -2867,23 +2869,46 @@ class DoclingDocument(BaseModel):
|
|
|
2867
2869
|
|
|
2868
2870
|
def print_element_tree(self):
|
|
2869
2871
|
"""Print_element_tree."""
|
|
2870
|
-
for ix, (item, level) in enumerate(
|
|
2872
|
+
for ix, (item, level) in enumerate(
|
|
2873
|
+
self.iterate_items(
|
|
2874
|
+
with_groups=True,
|
|
2875
|
+
traverse_pictures=True,
|
|
2876
|
+
included_content_layers={cl for cl in ContentLayer},
|
|
2877
|
+
)
|
|
2878
|
+
):
|
|
2871
2879
|
if isinstance(item, GroupItem):
|
|
2872
2880
|
print(
|
|
2873
2881
|
" " * level,
|
|
2874
2882
|
f"{ix}: {item.label.value} with name={item.name}",
|
|
2875
2883
|
)
|
|
2884
|
+
elif isinstance(item, TextItem):
|
|
2885
|
+
print(
|
|
2886
|
+
" " * level,
|
|
2887
|
+
f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}",
|
|
2888
|
+
)
|
|
2889
|
+
|
|
2876
2890
|
elif isinstance(item, DocItem):
|
|
2877
2891
|
print(" " * level, f"{ix}: {item.label.value}")
|
|
2878
2892
|
|
|
2879
2893
|
def export_to_element_tree(self) -> str:
|
|
2880
2894
|
"""Export_to_element_tree."""
|
|
2881
2895
|
texts = []
|
|
2882
|
-
for ix, (item, level) in enumerate(
|
|
2896
|
+
for ix, (item, level) in enumerate(
|
|
2897
|
+
self.iterate_items(
|
|
2898
|
+
with_groups=True,
|
|
2899
|
+
traverse_pictures=True,
|
|
2900
|
+
included_content_layers={cl for cl in ContentLayer},
|
|
2901
|
+
)
|
|
2902
|
+
):
|
|
2883
2903
|
if isinstance(item, GroupItem):
|
|
2884
2904
|
texts.append(
|
|
2885
2905
|
" " * level + f"{ix}: {item.label.value} with name={item.name}"
|
|
2886
2906
|
)
|
|
2907
|
+
elif isinstance(item, TextItem):
|
|
2908
|
+
texts.append(
|
|
2909
|
+
" " * level
|
|
2910
|
+
+ f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}"
|
|
2911
|
+
)
|
|
2887
2912
|
elif isinstance(item, DocItem):
|
|
2888
2913
|
texts.append(" " * level + f"{ix}: {item.label.value}")
|
|
2889
2914
|
|
docling_core/types/doc/labels.py
CHANGED
|
@@ -27,6 +27,9 @@ class DocItemLabel(str, Enum):
|
|
|
27
27
|
KEY_VALUE_REGION = "key_value_region"
|
|
28
28
|
GRADING_SCALE = "grading_scale" # for elements in forms, questionnaires representing a grading scale
|
|
29
29
|
# e.g. [strongly disagree | ... | ... | strongly agree]
|
|
30
|
+
# e.g. ★★☆☆☆
|
|
31
|
+
HANDWRITTEN_TEXT = "handwritten_text"
|
|
32
|
+
EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
|
|
30
33
|
|
|
31
34
|
# Additional labels for markup-based formats (e.g. HTML, Word)
|
|
32
35
|
PARAGRAPH = "paragraph"
|
|
@@ -60,6 +63,9 @@ class DocItemLabel(str, Enum):
|
|
|
60
63
|
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),
|
|
61
64
|
DocItemLabel.PARAGRAPH: (255, 255, 153),
|
|
62
65
|
DocItemLabel.REFERENCE: (176, 224, 230),
|
|
66
|
+
DocItemLabel.GRADING_SCALE: (255, 204, 204),
|
|
67
|
+
DocItemLabel.HANDWRITTEN_TEXT: (204, 255, 204),
|
|
68
|
+
DocItemLabel.EMPTY_VALUE: (220, 220, 220),
|
|
63
69
|
}
|
|
64
70
|
return color_map.get(label, (0, 0, 0))
|
|
65
71
|
|
|
@@ -166,7 +172,6 @@ class GraphCellLabel(str, Enum):
|
|
|
166
172
|
KEY = "key" # used to designate a key (label) of a key-value element
|
|
167
173
|
VALUE = "value" # Data value with or without explicit Key, but filled in,
|
|
168
174
|
# e.g. telephone number, address, quantity, name, date
|
|
169
|
-
EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
|
|
170
175
|
CHECKBOX = "checkbox"
|
|
171
176
|
|
|
172
177
|
def __str__(self):
|
|
@@ -179,7 +184,6 @@ class GraphCellLabel(str, Enum):
|
|
|
179
184
|
color_map = {
|
|
180
185
|
GraphCellLabel.KEY: (255, 0, 0),
|
|
181
186
|
GraphCellLabel.VALUE: (0, 255, 0),
|
|
182
|
-
GraphCellLabel.EMPTY_VALUE: (0, 0, 255),
|
|
183
187
|
}
|
|
184
188
|
return color_map.get(label, (0, 0, 0))
|
|
185
189
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.35.0
|
|
3
|
+
Version: 2.36.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -34,15 +34,15 @@ docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx9
|
|
|
34
34
|
docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
|
|
35
35
|
docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
|
|
36
36
|
docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
|
|
37
|
-
docling_core/transforms/visualizer/layout_visualizer.py,sha256=
|
|
38
|
-
docling_core/transforms/visualizer/reading_order_visualizer.py,sha256
|
|
37
|
+
docling_core/transforms/visualizer/layout_visualizer.py,sha256=hpq7OnyBgGxt3iW3_aNy9KH_0kmKdgoiJIFPcA2SSHU,8040
|
|
38
|
+
docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=yBra_W33bb16BxrTqP-ABu5NfRplTEJgu3dKdew3zKA,5601
|
|
39
39
|
docling_core/transforms/visualizer/table_visualizer.py,sha256=XlLMSROyRW2UtAjKTltcESSs_rdQNKjO3QvO7ET7uc0,4275
|
|
40
40
|
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
41
41
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
42
42
|
docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
|
|
43
43
|
docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
|
|
44
|
-
docling_core/types/doc/document.py,sha256=
|
|
45
|
-
docling_core/types/doc/labels.py,sha256=
|
|
44
|
+
docling_core/types/doc/document.py,sha256=elFR5J7O9FUWXiweNK2W7S-cPvAakdzkMls0Uh4ViU8,149361
|
|
45
|
+
docling_core/types/doc/labels.py,sha256=JiciRK7_DOkebsrfQ6PVCvS__TsKgWn1ANk84BeB14k,7359
|
|
46
46
|
docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
|
|
47
47
|
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
48
48
|
docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
|
|
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
74
74
|
docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
|
|
75
75
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
76
76
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
77
|
-
docling_core-2.
|
|
78
|
-
docling_core-2.
|
|
79
|
-
docling_core-2.
|
|
80
|
-
docling_core-2.
|
|
81
|
-
docling_core-2.
|
|
82
|
-
docling_core-2.
|
|
77
|
+
docling_core-2.36.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
78
|
+
docling_core-2.36.0.dist-info/METADATA,sha256=8CnZkQHylNT1mgEEs_lIB18f2NL96R3kFAl-rBYVR0U,6453
|
|
79
|
+
docling_core-2.36.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
80
|
+
docling_core-2.36.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
|
|
81
|
+
docling_core-2.36.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
|
|
82
|
+
docling_core-2.36.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|