docling-core 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/document.py +162 -3
- docling_core/types/doc/labels.py +12 -0
- {docling_core-2.2.2.dist-info → docling_core-2.3.0.dist-info}/METADATA +1 -1
- {docling_core-2.2.2.dist-info → docling_core-2.3.0.dist-info}/RECORD +7 -7
- {docling_core-2.2.2.dist-info → docling_core-2.3.0.dist-info}/LICENSE +0 -0
- {docling_core-2.2.2.dist-info → docling_core-2.3.0.dist-info}/WHEEL +0 -0
- {docling_core-2.2.2.dist-info → docling_core-2.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -4,6 +4,7 @@ import base64
|
|
|
4
4
|
import mimetypes
|
|
5
5
|
import re
|
|
6
6
|
import sys
|
|
7
|
+
import textwrap
|
|
7
8
|
import typing
|
|
8
9
|
from io import BytesIO
|
|
9
10
|
from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
|
|
@@ -99,12 +100,166 @@ class PictureMiscData(BaseModel):
|
|
|
99
100
|
content: Dict[str, Any]
|
|
100
101
|
|
|
101
102
|
|
|
103
|
+
class ChartLine(BaseModel):
|
|
104
|
+
"""Represents a line in a line chart.
|
|
105
|
+
|
|
106
|
+
Attributes:
|
|
107
|
+
label (str): The label for the line.
|
|
108
|
+
values (List[Tuple[float, float]]): A list of (x, y) coordinate pairs
|
|
109
|
+
representing the line's data points.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
label: str
|
|
113
|
+
values: List[Tuple[float, float]]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ChartBar(BaseModel):
|
|
117
|
+
"""Represents a bar in a bar chart.
|
|
118
|
+
|
|
119
|
+
Attributes:
|
|
120
|
+
label (str): The label for the bar.
|
|
121
|
+
values (float): The value associated with the bar.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
label: str
|
|
125
|
+
values: float
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ChartStackedBar(BaseModel):
|
|
129
|
+
"""Represents a stacked bar in a stacked bar chart.
|
|
130
|
+
|
|
131
|
+
Attributes:
|
|
132
|
+
label (List[str]): The labels for the stacked bars. Multiple values are stored
|
|
133
|
+
in cases where the chart is "double stacked," meaning bars are stacked both
|
|
134
|
+
horizontally and vertically.
|
|
135
|
+
values (List[Tuple[str, int]]): A list of values representing different segments
|
|
136
|
+
of the stacked bar along with their label.
|
|
137
|
+
"""
|
|
138
|
+
|
|
139
|
+
label: List[str]
|
|
140
|
+
values: List[Tuple[str, int]]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class ChartSlice(BaseModel):
|
|
144
|
+
"""Represents a slice in a pie chart.
|
|
145
|
+
|
|
146
|
+
Attributes:
|
|
147
|
+
label (str): The label for the slice.
|
|
148
|
+
value (float): The value represented by the slice.
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
label: str
|
|
152
|
+
value: float
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class ChartPoint(BaseModel):
|
|
156
|
+
"""Represents a point in a scatter chart.
|
|
157
|
+
|
|
158
|
+
Attributes:
|
|
159
|
+
value (Tuple[float, float]): A (x, y) coordinate pair representing a point in a
|
|
160
|
+
chart.
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
value: Tuple[float, float]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class PictureChartData(BaseModel):
|
|
167
|
+
"""Base class for picture chart data.
|
|
168
|
+
|
|
169
|
+
Attributes:
|
|
170
|
+
title (str): The title of the chart.
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
title: str
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class PictureLineChartData(PictureChartData):
|
|
177
|
+
"""Represents data of a line chart.
|
|
178
|
+
|
|
179
|
+
Attributes:
|
|
180
|
+
kind (Literal["line_chart_data"]): The type of the chart.
|
|
181
|
+
x_axis_label (str): The label for the x-axis.
|
|
182
|
+
y_axis_label (str): The label for the y-axis.
|
|
183
|
+
lines (List[ChartLine]): A list of lines in the chart.
|
|
184
|
+
"""
|
|
185
|
+
|
|
186
|
+
kind: Literal["line_chart_data"] = "line_chart_data"
|
|
187
|
+
x_axis_label: str
|
|
188
|
+
y_axis_label: str
|
|
189
|
+
lines: List[ChartLine]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class PictureBarChartData(PictureChartData):
|
|
193
|
+
"""Represents data of a bar chart.
|
|
194
|
+
|
|
195
|
+
Attributes:
|
|
196
|
+
kind (Literal["bar_chart_data"]): The type of the chart.
|
|
197
|
+
x_axis_label (str): The label for the x-axis.
|
|
198
|
+
y_axis_label (str): The label for the y-axis.
|
|
199
|
+
bars (List[ChartBar]): A list of bars in the chart.
|
|
200
|
+
"""
|
|
201
|
+
|
|
202
|
+
kind: Literal["bar_chart_data"] = "bar_chart_data"
|
|
203
|
+
x_axis_label: str
|
|
204
|
+
y_axis_label: str
|
|
205
|
+
bars: List[ChartBar]
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class PictureStackedBarChartData(PictureChartData):
|
|
209
|
+
"""Represents data of a stacked bar chart.
|
|
210
|
+
|
|
211
|
+
Attributes:
|
|
212
|
+
kind (Literal["stacked_bar_chart_data"]): The type of the chart.
|
|
213
|
+
x_axis_label (str): The label for the x-axis.
|
|
214
|
+
y_axis_label (str): The label for the y-axis.
|
|
215
|
+
stacked_bars (List[ChartStackedBar]): A list of stacked bars in the chart.
|
|
216
|
+
"""
|
|
217
|
+
|
|
218
|
+
kind: Literal["stacked_bar_chart_data"] = "stacked_bar_chart_data"
|
|
219
|
+
x_axis_label: str
|
|
220
|
+
y_axis_label: str
|
|
221
|
+
stacked_bars: List[ChartStackedBar]
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class PicturePieChartData(PictureChartData):
|
|
225
|
+
"""Represents data of a pie chart.
|
|
226
|
+
|
|
227
|
+
Attributes:
|
|
228
|
+
kind (Literal["pie_chart_data"]): The type of the chart.
|
|
229
|
+
slices (List[ChartSlice]): A list of slices in the pie chart.
|
|
230
|
+
"""
|
|
231
|
+
|
|
232
|
+
kind: Literal["pie_chart_data"] = "pie_chart_data"
|
|
233
|
+
slices: List[ChartSlice]
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class PictureScatterChartData(PictureChartData):
|
|
237
|
+
"""Represents data of a scatter chart.
|
|
238
|
+
|
|
239
|
+
Attributes:
|
|
240
|
+
kind (Literal["scatter_chart_data"]): The type of the chart.
|
|
241
|
+
x_axis_label (str): The label for the x-axis.
|
|
242
|
+
y_axis_label (str): The label for the y-axis.
|
|
243
|
+
points (List[ChartPoint]): A list of points in the scatter chart.
|
|
244
|
+
"""
|
|
245
|
+
|
|
246
|
+
kind: Literal["scatter_chart_data"] = "scatter_chart_data"
|
|
247
|
+
x_axis_label: str
|
|
248
|
+
y_axis_label: str
|
|
249
|
+
points: List[ChartPoint]
|
|
250
|
+
|
|
251
|
+
|
|
102
252
|
PictureDataType = Annotated[
|
|
103
253
|
Union[
|
|
104
254
|
PictureClassificationData,
|
|
105
255
|
PictureDescriptionData,
|
|
106
256
|
PictureMoleculeData,
|
|
107
257
|
PictureMiscData,
|
|
258
|
+
PictureLineChartData,
|
|
259
|
+
PictureBarChartData,
|
|
260
|
+
PictureStackedBarChartData,
|
|
261
|
+
PicturePieChartData,
|
|
262
|
+
PictureScatterChartData,
|
|
108
263
|
],
|
|
109
264
|
Field(discriminator="kind"),
|
|
110
265
|
]
|
|
@@ -1125,6 +1280,7 @@ class DoclingDocument(BaseModel):
|
|
|
1125
1280
|
image_placeholder: str = "<!-- image -->",
|
|
1126
1281
|
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
|
|
1127
1282
|
indent: int = 4,
|
|
1283
|
+
text_width: int = -1,
|
|
1128
1284
|
) -> str:
|
|
1129
1285
|
r"""Serialize to Markdown.
|
|
1130
1286
|
|
|
@@ -1207,8 +1363,8 @@ class DoclingDocument(BaseModel):
|
|
|
1207
1363
|
elif isinstance(item, TextItem) and item.label in [DocItemLabel.TITLE]:
|
|
1208
1364
|
in_list = False
|
|
1209
1365
|
marker = "" if strict_text else "#"
|
|
1210
|
-
text = f"{marker} {item.text}\n"
|
|
1211
|
-
mdtexts.append(text.strip())
|
|
1366
|
+
text = f"{marker} {item.text}"
|
|
1367
|
+
mdtexts.append(text.strip() + "\n")
|
|
1212
1368
|
|
|
1213
1369
|
elif (
|
|
1214
1370
|
isinstance(item, TextItem)
|
|
@@ -1251,7 +1407,10 @@ class DoclingDocument(BaseModel):
|
|
|
1251
1407
|
|
|
1252
1408
|
elif isinstance(item, TextItem) and item.label in labels:
|
|
1253
1409
|
in_list = False
|
|
1254
|
-
if len(item.text):
|
|
1410
|
+
if len(item.text) and text_width > 0:
|
|
1411
|
+
wrapped_text = textwrap.fill(text, width=text_width)
|
|
1412
|
+
mdtexts.append(wrapped_text + "\n")
|
|
1413
|
+
elif len(item.text):
|
|
1255
1414
|
text = f"{item.text}\n"
|
|
1256
1415
|
mdtexts.append(text)
|
|
1257
1416
|
|
docling_core/types/doc/labels.py
CHANGED
|
@@ -29,6 +29,10 @@ class DocItemLabel(str, Enum):
|
|
|
29
29
|
PARAGRAPH = "paragraph" # explicitly a paragraph and not arbitrary text
|
|
30
30
|
REFERENCE = "reference"
|
|
31
31
|
|
|
32
|
+
def __str__(self):
|
|
33
|
+
"""Get string value."""
|
|
34
|
+
return str(self.value)
|
|
35
|
+
|
|
32
36
|
|
|
33
37
|
class GroupLabel(str, Enum):
|
|
34
38
|
"""GroupLabel."""
|
|
@@ -43,6 +47,10 @@ class GroupLabel(str, Enum):
|
|
|
43
47
|
SHEET = "sheet"
|
|
44
48
|
SLIDE = "slide"
|
|
45
49
|
|
|
50
|
+
def __str__(self):
|
|
51
|
+
"""Get string value."""
|
|
52
|
+
return str(self.value)
|
|
53
|
+
|
|
46
54
|
|
|
47
55
|
class TableCellLabel(str, Enum):
|
|
48
56
|
"""TableCellLabel."""
|
|
@@ -51,3 +59,7 @@ class TableCellLabel(str, Enum):
|
|
|
51
59
|
ROW_HEADER = "row_header"
|
|
52
60
|
ROW_SECTION = "row_section"
|
|
53
61
|
BODY = "body"
|
|
62
|
+
|
|
63
|
+
def __str__(self):
|
|
64
|
+
"""Get string value."""
|
|
65
|
+
return str(self.value)
|
|
@@ -21,8 +21,8 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
21
21
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
22
22
|
docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
|
|
23
23
|
docling_core/types/doc/base.py,sha256=zvx631U_yQCcJam83hNdDanXEYnO3eN-CCw9vDr6S-I,4442
|
|
24
|
-
docling_core/types/doc/document.py,sha256=
|
|
25
|
-
docling_core/types/doc/labels.py,sha256=
|
|
24
|
+
docling_core/types/doc/document.py,sha256=XF43-v9oflV-E5r2k2quoKvq8qBp5mAB_VunshY9b10,56356
|
|
25
|
+
docling_core/types/doc/labels.py,sha256=A8vWP82VAeXO1rlCO0oDKo_Hb8uDeQe0myOTY3P03hk,1596
|
|
26
26
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
27
27
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
28
28
|
docling_core/types/legacy_doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
|
|
@@ -49,8 +49,8 @@ docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6
|
|
|
49
49
|
docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
|
|
50
50
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
51
51
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
52
|
-
docling_core-2.
|
|
53
|
-
docling_core-2.
|
|
54
|
-
docling_core-2.
|
|
55
|
-
docling_core-2.
|
|
56
|
-
docling_core-2.
|
|
52
|
+
docling_core-2.3.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
53
|
+
docling_core-2.3.0.dist-info/METADATA,sha256=1cthKA2Ke9tujdMQzh4sNnBB1K90yFNQ9TCvD2MhKDI,5432
|
|
54
|
+
docling_core-2.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
55
|
+
docling_core-2.3.0.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
|
|
56
|
+
docling_core-2.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|