docling-core 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -4,6 +4,7 @@ import base64
4
4
  import mimetypes
5
5
  import re
6
6
  import sys
7
+ import textwrap
7
8
  import typing
8
9
  from io import BytesIO
9
10
  from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union
@@ -99,12 +100,166 @@ class PictureMiscData(BaseModel):
99
100
  content: Dict[str, Any]
100
101
 
101
102
 
103
+ class ChartLine(BaseModel):
104
+ """Represents a line in a line chart.
105
+
106
+ Attributes:
107
+ label (str): The label for the line.
108
+ values (List[Tuple[float, float]]): A list of (x, y) coordinate pairs
109
+ representing the line's data points.
110
+ """
111
+
112
+ label: str
113
+ values: List[Tuple[float, float]]
114
+
115
+
116
+ class ChartBar(BaseModel):
117
+ """Represents a bar in a bar chart.
118
+
119
+ Attributes:
120
+ label (str): The label for the bar.
121
+ values (float): The value associated with the bar.
122
+ """
123
+
124
+ label: str
125
+ values: float
126
+
127
+
128
+ class ChartStackedBar(BaseModel):
129
+ """Represents a stacked bar in a stacked bar chart.
130
+
131
+ Attributes:
132
+ label (List[str]): The labels for the stacked bars. Multiple values are stored
133
+ in cases where the chart is "double stacked," meaning bars are stacked both
134
+ horizontally and vertically.
135
+ values (List[Tuple[str, int]]): A list of values representing different segments
136
+ of the stacked bar along with their label.
137
+ """
138
+
139
+ label: List[str]
140
+ values: List[Tuple[str, int]]
141
+
142
+
143
+ class ChartSlice(BaseModel):
144
+ """Represents a slice in a pie chart.
145
+
146
+ Attributes:
147
+ label (str): The label for the slice.
148
+ value (float): The value represented by the slice.
149
+ """
150
+
151
+ label: str
152
+ value: float
153
+
154
+
155
+ class ChartPoint(BaseModel):
156
+ """Represents a point in a scatter chart.
157
+
158
+ Attributes:
159
+ value (Tuple[float, float]): A (x, y) coordinate pair representing a point in a
160
+ chart.
161
+ """
162
+
163
+ value: Tuple[float, float]
164
+
165
+
166
+ class PictureChartData(BaseModel):
167
+ """Base class for picture chart data.
168
+
169
+ Attributes:
170
+ title (str): The title of the chart.
171
+ """
172
+
173
+ title: str
174
+
175
+
176
+ class PictureLineChartData(PictureChartData):
177
+ """Represents data of a line chart.
178
+
179
+ Attributes:
180
+ kind (Literal["line_chart_data"]): The type of the chart.
181
+ x_axis_label (str): The label for the x-axis.
182
+ y_axis_label (str): The label for the y-axis.
183
+ lines (List[ChartLine]): A list of lines in the chart.
184
+ """
185
+
186
+ kind: Literal["line_chart_data"] = "line_chart_data"
187
+ x_axis_label: str
188
+ y_axis_label: str
189
+ lines: List[ChartLine]
190
+
191
+
192
+ class PictureBarChartData(PictureChartData):
193
+ """Represents data of a bar chart.
194
+
195
+ Attributes:
196
+ kind (Literal["bar_chart_data"]): The type of the chart.
197
+ x_axis_label (str): The label for the x-axis.
198
+ y_axis_label (str): The label for the y-axis.
199
+ bars (List[ChartBar]): A list of bars in the chart.
200
+ """
201
+
202
+ kind: Literal["bar_chart_data"] = "bar_chart_data"
203
+ x_axis_label: str
204
+ y_axis_label: str
205
+ bars: List[ChartBar]
206
+
207
+
208
+ class PictureStackedBarChartData(PictureChartData):
209
+ """Represents data of a stacked bar chart.
210
+
211
+ Attributes:
212
+ kind (Literal["stacked_bar_chart_data"]): The type of the chart.
213
+ x_axis_label (str): The label for the x-axis.
214
+ y_axis_label (str): The label for the y-axis.
215
+ stacked_bars (List[ChartStackedBar]): A list of stacked bars in the chart.
216
+ """
217
+
218
+ kind: Literal["stacked_bar_chart_data"] = "stacked_bar_chart_data"
219
+ x_axis_label: str
220
+ y_axis_label: str
221
+ stacked_bars: List[ChartStackedBar]
222
+
223
+
224
+ class PicturePieChartData(PictureChartData):
225
+ """Represents data of a pie chart.
226
+
227
+ Attributes:
228
+ kind (Literal["pie_chart_data"]): The type of the chart.
229
+ slices (List[ChartSlice]): A list of slices in the pie chart.
230
+ """
231
+
232
+ kind: Literal["pie_chart_data"] = "pie_chart_data"
233
+ slices: List[ChartSlice]
234
+
235
+
236
+ class PictureScatterChartData(PictureChartData):
237
+ """Represents data of a scatter chart.
238
+
239
+ Attributes:
240
+ kind (Literal["scatter_chart_data"]): The type of the chart.
241
+ x_axis_label (str): The label for the x-axis.
242
+ y_axis_label (str): The label for the y-axis.
243
+ points (List[ChartPoint]): A list of points in the scatter chart.
244
+ """
245
+
246
+ kind: Literal["scatter_chart_data"] = "scatter_chart_data"
247
+ x_axis_label: str
248
+ y_axis_label: str
249
+ points: List[ChartPoint]
250
+
251
+
102
252
  PictureDataType = Annotated[
103
253
  Union[
104
254
  PictureClassificationData,
105
255
  PictureDescriptionData,
106
256
  PictureMoleculeData,
107
257
  PictureMiscData,
258
+ PictureLineChartData,
259
+ PictureBarChartData,
260
+ PictureStackedBarChartData,
261
+ PicturePieChartData,
262
+ PictureScatterChartData,
108
263
  ],
109
264
  Field(discriminator="kind"),
110
265
  ]
@@ -1125,6 +1280,7 @@ class DoclingDocument(BaseModel):
1125
1280
  image_placeholder: str = "<!-- image -->",
1126
1281
  image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
1127
1282
  indent: int = 4,
1283
+ text_width: int = -1,
1128
1284
  ) -> str:
1129
1285
  r"""Serialize to Markdown.
1130
1286
 
@@ -1207,8 +1363,8 @@ class DoclingDocument(BaseModel):
1207
1363
  elif isinstance(item, TextItem) and item.label in [DocItemLabel.TITLE]:
1208
1364
  in_list = False
1209
1365
  marker = "" if strict_text else "#"
1210
- text = f"{marker} {item.text}\n"
1211
- mdtexts.append(text.strip())
1366
+ text = f"{marker} {item.text}"
1367
+ mdtexts.append(text.strip() + "\n")
1212
1368
 
1213
1369
  elif (
1214
1370
  isinstance(item, TextItem)
@@ -1251,7 +1407,10 @@ class DoclingDocument(BaseModel):
1251
1407
 
1252
1408
  elif isinstance(item, TextItem) and item.label in labels:
1253
1409
  in_list = False
1254
- if len(item.text):
1410
+ if len(item.text) and text_width > 0:
1411
+ wrapped_text = textwrap.fill(text, width=text_width)
1412
+ mdtexts.append(wrapped_text + "\n")
1413
+ elif len(item.text):
1255
1414
  text = f"{item.text}\n"
1256
1415
  mdtexts.append(text)
1257
1416
 
@@ -29,6 +29,10 @@ class DocItemLabel(str, Enum):
29
29
  PARAGRAPH = "paragraph" # explicitly a paragraph and not arbitrary text
30
30
  REFERENCE = "reference"
31
31
 
32
+ def __str__(self):
33
+ """Get string value."""
34
+ return str(self.value)
35
+
32
36
 
33
37
  class GroupLabel(str, Enum):
34
38
  """GroupLabel."""
@@ -43,6 +47,10 @@ class GroupLabel(str, Enum):
43
47
  SHEET = "sheet"
44
48
  SLIDE = "slide"
45
49
 
50
+ def __str__(self):
51
+ """Get string value."""
52
+ return str(self.value)
53
+
46
54
 
47
55
  class TableCellLabel(str, Enum):
48
56
  """TableCellLabel."""
@@ -51,3 +59,7 @@ class TableCellLabel(str, Enum):
51
59
  ROW_HEADER = "row_header"
52
60
  ROW_SECTION = "row_section"
53
61
  BODY = "body"
62
+
63
+ def __str__(self):
64
+ """Get string value."""
65
+ return str(self.value)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.2.2
3
+ Version: 2.3.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -21,8 +21,8 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
21
21
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
22
22
  docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
23
23
  docling_core/types/doc/base.py,sha256=zvx631U_yQCcJam83hNdDanXEYnO3eN-CCw9vDr6S-I,4442
24
- docling_core/types/doc/document.py,sha256=SrOXpO6iCIYFkhWW-pksd4C4PeJ2jubKI5m34K_lTac,51902
25
- docling_core/types/doc/labels.py,sha256=mzmSd072A-qW3IThswHxwIHV8IoyTCbHHlNOrisinRA,1335
24
+ docling_core/types/doc/document.py,sha256=XF43-v9oflV-E5r2k2quoKvq8qBp5mAB_VunshY9b10,56356
25
+ docling_core/types/doc/labels.py,sha256=A8vWP82VAeXO1rlCO0oDKo_Hb8uDeQe0myOTY3P03hk,1596
26
26
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
27
27
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
28
28
  docling_core/types/legacy_doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
@@ -49,8 +49,8 @@ docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6
49
49
  docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
50
50
  docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
51
51
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
52
- docling_core-2.2.2.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
53
- docling_core-2.2.2.dist-info/METADATA,sha256=680OW3ffU0R_QL5UdvoUASJBOyLj7YKlfeDcftd3Kkw,5432
54
- docling_core-2.2.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
- docling_core-2.2.2.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
56
- docling_core-2.2.2.dist-info/RECORD,,
52
+ docling_core-2.3.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
53
+ docling_core-2.3.0.dist-info/METADATA,sha256=1cthKA2Ke9tujdMQzh4sNnBB1K90yFNQ9TCvD2MhKDI,5432
54
+ docling_core-2.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
+ docling_core-2.3.0.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
56
+ docling_core-2.3.0.dist-info/RECORD,,