docling-core 2.2.3__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -183,14 +183,15 @@ class HierarchicalChunker(BaseChunker):
183
183
  )
184
184
  list_items = [] # reset
185
185
 
186
- if isinstance(
187
- item, SectionHeaderItem
188
- ) or ( # TODO remove when all captured as SectionHeaderItem:
186
+ if isinstance(item, SectionHeaderItem) or (
189
187
  isinstance(item, TextItem)
190
- and item.label == DocItemLabel.SECTION_HEADER
188
+ and item.label in [DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE]
191
189
  ):
192
- # TODO second branch not needed once cleanup above complete:
193
- level = item.level if isinstance(item, SectionHeaderItem) else 1
190
+ level = (
191
+ item.level
192
+ if isinstance(item, SectionHeaderItem)
193
+ else (0 if item.label == DocItemLabel.TITLE else 1)
194
+ )
194
195
  heading_by_level[level] = item.text
195
196
 
196
197
  # remove headings of higher level as they just went out of scope
@@ -100,12 +100,166 @@ class PictureMiscData(BaseModel):
100
100
  content: Dict[str, Any]
101
101
 
102
102
 
103
+ class ChartLine(BaseModel):
104
+ """Represents a line in a line chart.
105
+
106
+ Attributes:
107
+ label (str): The label for the line.
108
+ values (List[Tuple[float, float]]): A list of (x, y) coordinate pairs
109
+ representing the line's data points.
110
+ """
111
+
112
+ label: str
113
+ values: List[Tuple[float, float]]
114
+
115
+
116
+ class ChartBar(BaseModel):
117
+ """Represents a bar in a bar chart.
118
+
119
+ Attributes:
120
+ label (str): The label for the bar.
121
+ values (float): The value associated with the bar.
122
+ """
123
+
124
+ label: str
125
+ values: float
126
+
127
+
128
+ class ChartStackedBar(BaseModel):
129
+ """Represents a stacked bar in a stacked bar chart.
130
+
131
+ Attributes:
132
+ label (List[str]): The labels for the stacked bars. Multiple values are stored
133
+ in cases where the chart is "double stacked," meaning bars are stacked both
134
+ horizontally and vertically.
135
+ values (List[Tuple[str, int]]): A list of values representing different segments
136
+ of the stacked bar along with their label.
137
+ """
138
+
139
+ label: List[str]
140
+ values: List[Tuple[str, int]]
141
+
142
+
143
+ class ChartSlice(BaseModel):
144
+ """Represents a slice in a pie chart.
145
+
146
+ Attributes:
147
+ label (str): The label for the slice.
148
+ value (float): The value represented by the slice.
149
+ """
150
+
151
+ label: str
152
+ value: float
153
+
154
+
155
+ class ChartPoint(BaseModel):
156
+ """Represents a point in a scatter chart.
157
+
158
+ Attributes:
159
+ value (Tuple[float, float]): A (x, y) coordinate pair representing a point in a
160
+ chart.
161
+ """
162
+
163
+ value: Tuple[float, float]
164
+
165
+
166
+ class PictureChartData(BaseModel):
167
+ """Base class for picture chart data.
168
+
169
+ Attributes:
170
+ title (str): The title of the chart.
171
+ """
172
+
173
+ title: str
174
+
175
+
176
+ class PictureLineChartData(PictureChartData):
177
+ """Represents data of a line chart.
178
+
179
+ Attributes:
180
+ kind (Literal["line_chart_data"]): The type of the chart.
181
+ x_axis_label (str): The label for the x-axis.
182
+ y_axis_label (str): The label for the y-axis.
183
+ lines (List[ChartLine]): A list of lines in the chart.
184
+ """
185
+
186
+ kind: Literal["line_chart_data"] = "line_chart_data"
187
+ x_axis_label: str
188
+ y_axis_label: str
189
+ lines: List[ChartLine]
190
+
191
+
192
+ class PictureBarChartData(PictureChartData):
193
+ """Represents data of a bar chart.
194
+
195
+ Attributes:
196
+ kind (Literal["bar_chart_data"]): The type of the chart.
197
+ x_axis_label (str): The label for the x-axis.
198
+ y_axis_label (str): The label for the y-axis.
199
+ bars (List[ChartBar]): A list of bars in the chart.
200
+ """
201
+
202
+ kind: Literal["bar_chart_data"] = "bar_chart_data"
203
+ x_axis_label: str
204
+ y_axis_label: str
205
+ bars: List[ChartBar]
206
+
207
+
208
+ class PictureStackedBarChartData(PictureChartData):
209
+ """Represents data of a stacked bar chart.
210
+
211
+ Attributes:
212
+ kind (Literal["stacked_bar_chart_data"]): The type of the chart.
213
+ x_axis_label (str): The label for the x-axis.
214
+ y_axis_label (str): The label for the y-axis.
215
+ stacked_bars (List[ChartStackedBar]): A list of stacked bars in the chart.
216
+ """
217
+
218
+ kind: Literal["stacked_bar_chart_data"] = "stacked_bar_chart_data"
219
+ x_axis_label: str
220
+ y_axis_label: str
221
+ stacked_bars: List[ChartStackedBar]
222
+
223
+
224
+ class PicturePieChartData(PictureChartData):
225
+ """Represents data of a pie chart.
226
+
227
+ Attributes:
228
+ kind (Literal["pie_chart_data"]): The type of the chart.
229
+ slices (List[ChartSlice]): A list of slices in the pie chart.
230
+ """
231
+
232
+ kind: Literal["pie_chart_data"] = "pie_chart_data"
233
+ slices: List[ChartSlice]
234
+
235
+
236
+ class PictureScatterChartData(PictureChartData):
237
+ """Represents data of a scatter chart.
238
+
239
+ Attributes:
240
+ kind (Literal["scatter_chart_data"]): The type of the chart.
241
+ x_axis_label (str): The label for the x-axis.
242
+ y_axis_label (str): The label for the y-axis.
243
+ points (List[ChartPoint]): A list of points in the scatter chart.
244
+ """
245
+
246
+ kind: Literal["scatter_chart_data"] = "scatter_chart_data"
247
+ x_axis_label: str
248
+ y_axis_label: str
249
+ points: List[ChartPoint]
250
+
251
+
103
252
  PictureDataType = Annotated[
104
253
  Union[
105
254
  PictureClassificationData,
106
255
  PictureDescriptionData,
107
256
  PictureMoleculeData,
108
257
  PictureMiscData,
258
+ PictureLineChartData,
259
+ PictureBarChartData,
260
+ PictureStackedBarChartData,
261
+ PicturePieChartData,
262
+ PictureScatterChartData,
109
263
  ],
110
264
  Field(discriminator="kind"),
111
265
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.2.3
3
+ Version: 2.3.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -16,12 +16,12 @@ docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75
16
16
  docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
17
17
  docling_core/transforms/chunker/__init__.py,sha256=sSSTnt7ZCt8Og1e0jhApNTtA0pyyHyzwcl8yXFLb2J8,292
18
18
  docling_core/transforms/chunker/base.py,sha256=iPouZOJ3cYWvai4P0Gpd3QmsTKQuY5fFUXzTMk_XNmE,1571
19
- docling_core/transforms/chunker/hierarchical_chunker.py,sha256=uG7nNoUCFqWeQAKydQg731JYJ9sayUe4J48nMF0VHE8,8097
19
+ docling_core/transforms/chunker/hierarchical_chunker.py,sha256=V4FiOYqL0GgBqVB7x6CafAJs3WF5oYifKIiexVggGPE,8086
20
20
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
21
21
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
22
22
  docling_core/types/doc/__init__.py,sha256=bEL4zKVOG7Wxm6xQrgF58mu-Teds9aSavuEAKVNhrTU,639
23
23
  docling_core/types/doc/base.py,sha256=zvx631U_yQCcJam83hNdDanXEYnO3eN-CCw9vDr6S-I,4442
24
- docling_core/types/doc/document.py,sha256=B56FA5lGAEodjfIUncXSstQclAmyt3GOybMiKEEIc7s,52138
24
+ docling_core/types/doc/document.py,sha256=XF43-v9oflV-E5r2k2quoKvq8qBp5mAB_VunshY9b10,56356
25
25
  docling_core/types/doc/labels.py,sha256=A8vWP82VAeXO1rlCO0oDKo_Hb8uDeQe0myOTY3P03hk,1596
26
26
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
27
27
  docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
@@ -49,8 +49,8 @@ docling_core/utils/generate_docs.py,sha256=BdKAoduWXOc7YMvcmlhjoJOFlUxij1ybxglj6
49
49
  docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2tyi_OhHepHYtZg,1654
50
50
  docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
51
51
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
52
- docling_core-2.2.3.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
53
- docling_core-2.2.3.dist-info/METADATA,sha256=DlV-TrYKPq-qbI9d0iS4mrOJs_CwV9QZNflqGEy0crE,5432
54
- docling_core-2.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
- docling_core-2.2.3.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
56
- docling_core-2.2.3.dist-info/RECORD,,
52
+ docling_core-2.3.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
53
+ docling_core-2.3.1.dist-info/METADATA,sha256=mASC44D6AB2bIACFr2oGrsZHtHRzn5e1wjBJyy6ccns,5432
54
+ docling_core-2.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
+ docling_core-2.3.1.dist-info/entry_points.txt,sha256=jIxlWv3tnO04irlZc0zfhqJIgz1bg9Hha4AkaLWSdUA,177
56
+ docling_core-2.3.1.dist-info/RECORD,,