docling-core 2.2.3__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (55)
  1. {docling_core-2.2.3 → docling_core-2.3.1}/PKG-INFO +1 -1
  2. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/transforms/chunker/hierarchical_chunker.py +7 -6
  3. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/doc/document.py +154 -0
  4. {docling_core-2.2.3 → docling_core-2.3.1}/pyproject.toml +1 -1
  5. {docling_core-2.2.3 → docling_core-2.3.1}/LICENSE +0 -0
  6. {docling_core-2.2.3 → docling_core-2.3.1}/README.md +0 -0
  7. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/__init__.py +0 -0
  8. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/py.typed +0 -0
  9. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
  10. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
  11. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  12. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
  13. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  14. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  15. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  16. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  17. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/search/__init__.py +0 -0
  18. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  19. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/search/mapping.py +0 -0
  20. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/search/meta.py +0 -0
  21. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/search/package.py +0 -0
  22. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/transforms/__init__.py +0 -0
  23. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/transforms/chunker/__init__.py +0 -0
  24. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/transforms/chunker/base.py +0 -0
  25. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/__init__.py +0 -0
  26. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/base.py +0 -0
  27. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/doc/__init__.py +0 -0
  28. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/doc/base.py +0 -0
  29. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/doc/labels.py +0 -0
  30. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/gen/__init__.py +0 -0
  31. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/gen/generic.py +0 -0
  32. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/__init__.py +0 -0
  33. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/base.py +0 -0
  34. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  35. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  36. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  37. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/document.py +0 -0
  38. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/legacy_doc/tokens.py +0 -0
  39. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/nlp/__init__.py +0 -0
  40. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/nlp/qa.py +0 -0
  41. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/nlp/qa_labels.py +0 -0
  42. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/__init__.py +0 -0
  43. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/attribute.py +0 -0
  44. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/base.py +0 -0
  45. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/predicate.py +0 -0
  46. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/record.py +0 -0
  47. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/statement.py +0 -0
  48. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/types/rec/subject.py +0 -0
  49. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/__init__.py +0 -0
  50. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/alias.py +0 -0
  51. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/file.py +0 -0
  52. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/generate_docs.py +0 -0
  53. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/generate_jsonschema.py +0 -0
  54. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/validate.py +0 -0
  55. {docling_core-2.2.3 → docling_core-2.3.1}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.2.3
3
+ Version: 2.3.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -183,14 +183,15 @@ class HierarchicalChunker(BaseChunker):
183
183
  )
184
184
  list_items = [] # reset
185
185
 
186
- if isinstance(
187
- item, SectionHeaderItem
188
- ) or ( # TODO remove when all captured as SectionHeaderItem:
186
+ if isinstance(item, SectionHeaderItem) or (
189
187
  isinstance(item, TextItem)
190
- and item.label == DocItemLabel.SECTION_HEADER
188
+ and item.label in [DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE]
191
189
  ):
192
- # TODO second branch not needed once cleanup above complete:
193
- level = item.level if isinstance(item, SectionHeaderItem) else 1
190
+ level = (
191
+ item.level
192
+ if isinstance(item, SectionHeaderItem)
193
+ else (0 if item.label == DocItemLabel.TITLE else 1)
194
+ )
194
195
  heading_by_level[level] = item.text
195
196
 
196
197
  # remove headings of higher level as they just went out of scope
@@ -100,12 +100,166 @@ class PictureMiscData(BaseModel):
100
100
  content: Dict[str, Any]
101
101
 
102
102
 
103
+ class ChartLine(BaseModel):
104
+ """Represents a line in a line chart.
105
+
106
+ Attributes:
107
+ label (str): The label for the line.
108
+ values (List[Tuple[float, float]]): A list of (x, y) coordinate pairs
109
+ representing the line's data points.
110
+ """
111
+
112
+ label: str
113
+ values: List[Tuple[float, float]]
114
+
115
+
116
+ class ChartBar(BaseModel):
117
+ """Represents a bar in a bar chart.
118
+
119
+ Attributes:
120
+ label (str): The label for the bar.
121
+ values (float): The value associated with the bar.
122
+ """
123
+
124
+ label: str
125
+ values: float
126
+
127
+
128
+ class ChartStackedBar(BaseModel):
129
+ """Represents a stacked bar in a stacked bar chart.
130
+
131
+ Attributes:
132
+ label (List[str]): The labels for the stacked bars. Multiple values are stored
133
+ in cases where the chart is "double stacked," meaning bars are stacked both
134
+ horizontally and vertically.
135
+ values (List[Tuple[str, int]]): A list of values representing different segments
136
+ of the stacked bar along with their label.
137
+ """
138
+
139
+ label: List[str]
140
+ values: List[Tuple[str, int]]
141
+
142
+
143
+ class ChartSlice(BaseModel):
144
+ """Represents a slice in a pie chart.
145
+
146
+ Attributes:
147
+ label (str): The label for the slice.
148
+ value (float): The value represented by the slice.
149
+ """
150
+
151
+ label: str
152
+ value: float
153
+
154
+
155
+ class ChartPoint(BaseModel):
156
+ """Represents a point in a scatter chart.
157
+
158
+ Attributes:
159
+ value (Tuple[float, float]): A (x, y) coordinate pair representing a point in a
160
+ chart.
161
+ """
162
+
163
+ value: Tuple[float, float]
164
+
165
+
166
+ class PictureChartData(BaseModel):
167
+ """Base class for picture chart data.
168
+
169
+ Attributes:
170
+ title (str): The title of the chart.
171
+ """
172
+
173
+ title: str
174
+
175
+
176
+ class PictureLineChartData(PictureChartData):
177
+ """Represents data of a line chart.
178
+
179
+ Attributes:
180
+ kind (Literal["line_chart_data"]): The type of the chart.
181
+ x_axis_label (str): The label for the x-axis.
182
+ y_axis_label (str): The label for the y-axis.
183
+ lines (List[ChartLine]): A list of lines in the chart.
184
+ """
185
+
186
+ kind: Literal["line_chart_data"] = "line_chart_data"
187
+ x_axis_label: str
188
+ y_axis_label: str
189
+ lines: List[ChartLine]
190
+
191
+
192
+ class PictureBarChartData(PictureChartData):
193
+ """Represents data of a bar chart.
194
+
195
+ Attributes:
196
+ kind (Literal["bar_chart_data"]): The type of the chart.
197
+ x_axis_label (str): The label for the x-axis.
198
+ y_axis_label (str): The label for the y-axis.
199
+ bars (List[ChartBar]): A list of bars in the chart.
200
+ """
201
+
202
+ kind: Literal["bar_chart_data"] = "bar_chart_data"
203
+ x_axis_label: str
204
+ y_axis_label: str
205
+ bars: List[ChartBar]
206
+
207
+
208
+ class PictureStackedBarChartData(PictureChartData):
209
+ """Represents data of a stacked bar chart.
210
+
211
+ Attributes:
212
+ kind (Literal["stacked_bar_chart_data"]): The type of the chart.
213
+ x_axis_label (str): The label for the x-axis.
214
+ y_axis_label (str): The label for the y-axis.
215
+ stacked_bars (List[ChartStackedBar]): A list of stacked bars in the chart.
216
+ """
217
+
218
+ kind: Literal["stacked_bar_chart_data"] = "stacked_bar_chart_data"
219
+ x_axis_label: str
220
+ y_axis_label: str
221
+ stacked_bars: List[ChartStackedBar]
222
+
223
+
224
+ class PicturePieChartData(PictureChartData):
225
+ """Represents data of a pie chart.
226
+
227
+ Attributes:
228
+ kind (Literal["pie_chart_data"]): The type of the chart.
229
+ slices (List[ChartSlice]): A list of slices in the pie chart.
230
+ """
231
+
232
+ kind: Literal["pie_chart_data"] = "pie_chart_data"
233
+ slices: List[ChartSlice]
234
+
235
+
236
+ class PictureScatterChartData(PictureChartData):
237
+ """Represents data of a scatter chart.
238
+
239
+ Attributes:
240
+ kind (Literal["scatter_chart_data"]): The type of the chart.
241
+ x_axis_label (str): The label for the x-axis.
242
+ y_axis_label (str): The label for the y-axis.
243
+ points (List[ChartPoint]): A list of points in the scatter chart.
244
+ """
245
+
246
+ kind: Literal["scatter_chart_data"] = "scatter_chart_data"
247
+ x_axis_label: str
248
+ y_axis_label: str
249
+ points: List[ChartPoint]
250
+
251
+
103
252
  PictureDataType = Annotated[
104
253
  Union[
105
254
  PictureClassificationData,
106
255
  PictureDescriptionData,
107
256
  PictureMoleculeData,
108
257
  PictureMiscData,
258
+ PictureLineChartData,
259
+ PictureBarChartData,
260
+ PictureStackedBarChartData,
261
+ PicturePieChartData,
262
+ PictureScatterChartData,
109
263
  ],
110
264
  Field(discriminator="kind"),
111
265
  ]
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.2.3"
3
+ version = "2.3.1"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes