docling-core 2.26.4__py3-none-any.whl → 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -300,7 +300,7 @@ class TableCell(BaseModel):
300
300
  @classmethod
301
301
  def from_dict_format(cls, data: Any) -> Any:
302
302
  """from_dict_format."""
303
- if isinstance(data, Dict):
303
+ if isinstance(data, dict):
304
304
  # Check if this is a native BoundingBox or a bbox from docling-ibm-models
305
305
  if (
306
306
  # "bbox" not in data
@@ -2888,7 +2888,7 @@ class DoclingDocument(BaseModel):
2888
2888
  mode: str = "json",
2889
2889
  by_alias: bool = True,
2890
2890
  exclude_none: bool = True,
2891
- ) -> Dict:
2891
+ ) -> Dict[str, Any]:
2892
2892
  """Export to dict."""
2893
2893
  out = self.model_dump(mode=mode, by_alias=by_alias, exclude_none=exclude_none)
2894
2894
 
@@ -3143,6 +3143,7 @@ class DoclingDocument(BaseModel):
3143
3143
  from_element: int = 0,
3144
3144
  to_element: int = sys.maxsize,
3145
3145
  labels: Optional[set[DocItemLabel]] = None,
3146
+ enable_chart_tables: bool = True,
3146
3147
  image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
3147
3148
  formula_to_mathml: bool = True,
3148
3149
  page_no: Optional[int] = None,
@@ -3176,6 +3177,7 @@ class DoclingDocument(BaseModel):
3176
3177
  start_idx=from_element,
3177
3178
  stop_idx=to_element,
3178
3179
  image_mode=image_mode,
3180
+ enable_chart_tables=enable_chart_tables,
3179
3181
  formula_to_mathml=formula_to_mathml,
3180
3182
  html_head=html_head,
3181
3183
  html_lang=html_lang,
@@ -4042,6 +4044,28 @@ class DoclingDocument(BaseModel):
4042
4044
  self.pages[page_no] = pitem
4043
4045
  return pitem
4044
4046
 
4047
+ def get_visualization(
4048
+ self, show_label: bool = True
4049
+ ) -> dict[Optional[int], PILImage.Image]:
4050
+ """Get visualization of the document as images by page."""
4051
+ from docling_core.transforms.visualizer.layout_visualizer import (
4052
+ LayoutVisualizer,
4053
+ )
4054
+ from docling_core.transforms.visualizer.reading_order_visualizer import (
4055
+ ReadingOrderVisualizer,
4056
+ )
4057
+
4058
+ visualizer = ReadingOrderVisualizer(
4059
+ base_visualizer=LayoutVisualizer(
4060
+ params=LayoutVisualizer.Params(
4061
+ show_label=show_label,
4062
+ ),
4063
+ ),
4064
+ )
4065
+ images = visualizer.get_visualization(doc=self)
4066
+
4067
+ return images
4068
+
4045
4069
  @field_validator("version")
4046
4070
  @classmethod
4047
4071
  def check_version_is_compatible(cls, v: str) -> str:
@@ -45,6 +45,7 @@ class DocItemLabel(str, Enum):
45
45
  DocItemLabel.PAGE_FOOTER: (204, 255, 204),
46
46
  DocItemLabel.PAGE_HEADER: (204, 255, 204),
47
47
  DocItemLabel.PICTURE: (255, 204, 164),
48
+ DocItemLabel.CHART: (255, 204, 164),
48
49
  DocItemLabel.SECTION_HEADER: (255, 153, 153),
49
50
  DocItemLabel.TABLE: (255, 204, 204),
50
51
  DocItemLabel.TEXT: (255, 255, 153),
@@ -58,7 +59,7 @@ class DocItemLabel(str, Enum):
58
59
  DocItemLabel.PARAGRAPH: (255, 255, 153),
59
60
  DocItemLabel.REFERENCE: (176, 224, 230),
60
61
  }
61
- return color_map[label]
62
+ return color_map.get(label, (0, 0, 0))
62
63
 
63
64
 
64
65
  class GroupLabel(str, Enum):
@@ -10,6 +10,7 @@ from enum import Enum
10
10
  from pathlib import Path
11
11
  from typing import (
12
12
  Annotated,
13
+ Any,
13
14
  Dict,
14
15
  Iterator,
15
16
  List,
@@ -538,7 +539,7 @@ class SegmentedPdfPage(SegmentedPage):
538
539
  cells.append(pc)
539
540
  return cells
540
541
 
541
- def export_to_dict(self) -> Dict:
542
+ def export_to_dict(self) -> Dict[str, Any]:
542
543
  """Export the page data to a dictionary.
543
544
 
544
545
  Returns:
@@ -1150,7 +1151,7 @@ class PdfTableOfContents(BaseModel):
1150
1151
 
1151
1152
  children: List["PdfTableOfContents"] = []
1152
1153
 
1153
- def export_to_dict(self, mode: str = "json") -> Dict:
1154
+ def export_to_dict(self, mode: str = "json") -> Dict[str, Any]:
1154
1155
  """Export the table of contents to a dictionary.
1155
1156
 
1156
1157
  Args:
@@ -1212,7 +1213,7 @@ class ParsedPdfDocument(BaseModel):
1212
1213
  def export_to_dict(
1213
1214
  self,
1214
1215
  mode: str = "json",
1215
- ) -> Dict:
1216
+ ) -> Dict[str, Any]:
1216
1217
  """Export the document to a dictionary.
1217
1218
 
1218
1219
  Args:
@@ -6,7 +6,7 @@
6
6
  """Models for the Docling Document data type."""
7
7
 
8
8
  from datetime import datetime
9
- from typing import Dict, Generic, Optional, Union
9
+ from typing import Any, Dict, Generic, Optional, Union
10
10
 
11
11
  from pydantic import (
12
12
  AnyHttpUrl,
@@ -434,7 +434,7 @@ class ExportedCCSDocument(
434
434
 
435
435
  return pagedims
436
436
 
437
- def export_to_dict(self) -> Dict:
437
+ def export_to_dict(self) -> Dict[str, Any]:
438
438
  """export_to_dict."""
439
439
  return self.model_dump(mode="json", by_alias=True, exclude_none=True)
440
440
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.26.4
3
+ Version: 2.28.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -26,6 +26,7 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Classifier: Typing :: Typed
28
28
  Provides-Extra: chunking
29
+ Provides-Extra: chunking-openai
29
30
  Requires-Dist: jsonref (>=1.1.0,<2.0.0)
30
31
  Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
31
32
  Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
@@ -33,8 +34,9 @@ Requires-Dist: pandas (>=2.1.4,<3.0.0)
33
34
  Requires-Dist: pillow (>=10.0.0,<12.0.0)
34
35
  Requires-Dist: pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)
35
36
  Requires-Dist: pyyaml (>=5.1,<7.0.0)
36
- Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking"
37
+ Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking" or extra == "chunking-openai"
37
38
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
39
+ Requires-Dist: tiktoken (>=0.9.0,<0.10.0) ; extra == "chunking-openai"
38
40
  Requires-Dist: transformers (>=4.34.0,<5.0.0) ; extra == "chunking"
39
41
  Requires-Dist: typer (>=0.12.5,<0.16.0)
40
42
  Requires-Dist: typing-extensions (>=4.12.2,<5.0.0)
@@ -3,12 +3,12 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
3
3
  docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
4
4
  docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
5
5
  docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
6
- docling_core/experimental/serializer/base.py,sha256=1sD1v5rWC4MT_Y6BWpMDjUAwuEqC0TR9YjQJZlhPt50,5901
7
- docling_core/experimental/serializer/common.py,sha256=iQUJPRZUhpGMi_s4makkZcINy5sdtxu2ehS9N8lnoMM,17332
8
- docling_core/experimental/serializer/doctags.py,sha256=e97FJHh77x--g2t1O2YprBzF8lkihn_xOr59EjnR7ag,17794
9
- docling_core/experimental/serializer/html.py,sha256=C-xf_PH28R6DoISFDoZwBK67rsfFmTemgZ_VW2i0haY,31896
6
+ docling_core/experimental/serializer/base.py,sha256=9bgpWA0oMmZNRc3yIuZVnu5bJ1glClBsswtVF1vYwMI,6046
7
+ docling_core/experimental/serializer/common.py,sha256=uviwBuYowzqvCbY-vy8v2VaEadJISk9aDETrkrfDo38,17437
8
+ docling_core/experimental/serializer/doctags.py,sha256=RbHdqmFJ-t3oUvCsv0QjbIZqgUajPrt41jMaJGp4sdA,17874
9
+ docling_core/experimental/serializer/html.py,sha256=By7NoDXQ4GDW-iFf8zWCYuU4f_TOHA8i86eGk60d4WM,33070
10
10
  docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
11
- docling_core/experimental/serializer/markdown.py,sha256=5bvONhaA1EdAD0c3WlWfr2x2KmRaSZd8muG-91XVHgc,17733
11
+ docling_core/experimental/serializer/markdown.py,sha256=WineuzwGDbFhbqEdz-sNWYewrUwBM0zfj88T8URaq6w,17877
12
12
  docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
14
14
  docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
@@ -27,14 +27,22 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
27
27
  docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
28
28
  docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
29
29
  docling_core/transforms/chunker/hierarchical_chunker.py,sha256=iYzA65INFo89klc94jixuzQP8ivywe-3aVYznt2Csv8,8287
30
- docling_core/transforms/chunker/hybrid_chunker.py,sha256=JPKKgfAdHqkYp4qyZWZyjJ3fYFq9lgD-mTaVVnm5T0Y,10936
30
+ docling_core/transforms/chunker/hybrid_chunker.py,sha256=i2rxSE_6JZPClljcA_HVf0Pq5KgLyILhzG7CwRFcTIE,11888
31
+ docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
32
+ docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
33
+ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=JQ-D3b5vTPQbvu4HaMfYqFzSBLbV_HnmoBGv7d6Kqn4,2220
34
+ docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
35
+ docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
+ docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
+ docling_core/transforms/visualizer/layout_visualizer.py,sha256=G_xPs5S_87RPPAIMKM6ryMU2aV_zGLYUTOlTQprIRD4,7336
38
+ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=XXVuiI-Y0AH5uJCXINmfzcSSkTwR55-4fL6TOgzir6Y,5203
31
39
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
32
40
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
33
41
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
34
42
  docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
35
- docling_core/types/doc/document.py,sha256=gK9-qiMi74p0jPeAHW6YGKl-O0ZSYE-p36MQAco3lx4,139341
36
- docling_core/types/doc/labels.py,sha256=Kmrrdmd6ejXomeXDlxjpmBEpPxMCYOc_3I2GSaoqqi4,5748
37
- docling_core/types/doc/page.py,sha256=QI1D5p63AxboT6PnHa7UlbPmH2i2_E3qIk_Gk2fdrxs,40270
43
+ docling_core/types/doc/document.py,sha256=uYQTUEeZ40T5698Xff7NhC3iTbk1F76omZNvHIUmrfc,140174
44
+ docling_core/types/doc/labels.py,sha256=3QgteZZ4jKi0fideTuTnuriviJBwew-5RKE4pse7Ppk,5812
45
+ docling_core/types/doc/page.py,sha256=44tK6XM6Py0pK7zTyJ4kaZ5MLj8PvXIiw31hoQYa-Xs,40309
38
46
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
39
47
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
40
48
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
@@ -45,7 +53,7 @@ docling_core/types/legacy_doc/base.py,sha256=aBKBunw6M6nvEq4lqP1cfFWK3GpGa6PXwNQ
45
53
  docling_core/types/legacy_doc/doc_ann.py,sha256=CIQHW8yzu70bsMR9gtu7dqe4oz603Tq2eDDt9sh-tYo,1203
46
54
  docling_core/types/legacy_doc/doc_ocr.py,sha256=FfFqHAyMSbFt5cKeE7QLcxS0qUweBilBJoN9CH2TsQs,1394
47
55
  docling_core/types/legacy_doc/doc_raw.py,sha256=LrvQ9DhNjBRy98p_F9PUyHZeTGAxMKWqJzY4WJ7v-xs,3895
48
- docling_core/types/legacy_doc/document.py,sha256=AW8AIBM19k-HtTmXPsFKagqd6gi9THJdB4RsPb1C5F0,24534
56
+ docling_core/types/legacy_doc/document.py,sha256=lEuxUS03YrY4dKvfzB1I208x6LtD0zukV9QU0hfjuwM,24549
49
57
  docling_core/types/legacy_doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
50
58
  docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
51
59
  docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
@@ -65,8 +73,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
65
73
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
66
74
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
67
75
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
68
- docling_core-2.26.4.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
- docling_core-2.26.4.dist-info/METADATA,sha256=S9qfeO5R35zkBwzIYnd5q3-O3LBl3-WOsQRlL50klqw,5843
70
- docling_core-2.26.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
- docling_core-2.26.4.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
- docling_core-2.26.4.dist-info/RECORD,,
76
+ docling_core-2.28.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
+ docling_core-2.28.0.dist-info/METADATA,sha256=uWyLwSsIWmUuQvfTYctf24fkDeYck3PAE9UsjSf85z8,5976
78
+ docling_core-2.28.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
79
+ docling_core-2.28.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
80
+ docling_core-2.28.0.dist-info/RECORD,,