docling-core 2.27.0__py3-none-any.whl → 2.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -300,7 +300,7 @@ class TableCell(BaseModel):
300
300
  @classmethod
301
301
  def from_dict_format(cls, data: Any) -> Any:
302
302
  """from_dict_format."""
303
- if isinstance(data, Dict):
303
+ if isinstance(data, dict):
304
304
  # Check if this is a native BoundingBox or a bbox from docling-ibm-models
305
305
  if (
306
306
  # "bbox" not in data
@@ -1383,7 +1383,7 @@ class TableItem(FloatingItem):
1383
1383
  if add_cross_cell:
1384
1384
  body.append(str(TableToken.OTSL_XCEL.value))
1385
1385
  body.append(str(TableToken.OTSL_NL.value))
1386
- body_str = "".join(body)
1386
+ body_str = "".join(body)
1387
1387
  return body_str
1388
1388
 
1389
1389
  @deprecated("Use export_to_doctags() instead.")
@@ -2888,7 +2888,7 @@ class DoclingDocument(BaseModel):
2888
2888
  mode: str = "json",
2889
2889
  by_alias: bool = True,
2890
2890
  exclude_none: bool = True,
2891
- ) -> Dict:
2891
+ ) -> Dict[str, Any]:
2892
2892
  """Export to dict."""
2893
2893
  out = self.model_dump(mode=mode, by_alias=by_alias, exclude_none=exclude_none)
2894
2894
 
@@ -4044,6 +4044,28 @@ class DoclingDocument(BaseModel):
4044
4044
  self.pages[page_no] = pitem
4045
4045
  return pitem
4046
4046
 
4047
+ def get_visualization(
4048
+ self, show_label: bool = True
4049
+ ) -> dict[Optional[int], PILImage.Image]:
4050
+ """Get visualization of the document as images by page."""
4051
+ from docling_core.transforms.visualizer.layout_visualizer import (
4052
+ LayoutVisualizer,
4053
+ )
4054
+ from docling_core.transforms.visualizer.reading_order_visualizer import (
4055
+ ReadingOrderVisualizer,
4056
+ )
4057
+
4058
+ visualizer = ReadingOrderVisualizer(
4059
+ base_visualizer=LayoutVisualizer(
4060
+ params=LayoutVisualizer.Params(
4061
+ show_label=show_label,
4062
+ ),
4063
+ ),
4064
+ )
4065
+ images = visualizer.get_visualization(doc=self)
4066
+
4067
+ return images
4068
+
4047
4069
  @field_validator("version")
4048
4070
  @classmethod
4049
4071
  def check_version_is_compatible(cls, v: str) -> str:
@@ -10,6 +10,7 @@ from enum import Enum
10
10
  from pathlib import Path
11
11
  from typing import (
12
12
  Annotated,
13
+ Any,
13
14
  Dict,
14
15
  Iterator,
15
16
  List,
@@ -538,7 +539,7 @@ class SegmentedPdfPage(SegmentedPage):
538
539
  cells.append(pc)
539
540
  return cells
540
541
 
541
- def export_to_dict(self) -> Dict:
542
+ def export_to_dict(self) -> Dict[str, Any]:
542
543
  """Export the page data to a dictionary.
543
544
 
544
545
  Returns:
@@ -1150,7 +1151,7 @@ class PdfTableOfContents(BaseModel):
1150
1151
 
1151
1152
  children: List["PdfTableOfContents"] = []
1152
1153
 
1153
- def export_to_dict(self, mode: str = "json") -> Dict:
1154
+ def export_to_dict(self, mode: str = "json") -> Dict[str, Any]:
1154
1155
  """Export the table of contents to a dictionary.
1155
1156
 
1156
1157
  Args:
@@ -1212,7 +1213,7 @@ class ParsedPdfDocument(BaseModel):
1212
1213
  def export_to_dict(
1213
1214
  self,
1214
1215
  mode: str = "json",
1215
- ) -> Dict:
1216
+ ) -> Dict[str, Any]:
1216
1217
  """Export the document to a dictionary.
1217
1218
 
1218
1219
  Args:
@@ -6,7 +6,7 @@
6
6
  """Models for the Docling Document data type."""
7
7
 
8
8
  from datetime import datetime
9
- from typing import Dict, Generic, Optional, Union
9
+ from typing import Any, Dict, Generic, Optional, Union
10
10
 
11
11
  from pydantic import (
12
12
  AnyHttpUrl,
@@ -434,7 +434,7 @@ class ExportedCCSDocument(
434
434
 
435
435
  return pagedims
436
436
 
437
- def export_to_dict(self) -> Dict:
437
+ def export_to_dict(self) -> Dict[str, Any]:
438
438
  """export_to_dict."""
439
439
  return self.model_dump(mode="json", by_alias=True, exclude_none=True)
440
440
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.27.0
3
+ Version: 2.28.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -26,6 +26,7 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Classifier: Typing :: Typed
28
28
  Provides-Extra: chunking
29
+ Provides-Extra: chunking-openai
29
30
  Requires-Dist: jsonref (>=1.1.0,<2.0.0)
30
31
  Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
31
32
  Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
@@ -33,8 +34,9 @@ Requires-Dist: pandas (>=2.1.4,<3.0.0)
33
34
  Requires-Dist: pillow (>=10.0.0,<12.0.0)
34
35
  Requires-Dist: pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)
35
36
  Requires-Dist: pyyaml (>=5.1,<7.0.0)
36
- Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking"
37
+ Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking" or extra == "chunking-openai"
37
38
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
39
+ Requires-Dist: tiktoken (>=0.9.0,<0.10.0) ; extra == "chunking-openai"
38
40
  Requires-Dist: transformers (>=4.34.0,<5.0.0) ; extra == "chunking"
39
41
  Requires-Dist: typer (>=0.12.5,<0.16.0)
40
42
  Requires-Dist: typing-extensions (>=4.12.2,<5.0.0)
@@ -3,12 +3,12 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
3
3
  docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
4
4
  docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
5
5
  docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
6
- docling_core/experimental/serializer/base.py,sha256=1sD1v5rWC4MT_Y6BWpMDjUAwuEqC0TR9YjQJZlhPt50,5901
7
- docling_core/experimental/serializer/common.py,sha256=iQUJPRZUhpGMi_s4makkZcINy5sdtxu2ehS9N8lnoMM,17332
8
- docling_core/experimental/serializer/doctags.py,sha256=e97FJHh77x--g2t1O2YprBzF8lkihn_xOr59EjnR7ag,17794
9
- docling_core/experimental/serializer/html.py,sha256=4uUthJZvUL6zvtynjrVXeOHAR43SnNP3EM7ORx3T-SE,32948
6
+ docling_core/experimental/serializer/base.py,sha256=9bgpWA0oMmZNRc3yIuZVnu5bJ1glClBsswtVF1vYwMI,6046
7
+ docling_core/experimental/serializer/common.py,sha256=uviwBuYowzqvCbY-vy8v2VaEadJISk9aDETrkrfDo38,17437
8
+ docling_core/experimental/serializer/doctags.py,sha256=RbHdqmFJ-t3oUvCsv0QjbIZqgUajPrt41jMaJGp4sdA,17874
9
+ docling_core/experimental/serializer/html.py,sha256=By7NoDXQ4GDW-iFf8zWCYuU4f_TOHA8i86eGk60d4WM,33070
10
10
  docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
11
- docling_core/experimental/serializer/markdown.py,sha256=5bvONhaA1EdAD0c3WlWfr2x2KmRaSZd8muG-91XVHgc,17733
11
+ docling_core/experimental/serializer/markdown.py,sha256=WineuzwGDbFhbqEdz-sNWYewrUwBM0zfj88T8URaq6w,17877
12
12
  docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
14
14
  docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
@@ -27,14 +27,22 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
27
27
  docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
28
28
  docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
29
29
  docling_core/transforms/chunker/hierarchical_chunker.py,sha256=iYzA65INFo89klc94jixuzQP8ivywe-3aVYznt2Csv8,8287
30
- docling_core/transforms/chunker/hybrid_chunker.py,sha256=JPKKgfAdHqkYp4qyZWZyjJ3fYFq9lgD-mTaVVnm5T0Y,10936
30
+ docling_core/transforms/chunker/hybrid_chunker.py,sha256=i2rxSE_6JZPClljcA_HVf0Pq5KgLyILhzG7CwRFcTIE,11888
31
+ docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
32
+ docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
33
+ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=JQ-D3b5vTPQbvu4HaMfYqFzSBLbV_HnmoBGv7d6Kqn4,2220
34
+ docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
35
+ docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
+ docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
+ docling_core/transforms/visualizer/layout_visualizer.py,sha256=nUOiDHuDYLM-Bcagiwz6JicaAhZroOdFCOyl1I8GUjA,7655
38
+ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=XXVuiI-Y0AH5uJCXINmfzcSSkTwR55-4fL6TOgzir6Y,5203
31
39
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
32
40
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
33
41
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
34
42
  docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
35
- docling_core/types/doc/document.py,sha256=eCQUpOJbGdu5lKIaBs-IXddHrF38SgqYd8XYskv-Rpg,139436
43
+ docling_core/types/doc/document.py,sha256=4toRMU04V1rWaquyvcXPB9hzefD3cH_8MatgBCf1Mc4,140170
36
44
  docling_core/types/doc/labels.py,sha256=3QgteZZ4jKi0fideTuTnuriviJBwew-5RKE4pse7Ppk,5812
37
- docling_core/types/doc/page.py,sha256=QI1D5p63AxboT6PnHa7UlbPmH2i2_E3qIk_Gk2fdrxs,40270
45
+ docling_core/types/doc/page.py,sha256=44tK6XM6Py0pK7zTyJ4kaZ5MLj8PvXIiw31hoQYa-Xs,40309
38
46
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
39
47
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
40
48
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
@@ -45,7 +53,7 @@ docling_core/types/legacy_doc/base.py,sha256=aBKBunw6M6nvEq4lqP1cfFWK3GpGa6PXwNQ
45
53
  docling_core/types/legacy_doc/doc_ann.py,sha256=CIQHW8yzu70bsMR9gtu7dqe4oz603Tq2eDDt9sh-tYo,1203
46
54
  docling_core/types/legacy_doc/doc_ocr.py,sha256=FfFqHAyMSbFt5cKeE7QLcxS0qUweBilBJoN9CH2TsQs,1394
47
55
  docling_core/types/legacy_doc/doc_raw.py,sha256=LrvQ9DhNjBRy98p_F9PUyHZeTGAxMKWqJzY4WJ7v-xs,3895
48
- docling_core/types/legacy_doc/document.py,sha256=AW8AIBM19k-HtTmXPsFKagqd6gi9THJdB4RsPb1C5F0,24534
56
+ docling_core/types/legacy_doc/document.py,sha256=lEuxUS03YrY4dKvfzB1I208x6LtD0zukV9QU0hfjuwM,24549
49
57
  docling_core/types/legacy_doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
50
58
  docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
51
59
  docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
@@ -65,8 +73,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
65
73
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
66
74
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
67
75
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
68
- docling_core-2.27.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
- docling_core-2.27.0.dist-info/METADATA,sha256=8G_mgHJzCaJxuquf1nZualW0nWwfSC2MKCn3EyGJ__A,5843
70
- docling_core-2.27.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
- docling_core-2.27.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
- docling_core-2.27.0.dist-info/RECORD,,
76
+ docling_core-2.28.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
+ docling_core-2.28.1.dist-info/METADATA,sha256=02gkT1pLcBA0yagMKLBAEpjML_omcGsZS8dDG2RCFVY,5976
78
+ docling_core-2.28.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
79
+ docling_core-2.28.1.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
80
+ docling_core-2.28.1.dist-info/RECORD,,