docling-core: docling_core-2.27.0-py3-none-any.whl → docling_core-2.28.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -10,6 +10,7 @@ from enum import Enum
10
10
  from pathlib import Path
11
11
  from typing import (
12
12
  Annotated,
13
+ Any,
13
14
  Dict,
14
15
  Iterator,
15
16
  List,
@@ -538,7 +539,7 @@ class SegmentedPdfPage(SegmentedPage):
538
539
  cells.append(pc)
539
540
  return cells
540
541
 
541
- def export_to_dict(self) -> Dict:
542
+ def export_to_dict(self) -> Dict[str, Any]:
542
543
  """Export the page data to a dictionary.
543
544
 
544
545
  Returns:
@@ -1150,7 +1151,7 @@ class PdfTableOfContents(BaseModel):
1150
1151
 
1151
1152
  children: List["PdfTableOfContents"] = []
1152
1153
 
1153
- def export_to_dict(self, mode: str = "json") -> Dict:
1154
+ def export_to_dict(self, mode: str = "json") -> Dict[str, Any]:
1154
1155
  """Export the table of contents to a dictionary.
1155
1156
 
1156
1157
  Args:
@@ -1212,7 +1213,7 @@ class ParsedPdfDocument(BaseModel):
1212
1213
  def export_to_dict(
1213
1214
  self,
1214
1215
  mode: str = "json",
1215
- ) -> Dict:
1216
+ ) -> Dict[str, Any]:
1216
1217
  """Export the document to a dictionary.
1217
1218
 
1218
1219
  Args:
@@ -6,7 +6,7 @@
6
6
  """Models for the Docling Document data type."""
7
7
 
8
8
  from datetime import datetime
9
- from typing import Dict, Generic, Optional, Union
9
+ from typing import Any, Dict, Generic, Optional, Union
10
10
 
11
11
  from pydantic import (
12
12
  AnyHttpUrl,
@@ -434,7 +434,7 @@ class ExportedCCSDocument(
434
434
 
435
435
  return pagedims
436
436
 
437
- def export_to_dict(self) -> Dict:
437
+ def export_to_dict(self) -> Dict[str, Any]:
438
438
  """export_to_dict."""
439
439
  return self.model_dump(mode="json", by_alias=True, exclude_none=True)
440
440
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.27.0
3
+ Version: 2.28.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -26,6 +26,7 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Classifier: Typing :: Typed
28
28
  Provides-Extra: chunking
29
+ Provides-Extra: chunking-openai
29
30
  Requires-Dist: jsonref (>=1.1.0,<2.0.0)
30
31
  Requires-Dist: jsonschema (>=4.16.0,<5.0.0)
31
32
  Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
@@ -33,8 +34,9 @@ Requires-Dist: pandas (>=2.1.4,<3.0.0)
33
34
  Requires-Dist: pillow (>=10.0.0,<12.0.0)
34
35
  Requires-Dist: pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)
35
36
  Requires-Dist: pyyaml (>=5.1,<7.0.0)
36
- Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking"
37
+ Requires-Dist: semchunk (>=2.2.0,<3.0.0) ; extra == "chunking" or extra == "chunking-openai"
37
38
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
39
+ Requires-Dist: tiktoken (>=0.9.0,<0.10.0) ; extra == "chunking-openai"
38
40
  Requires-Dist: transformers (>=4.34.0,<5.0.0) ; extra == "chunking"
39
41
  Requires-Dist: typer (>=0.12.5,<0.16.0)
40
42
  Requires-Dist: typing-extensions (>=4.12.2,<5.0.0)
@@ -3,12 +3,12 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
3
3
  docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
4
4
  docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
5
5
  docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
6
- docling_core/experimental/serializer/base.py,sha256=1sD1v5rWC4MT_Y6BWpMDjUAwuEqC0TR9YjQJZlhPt50,5901
7
- docling_core/experimental/serializer/common.py,sha256=iQUJPRZUhpGMi_s4makkZcINy5sdtxu2ehS9N8lnoMM,17332
8
- docling_core/experimental/serializer/doctags.py,sha256=e97FJHh77x--g2t1O2YprBzF8lkihn_xOr59EjnR7ag,17794
9
- docling_core/experimental/serializer/html.py,sha256=4uUthJZvUL6zvtynjrVXeOHAR43SnNP3EM7ORx3T-SE,32948
6
+ docling_core/experimental/serializer/base.py,sha256=9bgpWA0oMmZNRc3yIuZVnu5bJ1glClBsswtVF1vYwMI,6046
7
+ docling_core/experimental/serializer/common.py,sha256=uviwBuYowzqvCbY-vy8v2VaEadJISk9aDETrkrfDo38,17437
8
+ docling_core/experimental/serializer/doctags.py,sha256=RbHdqmFJ-t3oUvCsv0QjbIZqgUajPrt41jMaJGp4sdA,17874
9
+ docling_core/experimental/serializer/html.py,sha256=By7NoDXQ4GDW-iFf8zWCYuU4f_TOHA8i86eGk60d4WM,33070
10
10
  docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
11
- docling_core/experimental/serializer/markdown.py,sha256=5bvONhaA1EdAD0c3WlWfr2x2KmRaSZd8muG-91XVHgc,17733
11
+ docling_core/experimental/serializer/markdown.py,sha256=WineuzwGDbFhbqEdz-sNWYewrUwBM0zfj88T8URaq6w,17877
12
12
  docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
14
14
  docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
@@ -27,14 +27,22 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
27
27
  docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
28
28
  docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
29
29
  docling_core/transforms/chunker/hierarchical_chunker.py,sha256=iYzA65INFo89klc94jixuzQP8ivywe-3aVYznt2Csv8,8287
30
- docling_core/transforms/chunker/hybrid_chunker.py,sha256=JPKKgfAdHqkYp4qyZWZyjJ3fYFq9lgD-mTaVVnm5T0Y,10936
30
+ docling_core/transforms/chunker/hybrid_chunker.py,sha256=i2rxSE_6JZPClljcA_HVf0Pq5KgLyILhzG7CwRFcTIE,11888
31
+ docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
32
+ docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
33
+ docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=JQ-D3b5vTPQbvu4HaMfYqFzSBLbV_HnmoBGv7d6Kqn4,2220
34
+ docling_core/transforms/chunker/tokenizer/openai.py,sha256=zt2kwcC-r8MafeEG0CESab8E4RIC9aaFXxxnxOGyTMA,918
35
+ docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
36
+ docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
37
+ docling_core/transforms/visualizer/layout_visualizer.py,sha256=G_xPs5S_87RPPAIMKM6ryMU2aV_zGLYUTOlTQprIRD4,7336
38
+ docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=XXVuiI-Y0AH5uJCXINmfzcSSkTwR55-4fL6TOgzir6Y,5203
31
39
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
32
40
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
33
41
  docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
34
42
  docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
35
- docling_core/types/doc/document.py,sha256=eCQUpOJbGdu5lKIaBs-IXddHrF38SgqYd8XYskv-Rpg,139436
43
+ docling_core/types/doc/document.py,sha256=uYQTUEeZ40T5698Xff7NhC3iTbk1F76omZNvHIUmrfc,140174
36
44
  docling_core/types/doc/labels.py,sha256=3QgteZZ4jKi0fideTuTnuriviJBwew-5RKE4pse7Ppk,5812
37
- docling_core/types/doc/page.py,sha256=QI1D5p63AxboT6PnHa7UlbPmH2i2_E3qIk_Gk2fdrxs,40270
45
+ docling_core/types/doc/page.py,sha256=44tK6XM6Py0pK7zTyJ4kaZ5MLj8PvXIiw31hoQYa-Xs,40309
38
46
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
39
47
  docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
40
48
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
@@ -45,7 +53,7 @@ docling_core/types/legacy_doc/base.py,sha256=aBKBunw6M6nvEq4lqP1cfFWK3GpGa6PXwNQ
45
53
  docling_core/types/legacy_doc/doc_ann.py,sha256=CIQHW8yzu70bsMR9gtu7dqe4oz603Tq2eDDt9sh-tYo,1203
46
54
  docling_core/types/legacy_doc/doc_ocr.py,sha256=FfFqHAyMSbFt5cKeE7QLcxS0qUweBilBJoN9CH2TsQs,1394
47
55
  docling_core/types/legacy_doc/doc_raw.py,sha256=LrvQ9DhNjBRy98p_F9PUyHZeTGAxMKWqJzY4WJ7v-xs,3895
48
- docling_core/types/legacy_doc/document.py,sha256=AW8AIBM19k-HtTmXPsFKagqd6gi9THJdB4RsPb1C5F0,24534
56
+ docling_core/types/legacy_doc/document.py,sha256=lEuxUS03YrY4dKvfzB1I208x6LtD0zukV9QU0hfjuwM,24549
49
57
  docling_core/types/legacy_doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
50
58
  docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
51
59
  docling_core/types/nlp/qa.py,sha256=TyZjubqkEoREv0YzmuLKlq4WW_TnJNj7BoBY1_r2a1E,2731
@@ -65,8 +73,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
65
73
  docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
66
74
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
67
75
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
68
- docling_core-2.27.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
69
- docling_core-2.27.0.dist-info/METADATA,sha256=8G_mgHJzCaJxuquf1nZualW0nWwfSC2MKCn3EyGJ__A,5843
70
- docling_core-2.27.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
71
- docling_core-2.27.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
72
- docling_core-2.27.0.dist-info/RECORD,,
76
+ docling_core-2.28.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
77
+ docling_core-2.28.0.dist-info/METADATA,sha256=uWyLwSsIWmUuQvfTYctf24fkDeYck3PAE9UsjSf85z8,5976
78
+ docling_core-2.28.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
79
+ docling_core-2.28.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
80
+ docling_core-2.28.0.dist-info/RECORD,,