docling-core 2.40.0__py3-none-any.whl → 2.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/types/doc/base.py +21 -1
- docling_core/types/doc/document.py +38 -4
- docling_core/types/doc/page.py +18 -2
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/METADATA +1 -1
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/RECORD +9 -9
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/WHEEL +0 -0
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/entry_points.txt +0 -0
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/top_level.txt +0 -0
docling_core/types/doc/base.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from typing import List, Tuple
|
|
5
5
|
|
|
6
|
-
from pydantic import BaseModel
|
|
6
|
+
from pydantic import BaseModel, FieldSerializationInfo, field_serializer
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ImageRefMode(str, Enum):
|
|
@@ -21,12 +21,28 @@ class CoordOrigin(str, Enum):
|
|
|
21
21
|
BOTTOMLEFT = "BOTTOMLEFT"
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
_CTX_COORD_PREC = "coord_prec"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _serialize_precision(
|
|
28
|
+
value: float, info: FieldSerializationInfo, ctx_key: str
|
|
29
|
+
) -> float:
|
|
30
|
+
precision = info.context.get(ctx_key) if info.context else None
|
|
31
|
+
if isinstance(precision, int):
|
|
32
|
+
return round(value, precision)
|
|
33
|
+
return value
|
|
34
|
+
|
|
35
|
+
|
|
24
36
|
class Size(BaseModel):
|
|
25
37
|
"""Size."""
|
|
26
38
|
|
|
27
39
|
width: float = 0.0
|
|
28
40
|
height: float = 0.0
|
|
29
41
|
|
|
42
|
+
@field_serializer("width", "height")
|
|
43
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
44
|
+
return _serialize_precision(value, info, _CTX_COORD_PREC)
|
|
45
|
+
|
|
30
46
|
def as_tuple(self):
|
|
31
47
|
"""as_tuple."""
|
|
32
48
|
return (self.width, self.height)
|
|
@@ -52,6 +68,10 @@ class BoundingBox(BaseModel):
|
|
|
52
68
|
"""height."""
|
|
53
69
|
return abs(self.t - self.b)
|
|
54
70
|
|
|
71
|
+
@field_serializer("l", "t", "r", "b")
|
|
72
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
73
|
+
return _serialize_precision(value, info, _CTX_COORD_PREC)
|
|
74
|
+
|
|
55
75
|
def resize_by_scale(self, x_scale: float, y_scale: float):
|
|
56
76
|
"""resize_by_scale."""
|
|
57
77
|
return BoundingBox(
|
docling_core/types/doc/document.py
CHANGED
|
@@ -26,8 +26,10 @@ from pydantic import (
|
|
|
26
26
|
BaseModel,
|
|
27
27
|
ConfigDict,
|
|
28
28
|
Field,
|
|
29
|
+
FieldSerializationInfo,
|
|
29
30
|
StringConstraints,
|
|
30
31
|
computed_field,
|
|
32
|
+
field_serializer,
|
|
31
33
|
field_validator,
|
|
32
34
|
model_validator,
|
|
33
35
|
validate_call,
|
|
@@ -38,7 +40,12 @@ from typing_extensions import Annotated, Self, deprecated
|
|
|
38
40
|
from docling_core.search.package import VERSION_PATTERN
|
|
39
41
|
from docling_core.types.base import _JSON_POINTER_REGEX
|
|
40
42
|
from docling_core.types.doc import BoundingBox, Size
|
|
41
|
-
from docling_core.types.doc.base import CoordOrigin, ImageRefMode
|
|
43
|
+
from docling_core.types.doc.base import (
|
|
44
|
+
_CTX_COORD_PREC,
|
|
45
|
+
CoordOrigin,
|
|
46
|
+
ImageRefMode,
|
|
47
|
+
_serialize_precision,
|
|
48
|
+
)
|
|
42
49
|
from docling_core.types.doc.labels import (
|
|
43
50
|
CodeLanguageLabel,
|
|
44
51
|
DocItemLabel,
|
|
@@ -85,6 +92,8 @@ DOCUMENT_TOKENS_EXPORT_LABELS.update(
|
|
|
85
92
|
]
|
|
86
93
|
)
|
|
87
94
|
|
|
95
|
+
_CTX_CONFID_PREC = "confid_prec"
|
|
96
|
+
|
|
88
97
|
|
|
89
98
|
class BaseAnnotation(BaseModel):
|
|
90
99
|
"""Base class for all annotation types."""
|
|
@@ -98,6 +107,10 @@ class PictureClassificationClass(BaseModel):
|
|
|
98
107
|
class_name: str
|
|
99
108
|
confidence: float
|
|
100
109
|
|
|
110
|
+
@field_serializer("confidence")
|
|
111
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
112
|
+
return _serialize_precision(value, info, _CTX_CONFID_PREC)
|
|
113
|
+
|
|
101
114
|
|
|
102
115
|
class PictureClassificationData(BaseAnnotation):
|
|
103
116
|
"""PictureClassificationData."""
|
|
@@ -125,6 +138,10 @@ class PictureMoleculeData(BaseAnnotation):
|
|
|
125
138
|
segmentation: List[Tuple[float, float]]
|
|
126
139
|
provenance: str
|
|
127
140
|
|
|
141
|
+
@field_serializer("confidence")
|
|
142
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
143
|
+
return _serialize_precision(value, info, _CTX_CONFID_PREC)
|
|
144
|
+
|
|
128
145
|
|
|
129
146
|
class MiscAnnotation(BaseAnnotation):
|
|
130
147
|
"""MiscAnnotation."""
|
|
@@ -3048,6 +3065,8 @@ class DoclingDocument(BaseModel):
|
|
|
3048
3065
|
artifacts_dir: Optional[Path] = None,
|
|
3049
3066
|
image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
|
|
3050
3067
|
indent: int = 2,
|
|
3068
|
+
coord_precision: Optional[int] = None,
|
|
3069
|
+
confid_precision: Optional[int] = None,
|
|
3051
3070
|
):
|
|
3052
3071
|
"""Save as json."""
|
|
3053
3072
|
if isinstance(filename, str):
|
|
@@ -3061,7 +3080,9 @@ class DoclingDocument(BaseModel):
|
|
|
3061
3080
|
artifacts_dir, image_mode, reference_path=reference_path
|
|
3062
3081
|
)
|
|
3063
3082
|
|
|
3064
|
-
out = new_doc.export_to_dict()
|
|
3083
|
+
out = new_doc.export_to_dict(
|
|
3084
|
+
coord_precision=coord_precision, confid_precision=confid_precision
|
|
3085
|
+
)
|
|
3065
3086
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
3066
3087
|
json.dump(out, fw, indent=indent)
|
|
3067
3088
|
|
|
@@ -3087,6 +3108,8 @@ class DoclingDocument(BaseModel):
|
|
|
3087
3108
|
artifacts_dir: Optional[Path] = None,
|
|
3088
3109
|
image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
|
|
3089
3110
|
default_flow_style: bool = False,
|
|
3111
|
+
coord_precision: Optional[int] = None,
|
|
3112
|
+
confid_precision: Optional[int] = None,
|
|
3090
3113
|
):
|
|
3091
3114
|
"""Save as yaml."""
|
|
3092
3115
|
if isinstance(filename, str):
|
|
@@ -3100,7 +3123,9 @@ class DoclingDocument(BaseModel):
|
|
|
3100
3123
|
artifacts_dir, image_mode, reference_path=reference_path
|
|
3101
3124
|
)
|
|
3102
3125
|
|
|
3103
|
-
out = new_doc.export_to_dict()
|
|
3126
|
+
out = new_doc.export_to_dict(
|
|
3127
|
+
coord_precision=coord_precision, confid_precision=confid_precision
|
|
3128
|
+
)
|
|
3104
3129
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
3105
3130
|
yaml.dump(out, fw, default_flow_style=default_flow_style)
|
|
3106
3131
|
|
|
@@ -3125,9 +3150,18 @@ class DoclingDocument(BaseModel):
|
|
|
3125
3150
|
mode: str = "json",
|
|
3126
3151
|
by_alias: bool = True,
|
|
3127
3152
|
exclude_none: bool = True,
|
|
3153
|
+
coord_precision: Optional[int] = None,
|
|
3154
|
+
confid_precision: Optional[int] = None,
|
|
3128
3155
|
) -> Dict[str, Any]:
|
|
3129
3156
|
"""Export to dict."""
|
|
3130
|
-
|
|
3157
|
+
context = {}
|
|
3158
|
+
if coord_precision is not None:
|
|
3159
|
+
context[_CTX_COORD_PREC] = coord_precision
|
|
3160
|
+
if confid_precision is not None:
|
|
3161
|
+
context[_CTX_CONFID_PREC] = confid_precision
|
|
3162
|
+
out = self.model_dump(
|
|
3163
|
+
mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
|
|
3164
|
+
)
|
|
3131
3165
|
|
|
3132
3166
|
return out
|
|
3133
3167
|
|
docling_core/types/doc/page.py
CHANGED
|
@@ -25,9 +25,21 @@ import numpy as np
|
|
|
25
25
|
from PIL import Image as PILImage
|
|
26
26
|
from PIL import ImageColor, ImageDraw, ImageFont
|
|
27
27
|
from PIL.ImageFont import FreeTypeFont
|
|
28
|
-
from pydantic import AnyUrl, BaseModel, Field, model_validator
|
|
28
|
+
from pydantic import (
|
|
29
|
+
AnyUrl,
|
|
30
|
+
BaseModel,
|
|
31
|
+
Field,
|
|
32
|
+
FieldSerializationInfo,
|
|
33
|
+
field_serializer,
|
|
34
|
+
model_validator,
|
|
35
|
+
)
|
|
29
36
|
|
|
30
|
-
from docling_core.types.doc.base import BoundingBox, CoordOrigin
|
|
37
|
+
from docling_core.types.doc.base import (
|
|
38
|
+
_CTX_COORD_PREC,
|
|
39
|
+
BoundingBox,
|
|
40
|
+
CoordOrigin,
|
|
41
|
+
_serialize_precision,
|
|
42
|
+
)
|
|
31
43
|
from docling_core.types.doc.document import ImageRef
|
|
32
44
|
|
|
33
45
|
_logger = logging.getLogger(__name__)
|
|
@@ -105,6 +117,10 @@ class BoundingRectangle(BaseModel):
|
|
|
105
117
|
|
|
106
118
|
coord_origin: CoordOrigin = CoordOrigin.BOTTOMLEFT
|
|
107
119
|
|
|
120
|
+
@field_serializer("r_x0", "r_y0", "r_x1", "r_y1", "r_x2", "r_y2", "r_x3", "r_y3")
|
|
121
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
|
122
|
+
return _serialize_precision(value, info, _CTX_COORD_PREC)
|
|
123
|
+
|
|
108
124
|
@property
|
|
109
125
|
def width(self) -> float:
|
|
110
126
|
"""Calculate the width of the rectangle."""
|
{docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.40.0
|
|
3
|
+
Version: 2.41.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
{docling_core-2.40.0.dist-info → docling_core-2.41.0.dist-info}/RECORD
CHANGED
|
@@ -40,10 +40,10 @@ docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcj
|
|
|
40
40
|
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
41
41
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
42
42
|
docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
|
|
43
|
-
docling_core/types/doc/base.py,sha256=
|
|
44
|
-
docling_core/types/doc/document.py,sha256=
|
|
43
|
+
docling_core/types/doc/base.py,sha256=u8sFLA29x8QphvLzgy2wAKu3HXyM2GODfBXqEwQMrTY,15527
|
|
44
|
+
docling_core/types/doc/document.py,sha256=YAJIIdT2fBnlp8ASWvzJTjUbil_ZCwuBBSjmiwhZ1KI,158630
|
|
45
45
|
docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
|
|
46
|
-
docling_core/types/doc/page.py,sha256=
|
|
46
|
+
docling_core/types/doc/page.py,sha256=CH9DY3LLgnUdhRuJBWfnkDkPBdRzz9yi4el1LsxJSME,41651
|
|
47
47
|
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
48
48
|
docling_core/types/doc/utils.py,sha256=JpAi7x9DHksFlIj_gRJPcSZOHa8AHvVPEO_K9aSnw4c,2608
|
|
49
49
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
74
74
|
docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
|
|
75
75
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
76
76
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
77
|
-
docling_core-2.
|
|
78
|
-
docling_core-2.
|
|
79
|
-
docling_core-2.
|
|
80
|
-
docling_core-2.
|
|
81
|
-
docling_core-2.
|
|
82
|
-
docling_core-2.
|
|
77
|
+
docling_core-2.41.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
78
|
+
docling_core-2.41.0.dist-info/METADATA,sha256=CqsXanxB2dd22G__-Ws0XdLDzkf9uwMGmx98V2h9f3k,6453
|
|
79
|
+
docling_core-2.41.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
80
|
+
docling_core-2.41.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
|
|
81
|
+
docling_core-2.41.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
|
|
82
|
+
docling_core-2.41.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|