docling-core 2.40.0__py3-none-any.whl → 2.41.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

@@ -3,7 +3,7 @@
3
3
  from enum import Enum
4
4
  from typing import List, Tuple
5
5
 
6
- from pydantic import BaseModel
6
+ from pydantic import BaseModel, FieldSerializationInfo, field_serializer
7
7
 
8
8
 
9
9
  class ImageRefMode(str, Enum):
@@ -21,12 +21,28 @@ class CoordOrigin(str, Enum):
21
21
  BOTTOMLEFT = "BOTTOMLEFT"
22
22
 
23
23
 
24
+ _CTX_COORD_PREC = "coord_prec"
25
+
26
+
27
+ def _serialize_precision(
28
+ value: float, info: FieldSerializationInfo, ctx_key: str
29
+ ) -> float:
30
+ precision = info.context.get(ctx_key) if info.context else None
31
+ if isinstance(precision, int):
32
+ return round(value, precision)
33
+ return value
34
+
35
+
24
36
  class Size(BaseModel):
25
37
  """Size."""
26
38
 
27
39
  width: float = 0.0
28
40
  height: float = 0.0
29
41
 
42
+ @field_serializer("width", "height")
43
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
44
+ return _serialize_precision(value, info, _CTX_COORD_PREC)
45
+
30
46
  def as_tuple(self):
31
47
  """as_tuple."""
32
48
  return (self.width, self.height)
@@ -52,6 +68,10 @@ class BoundingBox(BaseModel):
52
68
  """height."""
53
69
  return abs(self.t - self.b)
54
70
 
71
+ @field_serializer("l", "t", "r", "b")
72
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
73
+ return _serialize_precision(value, info, _CTX_COORD_PREC)
74
+
55
75
  def resize_by_scale(self, x_scale: float, y_scale: float):
56
76
  """resize_by_scale."""
57
77
  return BoundingBox(
@@ -26,8 +26,10 @@ from pydantic import (
26
26
  BaseModel,
27
27
  ConfigDict,
28
28
  Field,
29
+ FieldSerializationInfo,
29
30
  StringConstraints,
30
31
  computed_field,
32
+ field_serializer,
31
33
  field_validator,
32
34
  model_validator,
33
35
  validate_call,
@@ -38,7 +40,12 @@ from typing_extensions import Annotated, Self, deprecated
38
40
  from docling_core.search.package import VERSION_PATTERN
39
41
  from docling_core.types.base import _JSON_POINTER_REGEX
40
42
  from docling_core.types.doc import BoundingBox, Size
41
- from docling_core.types.doc.base import CoordOrigin, ImageRefMode
43
+ from docling_core.types.doc.base import (
44
+ _CTX_COORD_PREC,
45
+ CoordOrigin,
46
+ ImageRefMode,
47
+ _serialize_precision,
48
+ )
42
49
  from docling_core.types.doc.labels import (
43
50
  CodeLanguageLabel,
44
51
  DocItemLabel,
@@ -85,6 +92,8 @@ DOCUMENT_TOKENS_EXPORT_LABELS.update(
85
92
  ]
86
93
  )
87
94
 
95
+ _CTX_CONFID_PREC = "confid_prec"
96
+
88
97
 
89
98
  class BaseAnnotation(BaseModel):
90
99
  """Base class for all annotation types."""
@@ -98,6 +107,10 @@ class PictureClassificationClass(BaseModel):
98
107
  class_name: str
99
108
  confidence: float
100
109
 
110
+ @field_serializer("confidence")
111
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
112
+ return _serialize_precision(value, info, _CTX_CONFID_PREC)
113
+
101
114
 
102
115
  class PictureClassificationData(BaseAnnotation):
103
116
  """PictureClassificationData."""
@@ -125,6 +138,10 @@ class PictureMoleculeData(BaseAnnotation):
125
138
  segmentation: List[Tuple[float, float]]
126
139
  provenance: str
127
140
 
141
+ @field_serializer("confidence")
142
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
143
+ return _serialize_precision(value, info, _CTX_CONFID_PREC)
144
+
128
145
 
129
146
  class MiscAnnotation(BaseAnnotation):
130
147
  """MiscAnnotation."""
@@ -3048,6 +3065,8 @@ class DoclingDocument(BaseModel):
3048
3065
  artifacts_dir: Optional[Path] = None,
3049
3066
  image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
3050
3067
  indent: int = 2,
3068
+ coord_precision: Optional[int] = None,
3069
+ confid_precision: Optional[int] = None,
3051
3070
  ):
3052
3071
  """Save as json."""
3053
3072
  if isinstance(filename, str):
@@ -3061,7 +3080,9 @@ class DoclingDocument(BaseModel):
3061
3080
  artifacts_dir, image_mode, reference_path=reference_path
3062
3081
  )
3063
3082
 
3064
- out = new_doc.export_to_dict()
3083
+ out = new_doc.export_to_dict(
3084
+ coord_precision=coord_precision, confid_precision=confid_precision
3085
+ )
3065
3086
  with open(filename, "w", encoding="utf-8") as fw:
3066
3087
  json.dump(out, fw, indent=indent)
3067
3088
 
@@ -3087,6 +3108,8 @@ class DoclingDocument(BaseModel):
3087
3108
  artifacts_dir: Optional[Path] = None,
3088
3109
  image_mode: ImageRefMode = ImageRefMode.EMBEDDED,
3089
3110
  default_flow_style: bool = False,
3111
+ coord_precision: Optional[int] = None,
3112
+ confid_precision: Optional[int] = None,
3090
3113
  ):
3091
3114
  """Save as yaml."""
3092
3115
  if isinstance(filename, str):
@@ -3100,7 +3123,9 @@ class DoclingDocument(BaseModel):
3100
3123
  artifacts_dir, image_mode, reference_path=reference_path
3101
3124
  )
3102
3125
 
3103
- out = new_doc.export_to_dict()
3126
+ out = new_doc.export_to_dict(
3127
+ coord_precision=coord_precision, confid_precision=confid_precision
3128
+ )
3104
3129
  with open(filename, "w", encoding="utf-8") as fw:
3105
3130
  yaml.dump(out, fw, default_flow_style=default_flow_style)
3106
3131
 
@@ -3125,9 +3150,18 @@ class DoclingDocument(BaseModel):
3125
3150
  mode: str = "json",
3126
3151
  by_alias: bool = True,
3127
3152
  exclude_none: bool = True,
3153
+ coord_precision: Optional[int] = None,
3154
+ confid_precision: Optional[int] = None,
3128
3155
  ) -> Dict[str, Any]:
3129
3156
  """Export to dict."""
3130
- out = self.model_dump(mode=mode, by_alias=by_alias, exclude_none=exclude_none)
3157
+ context = {}
3158
+ if coord_precision is not None:
3159
+ context[_CTX_COORD_PREC] = coord_precision
3160
+ if confid_precision is not None:
3161
+ context[_CTX_CONFID_PREC] = confid_precision
3162
+ out = self.model_dump(
3163
+ mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context
3164
+ )
3131
3165
 
3132
3166
  return out
3133
3167
 
@@ -25,9 +25,21 @@ import numpy as np
25
25
  from PIL import Image as PILImage
26
26
  from PIL import ImageColor, ImageDraw, ImageFont
27
27
  from PIL.ImageFont import FreeTypeFont
28
- from pydantic import AnyUrl, BaseModel, Field, model_validator
28
+ from pydantic import (
29
+ AnyUrl,
30
+ BaseModel,
31
+ Field,
32
+ FieldSerializationInfo,
33
+ field_serializer,
34
+ model_validator,
35
+ )
29
36
 
30
- from docling_core.types.doc.base import BoundingBox, CoordOrigin
37
+ from docling_core.types.doc.base import (
38
+ _CTX_COORD_PREC,
39
+ BoundingBox,
40
+ CoordOrigin,
41
+ _serialize_precision,
42
+ )
31
43
  from docling_core.types.doc.document import ImageRef
32
44
 
33
45
  _logger = logging.getLogger(__name__)
@@ -105,6 +117,10 @@ class BoundingRectangle(BaseModel):
105
117
 
106
118
  coord_origin: CoordOrigin = CoordOrigin.BOTTOMLEFT
107
119
 
120
+ @field_serializer("r_x0", "r_y0", "r_x1", "r_y1", "r_x2", "r_y2", "r_x3", "r_y3")
121
+ def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
122
+ return _serialize_precision(value, info, _CTX_COORD_PREC)
123
+
108
124
  @property
109
125
  def width(self) -> float:
110
126
  """Calculate the width of the rectangle."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.40.0
3
+ Version: 2.41.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -40,10 +40,10 @@ docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcj
40
40
  docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
41
41
  docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
42
42
  docling_core/types/doc/__init__.py,sha256=8hOhm5W9mArf3zwgfoMxDs1pHizhLFSAZlLu1tPBBRk,1641
43
- docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
44
- docling_core/types/doc/document.py,sha256=9-n0tngXLTRVAkqGHe3bDSh1OJbBt87EW2nV8GdOGME,157406
43
+ docling_core/types/doc/base.py,sha256=u8sFLA29x8QphvLzgy2wAKu3HXyM2GODfBXqEwQMrTY,15527
44
+ docling_core/types/doc/document.py,sha256=YAJIIdT2fBnlp8ASWvzJTjUbil_ZCwuBBSjmiwhZ1KI,158630
45
45
  docling_core/types/doc/labels.py,sha256=-W1-LW6z0J9F9ExJqR0Wd1WeqWTaY3Unm-j1UkQGlC4,7330
46
- docling_core/types/doc/page.py,sha256=J_4ThNhrdhrfPtNMBTDHi-CQBvraejAwUaqVjyDeeeI,41288
46
+ docling_core/types/doc/page.py,sha256=CH9DY3LLgnUdhRuJBWfnkDkPBdRzz9yi4el1LsxJSME,41651
47
47
  docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
48
48
  docling_core/types/doc/utils.py,sha256=JpAi7x9DHksFlIj_gRJPcSZOHa8AHvVPEO_K9aSnw4c,2608
49
49
  docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
74
74
  docling_core/utils/legacy.py,sha256=5lghO48OEcV9V51tRnH3YSKgLtdqhr-Q5C_OcJZ8TOs,24392
75
75
  docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
76
76
  docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
77
- docling_core-2.40.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
- docling_core-2.40.0.dist-info/METADATA,sha256=A6_Wz_CJzmHa20USMUgQPDMpN5-S3f8VpNrx7ns1SXo,6453
79
- docling_core-2.40.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
- docling_core-2.40.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
- docling_core-2.40.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
- docling_core-2.40.0.dist-info/RECORD,,
77
+ docling_core-2.41.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
78
+ docling_core-2.41.0.dist-info/METADATA,sha256=CqsXanxB2dd22G__-Ws0XdLDzkf9uwMGmx98V2h9f3k,6453
79
+ docling_core-2.41.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
+ docling_core-2.41.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
81
+ docling_core-2.41.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
82
+ docling_core-2.41.0.dist-info/RECORD,,