docling-core: docling_core-2.25.0-py3-none-any.whl → docling_core-2.26.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/experimental/serializer/base.py +23 -2
- docling_core/experimental/serializer/common.py +79 -34
- docling_core/experimental/serializer/doctags.py +83 -47
- docling_core/experimental/serializer/html.py +931 -0
- docling_core/experimental/serializer/html_styles.py +212 -0
- docling_core/experimental/serializer/markdown.py +95 -57
- docling_core/transforms/chunker/base.py +8 -2
- docling_core/transforms/chunker/hierarchical_chunker.py +130 -109
- docling_core/transforms/chunker/hybrid_chunker.py +54 -12
- docling_core/types/doc/document.py +702 -482
- docling_core/types/doc/labels.py +2 -0
- docling_core/types/doc/page.py +12 -17
- docling_core/types/doc/tokens.py +3 -0
- {docling_core-2.25.0.dist-info → docling_core-2.26.0.dist-info}/METADATA +1 -1
- {docling_core-2.25.0.dist-info → docling_core-2.26.0.dist-info}/RECORD +18 -16
- {docling_core-2.25.0.dist-info → docling_core-2.26.0.dist-info}/LICENSE +0 -0
- {docling_core-2.25.0.dist-info → docling_core-2.26.0.dist-info}/WHEEL +0 -0
- {docling_core-2.25.0.dist-info → docling_core-2.26.0.dist-info}/entry_points.txt +0 -0
docling_core/types/doc/labels.py
CHANGED
|
@@ -8,6 +8,7 @@ class DocItemLabel(str, Enum):
|
|
|
8
8
|
"""DocItemLabel."""
|
|
9
9
|
|
|
10
10
|
CAPTION = "caption"
|
|
11
|
+
CHART = "chart"
|
|
11
12
|
FOOTNOTE = "footnote"
|
|
12
13
|
FORMULA = "formula"
|
|
13
14
|
LIST_ITEM = "list_item"
|
|
@@ -94,6 +95,7 @@ class PictureClassificationLabel(str, Enum):
|
|
|
94
95
|
# General
|
|
95
96
|
PIE_CHART = "pie_chart"
|
|
96
97
|
BAR_CHART = "bar_chart"
|
|
98
|
+
STACKED_BAR_CHART = "stacked_bar_chart"
|
|
97
99
|
LINE_CHART = "line_chart"
|
|
98
100
|
FLOW_CHART = "flow_chart"
|
|
99
101
|
SCATTER_CHART = "scatter_chart"
|
docling_core/types/doc/page.py
CHANGED
|
@@ -116,33 +116,28 @@ class BoundingRectangle(BaseModel):
|
|
|
116
116
|
|
|
117
117
|
@property
|
|
118
118
|
def angle(self) -> float:
|
|
119
|
-
"""Calculate the angle of the rectangle in radians."""
|
|
119
|
+
"""Calculate the angle of the rectangle in radians (0-2pi range)."""
|
|
120
120
|
p_0 = ((self.r_x0 + self.r_x3) / 2.0, (self.r_y0 + self.r_y3) / 2.0)
|
|
121
121
|
p_1 = ((self.r_x1 + self.r_x2) / 2.0, (self.r_y1 + self.r_y2) / 2.0)
|
|
122
122
|
|
|
123
123
|
delta_x, delta_y = p_1[0] - p_0[0], p_1[1] - p_0[1]
|
|
124
124
|
|
|
125
|
-
if abs(
|
|
126
|
-
|
|
127
|
-
elif
|
|
128
|
-
|
|
125
|
+
if abs(delta_y) < 1.0e-3:
|
|
126
|
+
angle = 0.0
|
|
127
|
+
elif abs(delta_x) < 1.0e-3:
|
|
128
|
+
angle = np.pi / 2.0 if delta_y > 0 else -np.pi / 2.0
|
|
129
129
|
else:
|
|
130
|
-
|
|
130
|
+
angle = math.atan(delta_y / delta_x)
|
|
131
|
+
if delta_x < 0:
|
|
132
|
+
angle += np.pi
|
|
133
|
+
if angle < 0:
|
|
134
|
+
angle += 2 * np.pi
|
|
135
|
+
return angle
|
|
131
136
|
|
|
132
137
|
@property
|
|
133
138
|
def angle_360(self) -> int:
|
|
134
139
|
"""Calculate the angle of the rectangle in degrees (0-360 range)."""
|
|
135
|
-
|
|
136
|
-
p_1 = ((self.r_x1 + self.r_x2) / 2.0, (self.r_y1 + self.r_y2) / 2.0)
|
|
137
|
-
|
|
138
|
-
delta_x, delta_y = p_1[0] - p_0[0], p_1[1] - p_0[1]
|
|
139
|
-
|
|
140
|
-
if abs(delta_y) < 1.0e-2:
|
|
141
|
-
return 0
|
|
142
|
-
elif abs(delta_x) < 1.0e-2:
|
|
143
|
-
return 90
|
|
144
|
-
else:
|
|
145
|
-
return round(-math.atan(delta_y / delta_x) / np.pi * 180)
|
|
140
|
+
return round(self.angle / np.pi * 180)
|
|
146
141
|
|
|
147
142
|
@property
|
|
148
143
|
def centre(self):
|
docling_core/types/doc/tokens.py
CHANGED
|
@@ -57,6 +57,7 @@ class _PictureClassificationToken(str, Enum):
|
|
|
57
57
|
# General
|
|
58
58
|
PIE_CHART = "<pie_chart>"
|
|
59
59
|
BAR_CHART = "<bar_chart>"
|
|
60
|
+
STACKED_BAR_CHART = "<stacked_bar_chart>"
|
|
60
61
|
LINE_CHART = "<line_chart>"
|
|
61
62
|
FLOW_CHART = "<flow_chart>"
|
|
62
63
|
SCATTER_CHART = "<scatter_chart>"
|
|
@@ -154,6 +155,7 @@ class DocumentToken(str, Enum):
|
|
|
154
155
|
|
|
155
156
|
DOCUMENT = "doctag"
|
|
156
157
|
OTSL = "otsl"
|
|
158
|
+
CHART = "chart"
|
|
157
159
|
ORDERED_LIST = "ordered_list"
|
|
158
160
|
UNORDERED_LIST = "unordered_list"
|
|
159
161
|
PAGE_BREAK = "page_break"
|
|
@@ -230,6 +232,7 @@ class DocumentToken(str, Enum):
|
|
|
230
232
|
DocItemLabel.KEY_VALUE_REGION: DocumentToken.KEY_VALUE_REGION,
|
|
231
233
|
DocItemLabel.PARAGRAPH: DocumentToken.PARAGRAPH,
|
|
232
234
|
DocItemLabel.REFERENCE: DocumentToken.REFERENCE,
|
|
235
|
+
DocItemLabel.CHART: DocumentToken.CHART,
|
|
233
236
|
}
|
|
234
237
|
|
|
235
238
|
res: str
|
|
@@ -3,10 +3,12 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
|
|
|
3
3
|
docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
|
|
4
4
|
docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
|
|
5
5
|
docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
|
|
6
|
-
docling_core/experimental/serializer/base.py,sha256=
|
|
7
|
-
docling_core/experimental/serializer/common.py,sha256=
|
|
8
|
-
docling_core/experimental/serializer/doctags.py,sha256=
|
|
9
|
-
docling_core/experimental/serializer/
|
|
6
|
+
docling_core/experimental/serializer/base.py,sha256=WwTBNGlaSfy2KBwHFmZ9c5o4Hj27bGmjnnNWD0TEXqo,5784
|
|
7
|
+
docling_core/experimental/serializer/common.py,sha256=KoiPCzfSBrHSrXwAOAOL-ISPBRoJinzGS2ojK5JVH8c,15266
|
|
8
|
+
docling_core/experimental/serializer/doctags.py,sha256=r_b9dUsfQqucK8TtmE_e5n5NAMcCGfBsEfjpnP-3evM,18028
|
|
9
|
+
docling_core/experimental/serializer/html.py,sha256=7PQHa1T1fctQocvwrOosjS3K44Tjh7FpcCBWZoZa3r8,31260
|
|
10
|
+
docling_core/experimental/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx90OmIKieO6TwPw57IuxcA,4692
|
|
11
|
+
docling_core/experimental/serializer/markdown.py,sha256=wcF75hZFBQxIJKrd_1-DRuVN3g1ofQGHTrb11pUQdUw,17680
|
|
10
12
|
docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
13
|
docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
|
|
12
14
|
docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
|
|
@@ -23,17 +25,17 @@ docling_core/search/meta.py,sha256=wSurrsqdP1N3gQKx027fVdzVmc33a7Y6rPl-FClQvtA,3
|
|
|
23
25
|
docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75A,1841
|
|
24
26
|
docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
|
|
25
27
|
docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
|
|
26
|
-
docling_core/transforms/chunker/base.py,sha256=
|
|
27
|
-
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=
|
|
28
|
-
docling_core/transforms/chunker/hybrid_chunker.py,sha256=
|
|
28
|
+
docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
|
|
29
|
+
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=iYzA65INFo89klc94jixuzQP8ivywe-3aVYznt2Csv8,8287
|
|
30
|
+
docling_core/transforms/chunker/hybrid_chunker.py,sha256=JPKKgfAdHqkYp4qyZWZyjJ3fYFq9lgD-mTaVVnm5T0Y,10936
|
|
29
31
|
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
30
32
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
31
33
|
docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
|
|
32
34
|
docling_core/types/doc/base.py,sha256=sM3IyFXzVh2WT8IGh5nejXYh8sf39yBh8TBSlHeJ9CI,12611
|
|
33
|
-
docling_core/types/doc/document.py,sha256=
|
|
34
|
-
docling_core/types/doc/labels.py,sha256=
|
|
35
|
-
docling_core/types/doc/page.py,sha256=
|
|
36
|
-
docling_core/types/doc/tokens.py,sha256=
|
|
35
|
+
docling_core/types/doc/document.py,sha256=gK9-qiMi74p0jPeAHW6YGKl-O0ZSYE-p36MQAco3lx4,139341
|
|
36
|
+
docling_core/types/doc/labels.py,sha256=Kmrrdmd6ejXomeXDlxjpmBEpPxMCYOc_3I2GSaoqqi4,5748
|
|
37
|
+
docling_core/types/doc/page.py,sha256=QI1D5p63AxboT6PnHa7UlbPmH2i2_E3qIk_Gk2fdrxs,40270
|
|
38
|
+
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
37
39
|
docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
|
|
38
40
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
39
41
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
@@ -63,8 +65,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
63
65
|
docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
|
|
64
66
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
65
67
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
66
|
-
docling_core-2.
|
|
67
|
-
docling_core-2.
|
|
68
|
-
docling_core-2.
|
|
69
|
-
docling_core-2.
|
|
70
|
-
docling_core-2.
|
|
68
|
+
docling_core-2.26.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
69
|
+
docling_core-2.26.0.dist-info/METADATA,sha256=_GeilTNKmhnT5woL2myAwHd6KjP29rlBb9C8Ed8_WJ8,5843
|
|
70
|
+
docling_core-2.26.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
71
|
+
docling_core-2.26.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
|
|
72
|
+
docling_core-2.26.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|