docling-core 2.23.3__py3-none-any.whl → 2.24.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/experimental/serializer/base.py +2 -2
- docling_core/experimental/serializer/common.py +250 -196
- docling_core/experimental/serializer/doctags.py +492 -0
- docling_core/experimental/serializer/markdown.py +70 -41
- docling_core/types/doc/document.py +412 -418
- docling_core/types/doc/page.py +28 -9
- docling_core/types/doc/tokens.py +192 -26
- {docling_core-2.23.3.dist-info → docling_core-2.24.1.dist-info}/METADATA +1 -1
- {docling_core-2.23.3.dist-info → docling_core-2.24.1.dist-info}/RECORD +12 -11
- {docling_core-2.23.3.dist-info → docling_core-2.24.1.dist-info}/LICENSE +0 -0
- {docling_core-2.23.3.dist-info → docling_core-2.24.1.dist-info}/WHEEL +0 -0
- {docling_core-2.23.3.dist-info → docling_core-2.24.1.dist-info}/entry_points.txt +0 -0
docling_core/types/doc/page.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Datastructures for PaginatedDocument."""
|
|
2
2
|
|
|
3
|
+
import copy
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import math
|
|
@@ -530,10 +531,16 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
530
531
|
"""
|
|
531
532
|
cells = []
|
|
532
533
|
for page_cell in self.iterate_cells(cell_unit):
|
|
533
|
-
|
|
534
|
+
pc = copy.deepcopy(page_cell)
|
|
535
|
+
# Bring cell_bbox coord origin to the same as input bbox.coord_origin:
|
|
536
|
+
if page_cell.rect.coord_origin != bbox.coord_origin:
|
|
537
|
+
if bbox.coord_origin == CoordOrigin.TOPLEFT:
|
|
538
|
+
pc.rect = pc.rect.to_top_left_origin(self.dimension.height)
|
|
539
|
+
elif bbox.coord_origin == CoordOrigin.BOTTOMLEFT:
|
|
540
|
+
pc.rect = pc.rect.to_bottom_left_origin(self.dimension.height)
|
|
541
|
+
cell_bbox = pc.to_bounding_box()
|
|
534
542
|
if cell_bbox.intersection_over_self(bbox) > ios:
|
|
535
|
-
cells.append(
|
|
536
|
-
|
|
543
|
+
cells.append(pc)
|
|
537
544
|
return cells
|
|
538
545
|
|
|
539
546
|
def export_to_dict(self) -> Dict:
|
|
@@ -546,7 +553,7 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
546
553
|
|
|
547
554
|
def save_as_json(
|
|
548
555
|
self,
|
|
549
|
-
filename: Path,
|
|
556
|
+
filename: Union[str, Path],
|
|
550
557
|
indent: int = 2,
|
|
551
558
|
):
|
|
552
559
|
"""Save the page data as a JSON file.
|
|
@@ -555,12 +562,14 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
555
562
|
filename: Path to save the JSON file
|
|
556
563
|
indent: Indentation level for JSON formatting
|
|
557
564
|
"""
|
|
565
|
+
if isinstance(filename, str):
|
|
566
|
+
filename = Path(filename)
|
|
558
567
|
out = self.export_to_dict()
|
|
559
568
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
560
569
|
json.dump(out, fw, indent=indent)
|
|
561
570
|
|
|
562
571
|
@classmethod
|
|
563
|
-
def load_from_json(cls, filename: Path) -> "SegmentedPdfPage":
|
|
572
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "SegmentedPdfPage":
|
|
564
573
|
"""Load page data from a JSON file.
|
|
565
574
|
|
|
566
575
|
Args:
|
|
@@ -569,6 +578,8 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
569
578
|
Returns:
|
|
570
579
|
Instantiated SegmentedPdfPage object
|
|
571
580
|
"""
|
|
581
|
+
if isinstance(filename, str):
|
|
582
|
+
filename = Path(filename)
|
|
572
583
|
with open(filename, "r", encoding="utf-8") as f:
|
|
573
584
|
return cls.model_validate_json(f.read())
|
|
574
585
|
|
|
@@ -1155,19 +1166,21 @@ class PdfTableOfContents(BaseModel):
|
|
|
1155
1166
|
"""
|
|
1156
1167
|
return self.model_dump(mode=mode, by_alias=True, exclude_none=True)
|
|
1157
1168
|
|
|
1158
|
-
def save_as_json(self, filename: Path, indent: int = 2):
|
|
1169
|
+
def save_as_json(self, filename: Union[str, Path], indent: int = 2):
|
|
1159
1170
|
"""Save the table of contents as a JSON file.
|
|
1160
1171
|
|
|
1161
1172
|
Args:
|
|
1162
1173
|
filename: Path to save the JSON file
|
|
1163
1174
|
indent: Indentation level for JSON formatting
|
|
1164
1175
|
"""
|
|
1176
|
+
if isinstance(filename, str):
|
|
1177
|
+
filename = Path(filename)
|
|
1165
1178
|
out = self.export_to_dict()
|
|
1166
1179
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
1167
1180
|
json.dump(out, fw, indent=indent)
|
|
1168
1181
|
|
|
1169
1182
|
@classmethod
|
|
1170
|
-
def load_from_json(cls, filename: Path) -> "PdfTableOfContents":
|
|
1183
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "PdfTableOfContents":
|
|
1171
1184
|
"""Load table of contents from a JSON file.
|
|
1172
1185
|
|
|
1173
1186
|
Args:
|
|
@@ -1176,6 +1189,8 @@ class PdfTableOfContents(BaseModel):
|
|
|
1176
1189
|
Returns:
|
|
1177
1190
|
Instantiated PdfTableOfContents object
|
|
1178
1191
|
"""
|
|
1192
|
+
if isinstance(filename, str):
|
|
1193
|
+
filename = Path(filename)
|
|
1179
1194
|
with open(filename, "r", encoding="utf-8") as f:
|
|
1180
1195
|
return cls.model_validate_json(f.read())
|
|
1181
1196
|
|
|
@@ -1213,19 +1228,21 @@ class ParsedPdfDocument(BaseModel):
|
|
|
1213
1228
|
"""
|
|
1214
1229
|
return self.model_dump(mode=mode, by_alias=True, exclude_none=True)
|
|
1215
1230
|
|
|
1216
|
-
def save_as_json(self, filename: Path, indent: int = 2):
|
|
1231
|
+
def save_as_json(self, filename: Union[str, Path], indent: int = 2):
|
|
1217
1232
|
"""Save the document as a JSON file.
|
|
1218
1233
|
|
|
1219
1234
|
Args:
|
|
1220
1235
|
filename: Path to save the JSON file
|
|
1221
1236
|
indent: Indentation level for JSON formatting
|
|
1222
1237
|
"""
|
|
1238
|
+
if isinstance(filename, str):
|
|
1239
|
+
filename = Path(filename)
|
|
1223
1240
|
out = self.export_to_dict()
|
|
1224
1241
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
1225
1242
|
json.dump(out, fw, indent=indent)
|
|
1226
1243
|
|
|
1227
1244
|
@classmethod
|
|
1228
|
-
def load_from_json(cls, filename: Path) -> "ParsedPdfDocument":
|
|
1245
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "ParsedPdfDocument":
|
|
1229
1246
|
"""Load document from a JSON file.
|
|
1230
1247
|
|
|
1231
1248
|
Args:
|
|
@@ -1234,5 +1251,7 @@ class ParsedPdfDocument(BaseModel):
|
|
|
1234
1251
|
Returns:
|
|
1235
1252
|
Instantiated ParsedPdfDocument object
|
|
1236
1253
|
"""
|
|
1254
|
+
if isinstance(filename, str):
|
|
1255
|
+
filename = Path(filename)
|
|
1237
1256
|
with open(filename, "r", encoding="utf-8") as f:
|
|
1238
1257
|
return cls.model_validate_json(f.read())
|
docling_core/types/doc/tokens.py
CHANGED
|
@@ -8,10 +8,10 @@
|
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from typing import Tuple
|
|
10
10
|
|
|
11
|
-
from docling_core.types.doc.labels import
|
|
11
|
+
from docling_core.types.doc.labels import DocItemLabel
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class TableToken(Enum):
|
|
14
|
+
class TableToken(str, Enum):
|
|
15
15
|
"""Class to represent an LLM friendly representation of a Table."""
|
|
16
16
|
|
|
17
17
|
CELL_LABEL_COLUMN_HEADER = "<column_header>"
|
|
@@ -41,41 +41,207 @@ class TableToken(Enum):
|
|
|
41
41
|
return label in TableToken.get_special_tokens()
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
_LOC_PREFIX = "loc_"
|
|
45
|
+
_SECTION_HEADER_PREFIX = "section_header_level_"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _PictureClassificationToken(str, Enum):
|
|
49
|
+
"""PictureClassificationToken."""
|
|
50
|
+
|
|
51
|
+
OTHER = "<other>"
|
|
52
|
+
|
|
53
|
+
# If more than one picture is grouped together, it
|
|
54
|
+
# is generally not possible to assign a label
|
|
55
|
+
PICTURE_GROUP = "<picture_group>"
|
|
56
|
+
|
|
57
|
+
# General
|
|
58
|
+
PIE_CHART = "<pie_chart>"
|
|
59
|
+
BAR_CHART = "<bar_chart>"
|
|
60
|
+
LINE_CHART = "<line_chart>"
|
|
61
|
+
FLOW_CHART = "<flow_chart>"
|
|
62
|
+
SCATTER_CHART = "<scatter_chart>"
|
|
63
|
+
HEATMAP = "<heatmap>"
|
|
64
|
+
REMOTE_SENSING = "<remote_sensing>"
|
|
65
|
+
|
|
66
|
+
NATURAL_IMAGE = "<natural_image>"
|
|
67
|
+
|
|
68
|
+
# Chemistry
|
|
69
|
+
MOLECULAR_STRUCTURE = "<chemistry_molecular_structure>"
|
|
70
|
+
MARKUSH_STRUCTURE = "<chemistry_markush_structure>"
|
|
71
|
+
|
|
72
|
+
# Company
|
|
73
|
+
ICON = "<icon>"
|
|
74
|
+
LOGO = "<logo>"
|
|
75
|
+
SIGNATURE = "<signature>"
|
|
76
|
+
STAMP = "<stamp>"
|
|
77
|
+
QR_CODE = "<qr_code>"
|
|
78
|
+
BAR_CODE = "<bar_code>"
|
|
79
|
+
SCREENSHOT = "<screenshot>"
|
|
80
|
+
|
|
81
|
+
# Geology/Geography
|
|
82
|
+
GEOGRAPHIC_MAP = "<map>"
|
|
83
|
+
STRATIGRAPHIC_CHART = "<stratigraphic_chart>"
|
|
84
|
+
|
|
85
|
+
# Engineering
|
|
86
|
+
CAD_DRAWING = "<cad_drawing>"
|
|
87
|
+
ELECTRICAL_DIAGRAM = "<electrical_diagram>"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _CodeLanguageToken(str, Enum):
|
|
91
|
+
"""CodeLanguageToken."""
|
|
92
|
+
|
|
93
|
+
ADA = "<_Ada_>"
|
|
94
|
+
AWK = "<_Awk_>"
|
|
95
|
+
BASH = "<_Bash_>"
|
|
96
|
+
BC = "<_bc_>"
|
|
97
|
+
C = "<_C_>"
|
|
98
|
+
C_SHARP = "<_C#_>"
|
|
99
|
+
C_PLUS_PLUS = "<_C++_>"
|
|
100
|
+
CMAKE = "<_CMake_>"
|
|
101
|
+
COBOL = "<_COBOL_>"
|
|
102
|
+
CSS = "<_CSS_>"
|
|
103
|
+
CEYLON = "<_Ceylon_>"
|
|
104
|
+
CLOJURE = "<_Clojure_>"
|
|
105
|
+
CRYSTAL = "<_Crystal_>"
|
|
106
|
+
CUDA = "<_Cuda_>"
|
|
107
|
+
CYTHON = "<_Cython_>"
|
|
108
|
+
D = "<_D_>"
|
|
109
|
+
DART = "<_Dart_>"
|
|
110
|
+
DC = "<_dc_>"
|
|
111
|
+
DOCKERFILE = "<_Dockerfile_>"
|
|
112
|
+
ELIXIR = "<_Elixir_>"
|
|
113
|
+
ERLANG = "<_Erlang_>"
|
|
114
|
+
FORTRAN = "<_FORTRAN_>"
|
|
115
|
+
FORTH = "<_Forth_>"
|
|
116
|
+
GO = "<_Go_>"
|
|
117
|
+
HTML = "<_HTML_>"
|
|
118
|
+
HASKELL = "<_Haskell_>"
|
|
119
|
+
HAXE = "<_Haxe_>"
|
|
120
|
+
JAVA = "<_Java_>"
|
|
121
|
+
JAVASCRIPT = "<_JavaScript_>"
|
|
122
|
+
JULIA = "<_Julia_>"
|
|
123
|
+
KOTLIN = "<_Kotlin_>"
|
|
124
|
+
LISP = "<_Lisp_>"
|
|
125
|
+
LUA = "<_Lua_>"
|
|
126
|
+
MATLAB = "<_Matlab_>"
|
|
127
|
+
MOONSCRIPT = "<_MoonScript_>"
|
|
128
|
+
NIM = "<_Nim_>"
|
|
129
|
+
OCAML = "<_OCaml_>"
|
|
130
|
+
OBJECTIVEC = "<_ObjectiveC_>"
|
|
131
|
+
OCTAVE = "<_Octave_>"
|
|
132
|
+
PHP = "<_PHP_>"
|
|
133
|
+
PASCAL = "<_Pascal_>"
|
|
134
|
+
PERL = "<_Perl_>"
|
|
135
|
+
PROLOG = "<_Prolog_>"
|
|
136
|
+
PYTHON = "<_Python_>"
|
|
137
|
+
RACKET = "<_Racket_>"
|
|
138
|
+
RUBY = "<_Ruby_>"
|
|
139
|
+
RUST = "<_Rust_>"
|
|
140
|
+
SML = "<_SML_>"
|
|
141
|
+
SQL = "<_SQL_>"
|
|
142
|
+
SCALA = "<_Scala_>"
|
|
143
|
+
SCHEME = "<_Scheme_>"
|
|
144
|
+
SWIFT = "<_Swift_>"
|
|
145
|
+
TYPESCRIPT = "<_TypeScript_>"
|
|
146
|
+
UNKNOWN = "<_unknown_>"
|
|
147
|
+
VISUALBASIC = "<_VisualBasic_>"
|
|
148
|
+
XML = "<_XML_>"
|
|
149
|
+
YAML = "<_YAML_>"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class DocumentToken(str, Enum):
|
|
45
153
|
"""Class to represent an LLM friendly representation of a Document."""
|
|
46
154
|
|
|
47
155
|
DOCUMENT = "doctag"
|
|
48
156
|
OTSL = "otsl"
|
|
49
157
|
ORDERED_LIST = "ordered_list"
|
|
50
158
|
UNORDERED_LIST = "unordered_list"
|
|
51
|
-
LOC = "loc_"
|
|
52
159
|
PAGE_BREAK = "page_break"
|
|
53
160
|
SMILES = "smiles"
|
|
161
|
+
INLINE = "inline"
|
|
162
|
+
|
|
163
|
+
CAPTION = "caption"
|
|
164
|
+
FOOTNOTE = "footnote"
|
|
165
|
+
FORMULA = "formula"
|
|
166
|
+
LIST_ITEM = "list_item"
|
|
167
|
+
PAGE_FOOTER = "page_footer"
|
|
168
|
+
PAGE_HEADER = "page_header"
|
|
169
|
+
PICTURE = "picture"
|
|
170
|
+
TABLE = "table"
|
|
171
|
+
TEXT = "text"
|
|
172
|
+
TITLE = "title"
|
|
173
|
+
DOCUMENT_INDEX = "document_index"
|
|
174
|
+
CODE = "code"
|
|
175
|
+
CHECKBOX_SELECTED = "checkbox_selected"
|
|
176
|
+
CHECKBOX_UNSELECTED = "checkbox_unselected"
|
|
177
|
+
FORM = "form"
|
|
178
|
+
KEY_VALUE_REGION = "key_value_region"
|
|
179
|
+
|
|
180
|
+
PARAGRAPH = "paragraph"
|
|
181
|
+
REFERENCE = "reference"
|
|
54
182
|
|
|
55
183
|
@classmethod
|
|
56
184
|
def get_special_tokens(
|
|
57
185
|
cls,
|
|
58
|
-
page_dimension: Tuple[int, int] = (
|
|
186
|
+
page_dimension: Tuple[int, int] = (500, 500),
|
|
59
187
|
):
|
|
60
188
|
"""Function to get all special document tokens."""
|
|
61
|
-
special_tokens = [
|
|
189
|
+
special_tokens: list[str] = []
|
|
190
|
+
for token in cls:
|
|
191
|
+
special_tokens.append(f"<{token.value}>")
|
|
192
|
+
special_tokens.append(f"</{token.value}>")
|
|
62
193
|
|
|
63
194
|
for i in range(6):
|
|
64
195
|
special_tokens += [
|
|
65
|
-
f"<
|
|
66
|
-
f"</
|
|
196
|
+
f"<{_SECTION_HEADER_PREFIX}{i}>",
|
|
197
|
+
f"</{_SECTION_HEADER_PREFIX}{i}>",
|
|
67
198
|
]
|
|
68
199
|
|
|
69
|
-
|
|
70
|
-
for
|
|
71
|
-
|
|
200
|
+
special_tokens.extend([t.value for t in _PictureClassificationToken])
|
|
201
|
+
special_tokens.extend([t.value for t in _CodeLanguageToken])
|
|
202
|
+
|
|
203
|
+
special_tokens.extend(TableToken.get_special_tokens())
|
|
72
204
|
|
|
73
205
|
# Adding dynamically generated location-tokens
|
|
74
|
-
for i in range(0, max(page_dimension[0]
|
|
75
|
-
special_tokens.append(f"<
|
|
206
|
+
for i in range(0, max(page_dimension[0], page_dimension[1])):
|
|
207
|
+
special_tokens.append(f"<{_LOC_PREFIX}{i}>")
|
|
76
208
|
|
|
77
209
|
return special_tokens
|
|
78
210
|
|
|
211
|
+
@classmethod
|
|
212
|
+
def create_token_name_from_doc_item_label(cls, label: str, level: int = 1) -> str:
|
|
213
|
+
"""Get token corresponding to passed doc item label."""
|
|
214
|
+
doc_token_by_item_label = {
|
|
215
|
+
DocItemLabel.CAPTION: DocumentToken.CAPTION,
|
|
216
|
+
DocItemLabel.FOOTNOTE: DocumentToken.FOOTNOTE,
|
|
217
|
+
DocItemLabel.FORMULA: DocumentToken.FORMULA,
|
|
218
|
+
DocItemLabel.LIST_ITEM: DocumentToken.LIST_ITEM,
|
|
219
|
+
DocItemLabel.PAGE_FOOTER: DocumentToken.PAGE_FOOTER,
|
|
220
|
+
DocItemLabel.PAGE_HEADER: DocumentToken.PAGE_HEADER,
|
|
221
|
+
DocItemLabel.PICTURE: DocumentToken.PICTURE,
|
|
222
|
+
DocItemLabel.TABLE: DocumentToken.TABLE,
|
|
223
|
+
DocItemLabel.TEXT: DocumentToken.TEXT,
|
|
224
|
+
DocItemLabel.TITLE: DocumentToken.TITLE,
|
|
225
|
+
DocItemLabel.DOCUMENT_INDEX: DocumentToken.DOCUMENT_INDEX,
|
|
226
|
+
DocItemLabel.CODE: DocumentToken.CODE,
|
|
227
|
+
DocItemLabel.CHECKBOX_SELECTED: DocumentToken.CHECKBOX_SELECTED,
|
|
228
|
+
DocItemLabel.CHECKBOX_UNSELECTED: DocumentToken.CHECKBOX_UNSELECTED,
|
|
229
|
+
DocItemLabel.FORM: DocumentToken.FORM,
|
|
230
|
+
DocItemLabel.KEY_VALUE_REGION: DocumentToken.KEY_VALUE_REGION,
|
|
231
|
+
DocItemLabel.PARAGRAPH: DocumentToken.PARAGRAPH,
|
|
232
|
+
DocItemLabel.REFERENCE: DocumentToken.REFERENCE,
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
res: str
|
|
236
|
+
if label == DocItemLabel.SECTION_HEADER:
|
|
237
|
+
res = f"{_SECTION_HEADER_PREFIX}{level}"
|
|
238
|
+
else:
|
|
239
|
+
try:
|
|
240
|
+
res = doc_token_by_item_label[DocItemLabel(label)].value
|
|
241
|
+
except KeyError as e:
|
|
242
|
+
raise RuntimeError(f"Unexpected DocItemLabel: {label}") from e
|
|
243
|
+
return res
|
|
244
|
+
|
|
79
245
|
@staticmethod
|
|
80
246
|
def is_known_token(label):
|
|
81
247
|
"""Function to check if label is in tokens."""
|
|
@@ -83,29 +249,29 @@ class DocumentToken(Enum):
|
|
|
83
249
|
|
|
84
250
|
@staticmethod
|
|
85
251
|
def get_picture_classification_token(classification: str) -> str:
|
|
86
|
-
"""Function to get picture classification
|
|
87
|
-
return f"<{classification}>"
|
|
252
|
+
"""Function to get the token for a given picture classification value."""
|
|
253
|
+
return _PictureClassificationToken(f"<{classification}>").value
|
|
254
|
+
|
|
255
|
+
@staticmethod
|
|
256
|
+
def get_code_language_token(code_language: str) -> str:
|
|
257
|
+
"""Function to get the token for a given code language."""
|
|
258
|
+
return _CodeLanguageToken(f"<_{code_language}_>").value
|
|
88
259
|
|
|
89
260
|
@staticmethod
|
|
90
|
-
def get_location_token(val: float, rnorm: int =
|
|
261
|
+
def get_location_token(val: float, rnorm: int = 500): # TODO review
|
|
91
262
|
"""Function to get location tokens."""
|
|
92
263
|
val_ = round(rnorm * val)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if val_ > rnorm:
|
|
98
|
-
return f"<loc_{rnorm}>"
|
|
99
|
-
|
|
100
|
-
return f"<loc_{val_}>"
|
|
264
|
+
val_ = max(val_, 0)
|
|
265
|
+
val_ = min(val_, rnorm - 1)
|
|
266
|
+
return f"<{_LOC_PREFIX}{val_}>"
|
|
101
267
|
|
|
102
268
|
@staticmethod
|
|
103
269
|
def get_location(
|
|
104
270
|
bbox: tuple[float, float, float, float],
|
|
105
271
|
page_w: float,
|
|
106
272
|
page_h: float,
|
|
107
|
-
xsize: int =
|
|
108
|
-
ysize: int =
|
|
273
|
+
xsize: int = 500, # TODO review
|
|
274
|
+
ysize: int = 500, # TODO review
|
|
109
275
|
):
|
|
110
276
|
"""Get the location string give bbox and page-dim."""
|
|
111
277
|
assert bbox[0] <= bbox[2], f"bbox[0]<=bbox[2] => {bbox[0]}<={bbox[2]}"
|
|
@@ -3,9 +3,10 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
|
|
|
3
3
|
docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
|
|
4
4
|
docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
|
|
5
5
|
docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
|
|
6
|
-
docling_core/experimental/serializer/base.py,sha256=
|
|
7
|
-
docling_core/experimental/serializer/common.py,sha256=
|
|
8
|
-
docling_core/experimental/serializer/
|
|
6
|
+
docling_core/experimental/serializer/base.py,sha256=avNYy8Lgv45Gm0jfO1OV4wSRsv-O9Eeow2PkUAPY1pA,5152
|
|
7
|
+
docling_core/experimental/serializer/common.py,sha256=g_o-wSQONXIZM7YJF_ghlwc3W3_VkePpM6pDS4ZjrhI,13701
|
|
8
|
+
docling_core/experimental/serializer/doctags.py,sha256=bNUd5vOj1JnvIYFfSc_TSzQKQ7eQ34TY7NAUNK3C604,15953
|
|
9
|
+
docling_core/experimental/serializer/markdown.py,sha256=oEzuPXiooJPVL7yTbXPPFhWF8Phstmzm3mev3yqcqbo,15950
|
|
9
10
|
docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
11
|
docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
|
|
11
12
|
docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
|
|
@@ -29,10 +30,10 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
29
30
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
30
31
|
docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
|
|
31
32
|
docling_core/types/doc/base.py,sha256=22U1qDlD-2ICmgzbdZrjNayoPHnq4S1ks1GRoqB7y1Q,12542
|
|
32
|
-
docling_core/types/doc/document.py,sha256=
|
|
33
|
+
docling_core/types/doc/document.py,sha256=_FJtmp0yh6F_3AVLVN4Xpo7E1hz50gvS_-HrJmp8FOA,128806
|
|
33
34
|
docling_core/types/doc/labels.py,sha256=0J9Gsqz-jQ4FP2yxs9wOxoTr3qg97BniFX7MJVziUmk,5684
|
|
34
|
-
docling_core/types/doc/page.py,sha256=
|
|
35
|
-
docling_core/types/doc/tokens.py,sha256=
|
|
35
|
+
docling_core/types/doc/page.py,sha256=s5DxxoS-6RS0gv3C3ZHWqo2RND2j_iksGJStdby6dBw,40466
|
|
36
|
+
docling_core/types/doc/tokens.py,sha256=fpPtVHfO5RXk8mkqZ7YrW5LyHipg697kbFBNqn6jXQU,9159
|
|
36
37
|
docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
|
|
37
38
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
38
39
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
@@ -62,8 +63,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
62
63
|
docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
|
|
63
64
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
64
65
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
65
|
-
docling_core-2.
|
|
66
|
-
docling_core-2.
|
|
67
|
-
docling_core-2.
|
|
68
|
-
docling_core-2.
|
|
69
|
-
docling_core-2.
|
|
66
|
+
docling_core-2.24.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
67
|
+
docling_core-2.24.1.dist-info/METADATA,sha256=GYkFcQg28UpfzdBadMKZ6AL6V9ezUVTlL50B__Mje5g,5843
|
|
68
|
+
docling_core-2.24.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
69
|
+
docling_core-2.24.1.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
|
|
70
|
+
docling_core-2.24.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|