docling-core 2.23.2__py3-none-any.whl → 2.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/experimental/serializer/base.py +2 -2
- docling_core/experimental/serializer/common.py +250 -196
- docling_core/experimental/serializer/doctags.py +492 -0
- docling_core/experimental/serializer/markdown.py +80 -43
- docling_core/types/doc/document.py +412 -418
- docling_core/types/doc/page.py +18 -6
- docling_core/types/doc/tokens.py +192 -26
- {docling_core-2.23.2.dist-info → docling_core-2.24.0.dist-info}/METADATA +1 -1
- {docling_core-2.23.2.dist-info → docling_core-2.24.0.dist-info}/RECORD +12 -11
- {docling_core-2.23.2.dist-info → docling_core-2.24.0.dist-info}/LICENSE +0 -0
- {docling_core-2.23.2.dist-info → docling_core-2.24.0.dist-info}/WHEEL +0 -0
- {docling_core-2.23.2.dist-info → docling_core-2.24.0.dist-info}/entry_points.txt +0 -0
docling_core/types/doc/page.py
CHANGED
|
@@ -546,7 +546,7 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
546
546
|
|
|
547
547
|
def save_as_json(
|
|
548
548
|
self,
|
|
549
|
-
filename: Path,
|
|
549
|
+
filename: Union[str, Path],
|
|
550
550
|
indent: int = 2,
|
|
551
551
|
):
|
|
552
552
|
"""Save the page data as a JSON file.
|
|
@@ -555,12 +555,14 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
555
555
|
filename: Path to save the JSON file
|
|
556
556
|
indent: Indentation level for JSON formatting
|
|
557
557
|
"""
|
|
558
|
+
if isinstance(filename, str):
|
|
559
|
+
filename = Path(filename)
|
|
558
560
|
out = self.export_to_dict()
|
|
559
561
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
560
562
|
json.dump(out, fw, indent=indent)
|
|
561
563
|
|
|
562
564
|
@classmethod
|
|
563
|
-
def load_from_json(cls, filename: Path) -> "SegmentedPdfPage":
|
|
565
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "SegmentedPdfPage":
|
|
564
566
|
"""Load page data from a JSON file.
|
|
565
567
|
|
|
566
568
|
Args:
|
|
@@ -569,6 +571,8 @@ class SegmentedPdfPage(SegmentedPage):
|
|
|
569
571
|
Returns:
|
|
570
572
|
Instantiated SegmentedPdfPage object
|
|
571
573
|
"""
|
|
574
|
+
if isinstance(filename, str):
|
|
575
|
+
filename = Path(filename)
|
|
572
576
|
with open(filename, "r", encoding="utf-8") as f:
|
|
573
577
|
return cls.model_validate_json(f.read())
|
|
574
578
|
|
|
@@ -1155,19 +1159,21 @@ class PdfTableOfContents(BaseModel):
|
|
|
1155
1159
|
"""
|
|
1156
1160
|
return self.model_dump(mode=mode, by_alias=True, exclude_none=True)
|
|
1157
1161
|
|
|
1158
|
-
def save_as_json(self, filename: Path, indent: int = 2):
|
|
1162
|
+
def save_as_json(self, filename: Union[str, Path], indent: int = 2):
|
|
1159
1163
|
"""Save the table of contents as a JSON file.
|
|
1160
1164
|
|
|
1161
1165
|
Args:
|
|
1162
1166
|
filename: Path to save the JSON file
|
|
1163
1167
|
indent: Indentation level for JSON formatting
|
|
1164
1168
|
"""
|
|
1169
|
+
if isinstance(filename, str):
|
|
1170
|
+
filename = Path(filename)
|
|
1165
1171
|
out = self.export_to_dict()
|
|
1166
1172
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
1167
1173
|
json.dump(out, fw, indent=indent)
|
|
1168
1174
|
|
|
1169
1175
|
@classmethod
|
|
1170
|
-
def load_from_json(cls, filename: Path) -> "PdfTableOfContents":
|
|
1176
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "PdfTableOfContents":
|
|
1171
1177
|
"""Load table of contents from a JSON file.
|
|
1172
1178
|
|
|
1173
1179
|
Args:
|
|
@@ -1176,6 +1182,8 @@ class PdfTableOfContents(BaseModel):
|
|
|
1176
1182
|
Returns:
|
|
1177
1183
|
Instantiated PdfTableOfContents object
|
|
1178
1184
|
"""
|
|
1185
|
+
if isinstance(filename, str):
|
|
1186
|
+
filename = Path(filename)
|
|
1179
1187
|
with open(filename, "r", encoding="utf-8") as f:
|
|
1180
1188
|
return cls.model_validate_json(f.read())
|
|
1181
1189
|
|
|
@@ -1213,19 +1221,21 @@ class ParsedPdfDocument(BaseModel):
|
|
|
1213
1221
|
"""
|
|
1214
1222
|
return self.model_dump(mode=mode, by_alias=True, exclude_none=True)
|
|
1215
1223
|
|
|
1216
|
-
def save_as_json(self, filename: Path, indent: int = 2):
|
|
1224
|
+
def save_as_json(self, filename: Union[str, Path], indent: int = 2):
|
|
1217
1225
|
"""Save the document as a JSON file.
|
|
1218
1226
|
|
|
1219
1227
|
Args:
|
|
1220
1228
|
filename: Path to save the JSON file
|
|
1221
1229
|
indent: Indentation level for JSON formatting
|
|
1222
1230
|
"""
|
|
1231
|
+
if isinstance(filename, str):
|
|
1232
|
+
filename = Path(filename)
|
|
1223
1233
|
out = self.export_to_dict()
|
|
1224
1234
|
with open(filename, "w", encoding="utf-8") as fw:
|
|
1225
1235
|
json.dump(out, fw, indent=indent)
|
|
1226
1236
|
|
|
1227
1237
|
@classmethod
|
|
1228
|
-
def load_from_json(cls, filename: Path) -> "ParsedPdfDocument":
|
|
1238
|
+
def load_from_json(cls, filename: Union[str, Path]) -> "ParsedPdfDocument":
|
|
1229
1239
|
"""Load document from a JSON file.
|
|
1230
1240
|
|
|
1231
1241
|
Args:
|
|
@@ -1234,5 +1244,7 @@ class ParsedPdfDocument(BaseModel):
|
|
|
1234
1244
|
Returns:
|
|
1235
1245
|
Instantiated ParsedPdfDocument object
|
|
1236
1246
|
"""
|
|
1247
|
+
if isinstance(filename, str):
|
|
1248
|
+
filename = Path(filename)
|
|
1237
1249
|
with open(filename, "r", encoding="utf-8") as f:
|
|
1238
1250
|
return cls.model_validate_json(f.read())
|
docling_core/types/doc/tokens.py
CHANGED
|
@@ -8,10 +8,10 @@
|
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from typing import Tuple
|
|
10
10
|
|
|
11
|
-
from docling_core.types.doc.labels import
|
|
11
|
+
from docling_core.types.doc.labels import DocItemLabel
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class TableToken(Enum):
|
|
14
|
+
class TableToken(str, Enum):
|
|
15
15
|
"""Class to represent an LLM friendly representation of a Table."""
|
|
16
16
|
|
|
17
17
|
CELL_LABEL_COLUMN_HEADER = "<column_header>"
|
|
@@ -41,41 +41,207 @@ class TableToken(Enum):
|
|
|
41
41
|
return label in TableToken.get_special_tokens()
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
_LOC_PREFIX = "loc_"
|
|
45
|
+
_SECTION_HEADER_PREFIX = "section_header_level_"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _PictureClassificationToken(str, Enum):
|
|
49
|
+
"""PictureClassificationToken."""
|
|
50
|
+
|
|
51
|
+
OTHER = "<other>"
|
|
52
|
+
|
|
53
|
+
# If more than one picture is grouped together, it
|
|
54
|
+
# is generally not possible to assign a label
|
|
55
|
+
PICTURE_GROUP = "<picture_group>"
|
|
56
|
+
|
|
57
|
+
# General
|
|
58
|
+
PIE_CHART = "<pie_chart>"
|
|
59
|
+
BAR_CHART = "<bar_chart>"
|
|
60
|
+
LINE_CHART = "<line_chart>"
|
|
61
|
+
FLOW_CHART = "<flow_chart>"
|
|
62
|
+
SCATTER_CHART = "<scatter_chart>"
|
|
63
|
+
HEATMAP = "<heatmap>"
|
|
64
|
+
REMOTE_SENSING = "<remote_sensing>"
|
|
65
|
+
|
|
66
|
+
NATURAL_IMAGE = "<natural_image>"
|
|
67
|
+
|
|
68
|
+
# Chemistry
|
|
69
|
+
MOLECULAR_STRUCTURE = "<chemistry_molecular_structure>"
|
|
70
|
+
MARKUSH_STRUCTURE = "<chemistry_markush_structure>"
|
|
71
|
+
|
|
72
|
+
# Company
|
|
73
|
+
ICON = "<icon>"
|
|
74
|
+
LOGO = "<logo>"
|
|
75
|
+
SIGNATURE = "<signature>"
|
|
76
|
+
STAMP = "<stamp>"
|
|
77
|
+
QR_CODE = "<qr_code>"
|
|
78
|
+
BAR_CODE = "<bar_code>"
|
|
79
|
+
SCREENSHOT = "<screenshot>"
|
|
80
|
+
|
|
81
|
+
# Geology/Geography
|
|
82
|
+
GEOGRAPHIC_MAP = "<map>"
|
|
83
|
+
STRATIGRAPHIC_CHART = "<stratigraphic_chart>"
|
|
84
|
+
|
|
85
|
+
# Engineering
|
|
86
|
+
CAD_DRAWING = "<cad_drawing>"
|
|
87
|
+
ELECTRICAL_DIAGRAM = "<electrical_diagram>"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _CodeLanguageToken(str, Enum):
|
|
91
|
+
"""CodeLanguageToken."""
|
|
92
|
+
|
|
93
|
+
ADA = "<_Ada_>"
|
|
94
|
+
AWK = "<_Awk_>"
|
|
95
|
+
BASH = "<_Bash_>"
|
|
96
|
+
BC = "<_bc_>"
|
|
97
|
+
C = "<_C_>"
|
|
98
|
+
C_SHARP = "<_C#_>"
|
|
99
|
+
C_PLUS_PLUS = "<_C++_>"
|
|
100
|
+
CMAKE = "<_CMake_>"
|
|
101
|
+
COBOL = "<_COBOL_>"
|
|
102
|
+
CSS = "<_CSS_>"
|
|
103
|
+
CEYLON = "<_Ceylon_>"
|
|
104
|
+
CLOJURE = "<_Clojure_>"
|
|
105
|
+
CRYSTAL = "<_Crystal_>"
|
|
106
|
+
CUDA = "<_Cuda_>"
|
|
107
|
+
CYTHON = "<_Cython_>"
|
|
108
|
+
D = "<_D_>"
|
|
109
|
+
DART = "<_Dart_>"
|
|
110
|
+
DC = "<_dc_>"
|
|
111
|
+
DOCKERFILE = "<_Dockerfile_>"
|
|
112
|
+
ELIXIR = "<_Elixir_>"
|
|
113
|
+
ERLANG = "<_Erlang_>"
|
|
114
|
+
FORTRAN = "<_FORTRAN_>"
|
|
115
|
+
FORTH = "<_Forth_>"
|
|
116
|
+
GO = "<_Go_>"
|
|
117
|
+
HTML = "<_HTML_>"
|
|
118
|
+
HASKELL = "<_Haskell_>"
|
|
119
|
+
HAXE = "<_Haxe_>"
|
|
120
|
+
JAVA = "<_Java_>"
|
|
121
|
+
JAVASCRIPT = "<_JavaScript_>"
|
|
122
|
+
JULIA = "<_Julia_>"
|
|
123
|
+
KOTLIN = "<_Kotlin_>"
|
|
124
|
+
LISP = "<_Lisp_>"
|
|
125
|
+
LUA = "<_Lua_>"
|
|
126
|
+
MATLAB = "<_Matlab_>"
|
|
127
|
+
MOONSCRIPT = "<_MoonScript_>"
|
|
128
|
+
NIM = "<_Nim_>"
|
|
129
|
+
OCAML = "<_OCaml_>"
|
|
130
|
+
OBJECTIVEC = "<_ObjectiveC_>"
|
|
131
|
+
OCTAVE = "<_Octave_>"
|
|
132
|
+
PHP = "<_PHP_>"
|
|
133
|
+
PASCAL = "<_Pascal_>"
|
|
134
|
+
PERL = "<_Perl_>"
|
|
135
|
+
PROLOG = "<_Prolog_>"
|
|
136
|
+
PYTHON = "<_Python_>"
|
|
137
|
+
RACKET = "<_Racket_>"
|
|
138
|
+
RUBY = "<_Ruby_>"
|
|
139
|
+
RUST = "<_Rust_>"
|
|
140
|
+
SML = "<_SML_>"
|
|
141
|
+
SQL = "<_SQL_>"
|
|
142
|
+
SCALA = "<_Scala_>"
|
|
143
|
+
SCHEME = "<_Scheme_>"
|
|
144
|
+
SWIFT = "<_Swift_>"
|
|
145
|
+
TYPESCRIPT = "<_TypeScript_>"
|
|
146
|
+
UNKNOWN = "<_unknown_>"
|
|
147
|
+
VISUALBASIC = "<_VisualBasic_>"
|
|
148
|
+
XML = "<_XML_>"
|
|
149
|
+
YAML = "<_YAML_>"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class DocumentToken(str, Enum):
|
|
45
153
|
"""Class to represent an LLM friendly representation of a Document."""
|
|
46
154
|
|
|
47
155
|
DOCUMENT = "doctag"
|
|
48
156
|
OTSL = "otsl"
|
|
49
157
|
ORDERED_LIST = "ordered_list"
|
|
50
158
|
UNORDERED_LIST = "unordered_list"
|
|
51
|
-
LOC = "loc_"
|
|
52
159
|
PAGE_BREAK = "page_break"
|
|
53
160
|
SMILES = "smiles"
|
|
161
|
+
INLINE = "inline"
|
|
162
|
+
|
|
163
|
+
CAPTION = "caption"
|
|
164
|
+
FOOTNOTE = "footnote"
|
|
165
|
+
FORMULA = "formula"
|
|
166
|
+
LIST_ITEM = "list_item"
|
|
167
|
+
PAGE_FOOTER = "page_footer"
|
|
168
|
+
PAGE_HEADER = "page_header"
|
|
169
|
+
PICTURE = "picture"
|
|
170
|
+
TABLE = "table"
|
|
171
|
+
TEXT = "text"
|
|
172
|
+
TITLE = "title"
|
|
173
|
+
DOCUMENT_INDEX = "document_index"
|
|
174
|
+
CODE = "code"
|
|
175
|
+
CHECKBOX_SELECTED = "checkbox_selected"
|
|
176
|
+
CHECKBOX_UNSELECTED = "checkbox_unselected"
|
|
177
|
+
FORM = "form"
|
|
178
|
+
KEY_VALUE_REGION = "key_value_region"
|
|
179
|
+
|
|
180
|
+
PARAGRAPH = "paragraph"
|
|
181
|
+
REFERENCE = "reference"
|
|
54
182
|
|
|
55
183
|
@classmethod
|
|
56
184
|
def get_special_tokens(
|
|
57
185
|
cls,
|
|
58
|
-
page_dimension: Tuple[int, int] = (
|
|
186
|
+
page_dimension: Tuple[int, int] = (500, 500),
|
|
59
187
|
):
|
|
60
188
|
"""Function to get all special document tokens."""
|
|
61
|
-
special_tokens = [
|
|
189
|
+
special_tokens: list[str] = []
|
|
190
|
+
for token in cls:
|
|
191
|
+
special_tokens.append(f"<{token.value}>")
|
|
192
|
+
special_tokens.append(f"</{token.value}>")
|
|
62
193
|
|
|
63
194
|
for i in range(6):
|
|
64
195
|
special_tokens += [
|
|
65
|
-
f"<
|
|
66
|
-
f"</
|
|
196
|
+
f"<{_SECTION_HEADER_PREFIX}{i}>",
|
|
197
|
+
f"</{_SECTION_HEADER_PREFIX}{i}>",
|
|
67
198
|
]
|
|
68
199
|
|
|
69
|
-
|
|
70
|
-
for
|
|
71
|
-
|
|
200
|
+
special_tokens.extend([t.value for t in _PictureClassificationToken])
|
|
201
|
+
special_tokens.extend([t.value for t in _CodeLanguageToken])
|
|
202
|
+
|
|
203
|
+
special_tokens.extend(TableToken.get_special_tokens())
|
|
72
204
|
|
|
73
205
|
# Adding dynamically generated location-tokens
|
|
74
|
-
for i in range(0, max(page_dimension[0]
|
|
75
|
-
special_tokens.append(f"<
|
|
206
|
+
for i in range(0, max(page_dimension[0], page_dimension[1])):
|
|
207
|
+
special_tokens.append(f"<{_LOC_PREFIX}{i}>")
|
|
76
208
|
|
|
77
209
|
return special_tokens
|
|
78
210
|
|
|
211
|
+
@classmethod
|
|
212
|
+
def create_token_name_from_doc_item_label(cls, label: str, level: int = 1) -> str:
|
|
213
|
+
"""Get token corresponding to passed doc item label."""
|
|
214
|
+
doc_token_by_item_label = {
|
|
215
|
+
DocItemLabel.CAPTION: DocumentToken.CAPTION,
|
|
216
|
+
DocItemLabel.FOOTNOTE: DocumentToken.FOOTNOTE,
|
|
217
|
+
DocItemLabel.FORMULA: DocumentToken.FORMULA,
|
|
218
|
+
DocItemLabel.LIST_ITEM: DocumentToken.LIST_ITEM,
|
|
219
|
+
DocItemLabel.PAGE_FOOTER: DocumentToken.PAGE_FOOTER,
|
|
220
|
+
DocItemLabel.PAGE_HEADER: DocumentToken.PAGE_HEADER,
|
|
221
|
+
DocItemLabel.PICTURE: DocumentToken.PICTURE,
|
|
222
|
+
DocItemLabel.TABLE: DocumentToken.TABLE,
|
|
223
|
+
DocItemLabel.TEXT: DocumentToken.TEXT,
|
|
224
|
+
DocItemLabel.TITLE: DocumentToken.TITLE,
|
|
225
|
+
DocItemLabel.DOCUMENT_INDEX: DocumentToken.DOCUMENT_INDEX,
|
|
226
|
+
DocItemLabel.CODE: DocumentToken.CODE,
|
|
227
|
+
DocItemLabel.CHECKBOX_SELECTED: DocumentToken.CHECKBOX_SELECTED,
|
|
228
|
+
DocItemLabel.CHECKBOX_UNSELECTED: DocumentToken.CHECKBOX_UNSELECTED,
|
|
229
|
+
DocItemLabel.FORM: DocumentToken.FORM,
|
|
230
|
+
DocItemLabel.KEY_VALUE_REGION: DocumentToken.KEY_VALUE_REGION,
|
|
231
|
+
DocItemLabel.PARAGRAPH: DocumentToken.PARAGRAPH,
|
|
232
|
+
DocItemLabel.REFERENCE: DocumentToken.REFERENCE,
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
res: str
|
|
236
|
+
if label == DocItemLabel.SECTION_HEADER:
|
|
237
|
+
res = f"{_SECTION_HEADER_PREFIX}{level}"
|
|
238
|
+
else:
|
|
239
|
+
try:
|
|
240
|
+
res = doc_token_by_item_label[DocItemLabel(label)].value
|
|
241
|
+
except KeyError as e:
|
|
242
|
+
raise RuntimeError(f"Unexpected DocItemLabel: {label}") from e
|
|
243
|
+
return res
|
|
244
|
+
|
|
79
245
|
@staticmethod
|
|
80
246
|
def is_known_token(label):
|
|
81
247
|
"""Function to check if label is in tokens."""
|
|
@@ -83,29 +249,29 @@ class DocumentToken(Enum):
|
|
|
83
249
|
|
|
84
250
|
@staticmethod
|
|
85
251
|
def get_picture_classification_token(classification: str) -> str:
|
|
86
|
-
"""Function to get picture classification
|
|
87
|
-
return f"<{classification}>"
|
|
252
|
+
"""Function to get the token for a given picture classification value."""
|
|
253
|
+
return _PictureClassificationToken(f"<{classification}>").value
|
|
254
|
+
|
|
255
|
+
@staticmethod
|
|
256
|
+
def get_code_language_token(code_language: str) -> str:
|
|
257
|
+
"""Function to get the token for a given code language."""
|
|
258
|
+
return _CodeLanguageToken(f"<_{code_language}_>").value
|
|
88
259
|
|
|
89
260
|
@staticmethod
|
|
90
|
-
def get_location_token(val: float, rnorm: int =
|
|
261
|
+
def get_location_token(val: float, rnorm: int = 500): # TODO review
|
|
91
262
|
"""Function to get location tokens."""
|
|
92
263
|
val_ = round(rnorm * val)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if val_ > rnorm:
|
|
98
|
-
return f"<loc_{rnorm}>"
|
|
99
|
-
|
|
100
|
-
return f"<loc_{val_}>"
|
|
264
|
+
val_ = max(val_, 0)
|
|
265
|
+
val_ = min(val_, rnorm - 1)
|
|
266
|
+
return f"<{_LOC_PREFIX}{val_}>"
|
|
101
267
|
|
|
102
268
|
@staticmethod
|
|
103
269
|
def get_location(
|
|
104
270
|
bbox: tuple[float, float, float, float],
|
|
105
271
|
page_w: float,
|
|
106
272
|
page_h: float,
|
|
107
|
-
xsize: int =
|
|
108
|
-
ysize: int =
|
|
273
|
+
xsize: int = 500, # TODO review
|
|
274
|
+
ysize: int = 500, # TODO review
|
|
109
275
|
):
|
|
110
276
|
"""Get the location string give bbox and page-dim."""
|
|
111
277
|
assert bbox[0] <= bbox[2], f"bbox[0]<=bbox[2] => {bbox[0]}<={bbox[2]}"
|
|
@@ -3,9 +3,10 @@ docling_core/cli/__init__.py,sha256=C63yWifzpA0IV7YWDatpAdrhoV8zjqxAKv0xMf09VdM,
|
|
|
3
3
|
docling_core/cli/view.py,sha256=gwxSBYhGqwznMR8pdXaEuAh2bjFD5X_g11xFYSgFgtM,1764
|
|
4
4
|
docling_core/experimental/__init__.py,sha256=XnAVSUHbA6OFhNSpoYqSD3u83-xVaUaki1DIKFw69Ew,99
|
|
5
5
|
docling_core/experimental/serializer/__init__.py,sha256=CECQlMoCDUxkg4RAUdC3itA3I3qFhKhe2HcYghN6_xw,105
|
|
6
|
-
docling_core/experimental/serializer/base.py,sha256=
|
|
7
|
-
docling_core/experimental/serializer/common.py,sha256=
|
|
8
|
-
docling_core/experimental/serializer/
|
|
6
|
+
docling_core/experimental/serializer/base.py,sha256=avNYy8Lgv45Gm0jfO1OV4wSRsv-O9Eeow2PkUAPY1pA,5152
|
|
7
|
+
docling_core/experimental/serializer/common.py,sha256=g_o-wSQONXIZM7YJF_ghlwc3W3_VkePpM6pDS4ZjrhI,13701
|
|
8
|
+
docling_core/experimental/serializer/doctags.py,sha256=bNUd5vOj1JnvIYFfSc_TSzQKQ7eQ34TY7NAUNK3C604,15953
|
|
9
|
+
docling_core/experimental/serializer/markdown.py,sha256=oEzuPXiooJPVL7yTbXPPFhWF8Phstmzm3mev3yqcqbo,15950
|
|
9
10
|
docling_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
11
|
docling_core/resources/schemas/doc/ANN.json,sha256=04U5j-PU9m5w7IagJ_rHcAx7qUtLkUuaWZO9GuYHnTA,4202
|
|
11
12
|
docling_core/resources/schemas/doc/DOC.json,sha256=9tVKpCqDGGq3074Nn5qlUCdTN-5k1Q0ri_scJblwnLE,6686
|
|
@@ -29,10 +30,10 @@ docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HX
|
|
|
29
30
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
30
31
|
docling_core/types/doc/__init__.py,sha256=bysJn2iwjAHwThSWDPXEdVUUij7p_ax12_nx2_0CMdg,653
|
|
31
32
|
docling_core/types/doc/base.py,sha256=22U1qDlD-2ICmgzbdZrjNayoPHnq4S1ks1GRoqB7y1Q,12542
|
|
32
|
-
docling_core/types/doc/document.py,sha256=
|
|
33
|
+
docling_core/types/doc/document.py,sha256=_FJtmp0yh6F_3AVLVN4Xpo7E1hz50gvS_-HrJmp8FOA,128806
|
|
33
34
|
docling_core/types/doc/labels.py,sha256=0J9Gsqz-jQ4FP2yxs9wOxoTr3qg97BniFX7MJVziUmk,5684
|
|
34
|
-
docling_core/types/doc/page.py,sha256=
|
|
35
|
-
docling_core/types/doc/tokens.py,sha256=
|
|
35
|
+
docling_core/types/doc/page.py,sha256=qCXp_s0cY3N1WWkICv6fjH52OVYYbjYiqRQit86FxG4,39989
|
|
36
|
+
docling_core/types/doc/tokens.py,sha256=fpPtVHfO5RXk8mkqZ7YrW5LyHipg697kbFBNqn6jXQU,9159
|
|
36
37
|
docling_core/types/doc/utils.py,sha256=SaiQD-WMMooFm1bMqwatU-IGhtG048iKJb-ppnJit_k,2250
|
|
37
38
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
38
39
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
@@ -62,8 +63,8 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
62
63
|
docling_core/utils/legacy.py,sha256=SqNQAxl97aHfoJEsC9vZcMJg5FNkmqKPFi-wdSrnfI0,24442
|
|
63
64
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
64
65
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
65
|
-
docling_core-2.
|
|
66
|
-
docling_core-2.
|
|
67
|
-
docling_core-2.
|
|
68
|
-
docling_core-2.
|
|
69
|
-
docling_core-2.
|
|
66
|
+
docling_core-2.24.0.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
67
|
+
docling_core-2.24.0.dist-info/METADATA,sha256=ycw0ioISQ7Uv0rL9_RU5zpsimerhh35wfKv0bul1e9g,5843
|
|
68
|
+
docling_core-2.24.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
69
|
+
docling_core-2.24.0.dist-info/entry_points.txt,sha256=oClcdb2L2RKx4jdqUykY16Kum_f0_whwWhGzIodyidc,216
|
|
70
|
+
docling_core-2.24.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|