docling-core 2.37.0__py3-none-any.whl → 2.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/transforms/chunker/hybrid_chunker.py +6 -3
- docling_core/transforms/visualizer/layout_visualizer.py +2 -2
- docling_core/transforms/visualizer/reading_order_visualizer.py +66 -5
- docling_core/types/doc/__init__.py +53 -1
- docling_core/types/doc/document.py +73 -1
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/METADATA +1 -1
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/RECORD +11 -11
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/WHEEL +0 -0
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/entry_points.txt +0 -0
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/licenses/LICENSE +0 -0
- {docling_core-2.37.0.dist-info → docling_core-2.38.0.dist-info}/top_level.txt +0 -0
|
@@ -234,10 +234,13 @@ class HybridChunker(BaseChunker):
|
|
|
234
234
|
if available_length <= 0:
|
|
235
235
|
warnings.warn(
|
|
236
236
|
"Headers and captions for this chunk are longer than the total "
|
|
237
|
-
"
|
|
238
|
-
f"{doc_chunk.text=}"
|
|
237
|
+
"available size for the chunk, so they will be ignored: "
|
|
238
|
+
f"{doc_chunk.text=}, {doc_chunk.meta=}"
|
|
239
239
|
)
|
|
240
|
-
|
|
240
|
+
new_chunk = DocChunk(**doc_chunk.export_json_dict())
|
|
241
|
+
new_chunk.meta.captions = None
|
|
242
|
+
new_chunk.meta.headings = None
|
|
243
|
+
return self._split_using_plain_text(doc_chunk=new_chunk)
|
|
241
244
|
text = doc_chunk.text
|
|
242
245
|
segments = sem_chunker.chunk(text)
|
|
243
246
|
chunks = [DocChunk(text=s, meta=doc_chunk.meta) for s in segments]
|
|
@@ -163,8 +163,8 @@ class LayoutVisualizer(BaseVisualizer):
|
|
|
163
163
|
else:
|
|
164
164
|
raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
|
|
165
165
|
|
|
166
|
-
if prev_page_nr is None or page_nr
|
|
167
|
-
#
|
|
166
|
+
if prev_page_nr is None or page_nr != prev_page_nr: # changing page
|
|
167
|
+
# dump previous drawing
|
|
168
168
|
if prev_page_nr is not None and prev_image and clusters:
|
|
169
169
|
self._draw_clusters(
|
|
170
170
|
image=prev_image,
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"""Define classes for reading order visualization."""
|
|
2
2
|
|
|
3
3
|
from copy import deepcopy
|
|
4
|
-
from typing import Optional
|
|
4
|
+
from typing import Optional, Union
|
|
5
5
|
|
|
6
|
-
from PIL import ImageDraw
|
|
6
|
+
from PIL import ImageDraw, ImageFont
|
|
7
7
|
from PIL.Image import Image
|
|
8
|
+
from PIL.ImageFont import FreeTypeFont
|
|
8
9
|
from pydantic import BaseModel
|
|
9
10
|
from typing_extensions import override
|
|
10
11
|
|
|
@@ -12,6 +13,11 @@ from docling_core.transforms.visualizer.base import BaseVisualizer
|
|
|
12
13
|
from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocument
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
class _NumberDrawingData(BaseModel):
|
|
17
|
+
xy: tuple[float, float]
|
|
18
|
+
text: str
|
|
19
|
+
|
|
20
|
+
|
|
15
21
|
class ReadingOrderVisualizer(BaseVisualizer):
|
|
16
22
|
"""Reading order visualizer."""
|
|
17
23
|
|
|
@@ -19,6 +25,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
19
25
|
"""Layout visualization parameters."""
|
|
20
26
|
|
|
21
27
|
show_label: bool = True
|
|
28
|
+
show_branch_numbering: bool = False
|
|
22
29
|
content_layers: set[ContentLayer] = {
|
|
23
30
|
cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
|
|
24
31
|
}
|
|
@@ -76,10 +83,17 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
76
83
|
images: Optional[dict[Optional[int], Image]] = None,
|
|
77
84
|
):
|
|
78
85
|
"""Draw the reading order."""
|
|
79
|
-
|
|
86
|
+
font: Union[ImageFont.ImageFont, FreeTypeFont]
|
|
87
|
+
try:
|
|
88
|
+
font = ImageFont.truetype("arial.ttf", 12)
|
|
89
|
+
except OSError:
|
|
90
|
+
# Fallback to default font if arial is not available
|
|
91
|
+
font = ImageFont.load_default()
|
|
80
92
|
x0, y0 = None, None
|
|
93
|
+
number_data_to_draw: dict[Optional[int], list[_NumberDrawingData]] = {}
|
|
81
94
|
my_images: dict[Optional[int], Image] = images or {}
|
|
82
95
|
prev_page = None
|
|
96
|
+
i = 0
|
|
83
97
|
for elem, _ in doc.iterate_items(
|
|
84
98
|
included_content_layers=self.params.content_layers,
|
|
85
99
|
):
|
|
@@ -92,7 +106,10 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
92
106
|
page_no = prov.page_no
|
|
93
107
|
image = my_images.get(page_no)
|
|
94
108
|
|
|
95
|
-
if
|
|
109
|
+
if page_no not in number_data_to_draw:
|
|
110
|
+
number_data_to_draw[page_no] = []
|
|
111
|
+
|
|
112
|
+
if image is None or prev_page is None or page_no != prev_page:
|
|
96
113
|
# new page begins
|
|
97
114
|
prev_page = page_no
|
|
98
115
|
x0 = y0 = None
|
|
@@ -109,7 +126,7 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
109
126
|
else:
|
|
110
127
|
image = deepcopy(pil_img)
|
|
111
128
|
my_images[page_no] = image
|
|
112
|
-
draw = ImageDraw.Draw(image)
|
|
129
|
+
draw = ImageDraw.Draw(image, "RGBA")
|
|
113
130
|
|
|
114
131
|
tlo_bbox = prov.bbox.to_top_left_origin(
|
|
115
132
|
page_height=doc.pages[prov.page_no].size.height
|
|
@@ -124,9 +141,20 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
124
141
|
ro_bbox.b, ro_bbox.t = ro_bbox.t, ro_bbox.b
|
|
125
142
|
|
|
126
143
|
if x0 is None and y0 is None:
|
|
144
|
+
# is_root= True
|
|
127
145
|
x0 = (ro_bbox.l + ro_bbox.r) / 2.0
|
|
128
146
|
y0 = (ro_bbox.b + ro_bbox.t) / 2.0
|
|
147
|
+
|
|
148
|
+
number_data_to_draw[page_no].append(
|
|
149
|
+
_NumberDrawingData(
|
|
150
|
+
xy=(x0, y0),
|
|
151
|
+
text=f"{i}",
|
|
152
|
+
)
|
|
153
|
+
)
|
|
154
|
+
i += 1
|
|
155
|
+
|
|
129
156
|
else:
|
|
157
|
+
# is_root = False
|
|
130
158
|
assert x0 is not None
|
|
131
159
|
assert y0 is not None
|
|
132
160
|
|
|
@@ -139,7 +167,40 @@ class ReadingOrderVisualizer(BaseVisualizer):
|
|
|
139
167
|
line_width=2,
|
|
140
168
|
color="red",
|
|
141
169
|
)
|
|
170
|
+
|
|
142
171
|
x0, y0 = x1, y1
|
|
172
|
+
|
|
173
|
+
if self.params.show_branch_numbering:
|
|
174
|
+
# post-drawing the numbers to ensure they are rendered on top-layer
|
|
175
|
+
for page in number_data_to_draw:
|
|
176
|
+
if (image := my_images.get(page)) is None:
|
|
177
|
+
continue
|
|
178
|
+
draw = ImageDraw.Draw(image, "RGBA")
|
|
179
|
+
|
|
180
|
+
for num_item in number_data_to_draw[page]:
|
|
181
|
+
|
|
182
|
+
text_bbox = draw.textbbox(num_item.xy, num_item.text, font)
|
|
183
|
+
text_bg_padding = 5
|
|
184
|
+
draw.ellipse(
|
|
185
|
+
[
|
|
186
|
+
(
|
|
187
|
+
text_bbox[0] - text_bg_padding,
|
|
188
|
+
text_bbox[1] - text_bg_padding,
|
|
189
|
+
),
|
|
190
|
+
(
|
|
191
|
+
text_bbox[2] + text_bg_padding,
|
|
192
|
+
text_bbox[3] + text_bg_padding,
|
|
193
|
+
),
|
|
194
|
+
],
|
|
195
|
+
fill="orange",
|
|
196
|
+
)
|
|
197
|
+
draw.text(
|
|
198
|
+
num_item.xy,
|
|
199
|
+
text=num_item.text,
|
|
200
|
+
fill="black",
|
|
201
|
+
font=font,
|
|
202
|
+
)
|
|
203
|
+
|
|
143
204
|
return my_images
|
|
144
205
|
|
|
145
206
|
@override
|
|
@@ -7,26 +7,78 @@
|
|
|
7
7
|
|
|
8
8
|
from .base import BoundingBox, CoordOrigin, ImageRefMode, Size
|
|
9
9
|
from .document import (
|
|
10
|
+
BaseAnnotation,
|
|
11
|
+
ChartBar,
|
|
12
|
+
ChartLine,
|
|
13
|
+
ChartPoint,
|
|
14
|
+
ChartSlice,
|
|
15
|
+
ChartStackedBar,
|
|
10
16
|
CodeItem,
|
|
17
|
+
ContentLayer,
|
|
18
|
+
DescriptionAnnotation,
|
|
11
19
|
DocItem,
|
|
12
20
|
DoclingDocument,
|
|
21
|
+
DocTagsDocument,
|
|
22
|
+
DocTagsPage,
|
|
13
23
|
DocumentOrigin,
|
|
14
24
|
FloatingItem,
|
|
25
|
+
Formatting,
|
|
26
|
+
FormItem,
|
|
27
|
+
FormulaItem,
|
|
28
|
+
GraphCell,
|
|
29
|
+
GraphData,
|
|
30
|
+
GraphLink,
|
|
15
31
|
GroupItem,
|
|
16
32
|
ImageRef,
|
|
33
|
+
InlineGroup,
|
|
17
34
|
KeyValueItem,
|
|
35
|
+
ListItem,
|
|
36
|
+
MiscAnnotation,
|
|
18
37
|
NodeItem,
|
|
38
|
+
OrderedList,
|
|
19
39
|
PageItem,
|
|
40
|
+
PictureBarChartData,
|
|
41
|
+
PictureChartData,
|
|
20
42
|
PictureClassificationClass,
|
|
21
43
|
PictureClassificationData,
|
|
22
44
|
PictureDataType,
|
|
23
45
|
PictureItem,
|
|
46
|
+
PictureLineChartData,
|
|
47
|
+
PictureMoleculeData,
|
|
48
|
+
PicturePieChartData,
|
|
49
|
+
PictureScatterChartData,
|
|
50
|
+
PictureStackedBarChartData,
|
|
51
|
+
PictureTabularChartData,
|
|
24
52
|
ProvenanceItem,
|
|
25
53
|
RefItem,
|
|
54
|
+
Script,
|
|
26
55
|
SectionHeaderItem,
|
|
27
56
|
TableCell,
|
|
28
57
|
TableData,
|
|
29
58
|
TableItem,
|
|
30
59
|
TextItem,
|
|
60
|
+
TitleItem,
|
|
61
|
+
UnorderedList,
|
|
31
62
|
)
|
|
32
|
-
from .labels import
|
|
63
|
+
from .labels import (
|
|
64
|
+
CodeLanguageLabel,
|
|
65
|
+
DocItemLabel,
|
|
66
|
+
GraphCellLabel,
|
|
67
|
+
GraphLinkLabel,
|
|
68
|
+
GroupLabel,
|
|
69
|
+
PictureClassificationLabel,
|
|
70
|
+
TableCellLabel,
|
|
71
|
+
)
|
|
72
|
+
from .page import (
|
|
73
|
+
BoundingRectangle,
|
|
74
|
+
ColorMixin,
|
|
75
|
+
ColorRGBA,
|
|
76
|
+
Coord2D,
|
|
77
|
+
OrderedElement,
|
|
78
|
+
PdfCellRenderingMode,
|
|
79
|
+
PdfPageBoundaryType,
|
|
80
|
+
TextCell,
|
|
81
|
+
TextCellUnit,
|
|
82
|
+
TextDirection,
|
|
83
|
+
)
|
|
84
|
+
from .tokens import DocumentToken, TableToken
|
|
@@ -4169,6 +4169,7 @@ class DoclingDocument(BaseModel):
|
|
|
4169
4169
|
add_table_cell_location: bool = False,
|
|
4170
4170
|
add_table_cell_text: bool = True,
|
|
4171
4171
|
minified: bool = False,
|
|
4172
|
+
pages: Optional[set[int]] = None,
|
|
4172
4173
|
) -> str:
|
|
4173
4174
|
r"""Exports the document content to a DocumentToken format.
|
|
4174
4175
|
|
|
@@ -4187,6 +4188,7 @@ class DoclingDocument(BaseModel):
|
|
|
4187
4188
|
:param # table specific flagsadd_table_cell_location: bool
|
|
4188
4189
|
:param add_table_cell_text: bool: (Default value = True)
|
|
4189
4190
|
:param minified: bool: (Default value = False)
|
|
4191
|
+
:param pages: set[int]: (Default value = None)
|
|
4190
4192
|
:returns: The content of the document formatted as a DocTags string.
|
|
4191
4193
|
:rtype: str
|
|
4192
4194
|
"""
|
|
@@ -4211,6 +4213,7 @@ class DoclingDocument(BaseModel):
|
|
|
4211
4213
|
add_page_break=add_page_index,
|
|
4212
4214
|
add_table_cell_location=add_table_cell_location,
|
|
4213
4215
|
add_table_cell_text=add_table_cell_text,
|
|
4216
|
+
pages=pages,
|
|
4214
4217
|
mode=(
|
|
4215
4218
|
DocTagsParams.Mode.MINIFIED
|
|
4216
4219
|
if minified
|
|
@@ -4350,7 +4353,9 @@ class DoclingDocument(BaseModel):
|
|
|
4350
4353
|
return pitem
|
|
4351
4354
|
|
|
4352
4355
|
def get_visualization(
|
|
4353
|
-
self,
|
|
4356
|
+
self,
|
|
4357
|
+
show_label: bool = True,
|
|
4358
|
+
show_branch_numbering: bool = False,
|
|
4354
4359
|
) -> dict[Optional[int], PILImage.Image]:
|
|
4355
4360
|
"""Get visualization of the document as images by page."""
|
|
4356
4361
|
from docling_core.transforms.visualizer.layout_visualizer import (
|
|
@@ -4366,6 +4371,9 @@ class DoclingDocument(BaseModel):
|
|
|
4366
4371
|
show_label=show_label,
|
|
4367
4372
|
),
|
|
4368
4373
|
),
|
|
4374
|
+
params=ReadingOrderVisualizer.Params(
|
|
4375
|
+
show_branch_numbering=show_branch_numbering,
|
|
4376
|
+
),
|
|
4369
4377
|
)
|
|
4370
4378
|
images = visualizer.get_visualization(doc=self)
|
|
4371
4379
|
|
|
@@ -4456,3 +4464,67 @@ class DoclingDocument(BaseModel):
|
|
|
4456
4464
|
hyperlink=li.hyperlink,
|
|
4457
4465
|
)
|
|
4458
4466
|
return self
|
|
4467
|
+
|
|
4468
|
+
def _normalize_references(self) -> None:
|
|
4469
|
+
"""Normalize ref numbering by ordering node items as per iterate_items()."""
|
|
4470
|
+
new_body = GroupItem(**self.body.model_dump(exclude={"children"}))
|
|
4471
|
+
|
|
4472
|
+
item_lists: dict[str, list[NodeItem]] = {
|
|
4473
|
+
"groups": [],
|
|
4474
|
+
"texts": [],
|
|
4475
|
+
"pictures": [],
|
|
4476
|
+
"tables": [],
|
|
4477
|
+
"key_value_items": [],
|
|
4478
|
+
"form_items": [],
|
|
4479
|
+
}
|
|
4480
|
+
orig_ref_to_new_ref: dict[str, str] = {}
|
|
4481
|
+
|
|
4482
|
+
# collect items in traversal order
|
|
4483
|
+
for item, _ in self.iterate_items(
|
|
4484
|
+
with_groups=True,
|
|
4485
|
+
traverse_pictures=True,
|
|
4486
|
+
included_content_layers={c for c in ContentLayer},
|
|
4487
|
+
):
|
|
4488
|
+
key = item.self_ref.split("/")[1]
|
|
4489
|
+
is_body = key == "body"
|
|
4490
|
+
new_cref = "#/body" if is_body else f"#/{key}/{len(item_lists[key])}"
|
|
4491
|
+
# register cref mapping:
|
|
4492
|
+
orig_ref_to_new_ref[item.self_ref] = new_cref
|
|
4493
|
+
|
|
4494
|
+
if not is_body:
|
|
4495
|
+
new_item = copy.deepcopy(item)
|
|
4496
|
+
new_item.children = []
|
|
4497
|
+
|
|
4498
|
+
# put item in the right list
|
|
4499
|
+
item_lists[key].append(new_item)
|
|
4500
|
+
|
|
4501
|
+
# update item's self reference
|
|
4502
|
+
new_item.self_ref = new_cref
|
|
4503
|
+
|
|
4504
|
+
if item.parent:
|
|
4505
|
+
# set item's parent
|
|
4506
|
+
new_parent_cref = orig_ref_to_new_ref[item.parent.cref]
|
|
4507
|
+
new_item.parent = RefItem(cref=new_parent_cref)
|
|
4508
|
+
|
|
4509
|
+
# add item to parent's children
|
|
4510
|
+
path_components = new_parent_cref.split("/")
|
|
4511
|
+
num_components = len(path_components)
|
|
4512
|
+
parent_node: NodeItem
|
|
4513
|
+
if num_components == 3:
|
|
4514
|
+
_, parent_key, parent_index_str = path_components
|
|
4515
|
+
parent_index = int(parent_index_str)
|
|
4516
|
+
parent_node = item_lists[parent_key][parent_index]
|
|
4517
|
+
elif num_components == 2 and path_components[1] == "body":
|
|
4518
|
+
parent_node = new_body
|
|
4519
|
+
else:
|
|
4520
|
+
raise RuntimeError(f"Unsupported ref format: {new_parent_cref}")
|
|
4521
|
+
parent_node.children.append(RefItem(cref=new_cref))
|
|
4522
|
+
|
|
4523
|
+
# update document
|
|
4524
|
+
self.groups = item_lists["groups"] # type: ignore
|
|
4525
|
+
self.texts = item_lists["texts"] # type: ignore
|
|
4526
|
+
self.pictures = item_lists["pictures"] # type: ignore
|
|
4527
|
+
self.tables = item_lists["tables"] # type: ignore
|
|
4528
|
+
self.key_value_items = item_lists["key_value_items"] # type: ignore
|
|
4529
|
+
self.form_items = item_lists["form_items"] # type: ignore
|
|
4530
|
+
self.body = new_body
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.37.0
|
|
3
|
+
Version: 2.38.0
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -20,7 +20,7 @@ docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9AC
|
|
|
20
20
|
docling_core/transforms/chunker/__init__.py,sha256=YdizSKXLmmK9eyYBsarHWr8Mx_AoA0PT7c0absibZMk,306
|
|
21
21
|
docling_core/transforms/chunker/base.py,sha256=kJaRrGQynglG9wpy0IaAYTf4MKheWH5BAPzx4LE9yIg,2824
|
|
22
22
|
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=7Fpwwsn2BoiR12KGPrn8fU1uuhqBLp85MRLMF0aIsL8,8281
|
|
23
|
-
docling_core/transforms/chunker/hybrid_chunker.py,sha256=
|
|
23
|
+
docling_core/transforms/chunker/hybrid_chunker.py,sha256=xjkz8hy3tXXzkJzf7QMFOEq_v8V7Jcs9tCY0Mxjge74,12548
|
|
24
24
|
docling_core/transforms/chunker/tokenizer/__init__.py,sha256=-bhXOTpoI7SYk7vn47z8Ek-RZFjJk4TfZawxsFuNHnE,34
|
|
25
25
|
docling_core/transforms/chunker/tokenizer/base.py,sha256=2gOBQPYJYC0iWXOgMG3DiNP7xEBtii7DYcib0iECq5o,575
|
|
26
26
|
docling_core/transforms/chunker/tokenizer/huggingface.py,sha256=aZ_RNQIzcNkAHGHZw3SBCoqJHM2Ihb65eiM29O9BR6o,2506
|
|
@@ -34,14 +34,14 @@ docling_core/transforms/serializer/html_styles.py,sha256=-jBwS4EU7yfKoz0GSoxhwx9
|
|
|
34
34
|
docling_core/transforms/serializer/markdown.py,sha256=wfMNrjA4wMehWLCejAhEN1eQPRixUO1SyL6ojkKkzZY,20614
|
|
35
35
|
docling_core/transforms/visualizer/__init__.py,sha256=gUfF25yiJ_KO46ZIUNqZQOZGy2PLx6gnnr6AZYxKHXI,35
|
|
36
36
|
docling_core/transforms/visualizer/base.py,sha256=aEF7b3rHq6DVdX8zDYEPoq55BHDYe4Hh_97lBdcW4lY,555
|
|
37
|
-
docling_core/transforms/visualizer/layout_visualizer.py,sha256=
|
|
38
|
-
docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=
|
|
37
|
+
docling_core/transforms/visualizer/layout_visualizer.py,sha256=zHzQTWcy-z1J2BcsjvakLkrp8pgStgnxhDl8YqIAotY,8035
|
|
38
|
+
docling_core/transforms/visualizer/reading_order_visualizer.py,sha256=muqmaxOBao39X3Dut0934NAjU3I4v3JN5VzzdjmoGRY,7776
|
|
39
39
|
docling_core/transforms/visualizer/table_visualizer.py,sha256=iJPjk-XQSSCH3oujcjPMz-redAwNNHseZ41lFyd-u3k,8097
|
|
40
40
|
docling_core/types/__init__.py,sha256=MVRSgsk5focwGyAplh_TRR3dEecIXpd98g_u3zZ5HXo,260
|
|
41
41
|
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
42
|
-
docling_core/types/doc/__init__.py,sha256=
|
|
42
|
+
docling_core/types/doc/__init__.py,sha256=pchsIq-9FH_kCTyuyDdB8L4yV77pmnxPwT7399xrqxI,1626
|
|
43
43
|
docling_core/types/doc/base.py,sha256=ndXquBrOKTFQApIJ5s2-zstj3xlVKRbJDSId0KOQnUg,14817
|
|
44
|
-
docling_core/types/doc/document.py,sha256=
|
|
44
|
+
docling_core/types/doc/document.py,sha256=JPh-9MqfOxThP5njvXZAY8sxQyhiPJLjDsSJviggItc,156829
|
|
45
45
|
docling_core/types/doc/labels.py,sha256=JiciRK7_DOkebsrfQ6PVCvS__TsKgWn1ANk84BeB14k,7359
|
|
46
46
|
docling_core/types/doc/page.py,sha256=1JMPwglaTITBvg959L_pcWPb-fXoDYGh-e_tGZMzVMQ,41060
|
|
47
47
|
docling_core/types/doc/tokens.py,sha256=z22l9J81_sg9CYMvOuLmPuLsNT7h_s7wao2UT89DvI8,9278
|
|
@@ -74,9 +74,9 @@ docling_core/utils/generate_jsonschema.py,sha256=uNX1O5XnjyB5nA66XqZXTt3YbGuR2ty
|
|
|
74
74
|
docling_core/utils/legacy.py,sha256=DrI3QGoL755ZCIoKHF74-pTWm8R0zfFo2C2vB5dT2aY,24463
|
|
75
75
|
docling_core/utils/validate.py,sha256=aQ11UbFyl8iD_N7yTTZmm_VVeXz8KcCyn3GLXgkfYRM,2049
|
|
76
76
|
docling_core/utils/validators.py,sha256=azcrndLzhNkTWnbFSu9shJ5D3j_znnLrIFA5R8hzmGU,2798
|
|
77
|
-
docling_core-2.
|
|
78
|
-
docling_core-2.
|
|
79
|
-
docling_core-2.
|
|
80
|
-
docling_core-2.
|
|
81
|
-
docling_core-2.
|
|
82
|
-
docling_core-2.
|
|
77
|
+
docling_core-2.38.0.dist-info/licenses/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
78
|
+
docling_core-2.38.0.dist-info/METADATA,sha256=llcycAVzvc09CX0igt4VIGrGWT8UuMjnWN5rrQoEJ6s,6453
|
|
79
|
+
docling_core-2.38.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
80
|
+
docling_core-2.38.0.dist-info/entry_points.txt,sha256=ER4zROQWkFMHIrY-oqY5E4HeCcCIg8dLkNztYGxdb7c,59
|
|
81
|
+
docling_core-2.38.0.dist-info/top_level.txt,sha256=O-tcXpGiurlud-1ZxMq1b-OmrfAVA4sajcgWU32RtfA,13
|
|
82
|
+
docling_core-2.38.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|