docling 2.12.0__py3-none-any.whl → 2.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/xml/__init__.py +0 -0
- docling/backend/xml/uspto_backend.py +1888 -0
- docling/datamodel/base_models.py +18 -4
- docling/datamodel/document.py +77 -13
- docling/datamodel/pipeline_options.py +3 -0
- docling/datamodel/settings.py +1 -0
- docling/document_converter.py +11 -2
- docling/models/ds_glm_model.py +34 -4
- docling/models/easyocr_model.py +2 -0
- docling/models/layout_model.py +134 -280
- docling/models/page_assemble_model.py +11 -1
- docling/models/table_structure_model.py +25 -29
- docling/pipeline/base_pipeline.py +3 -1
- docling/utils/glm_utils.py +11 -3
- docling/utils/layout_postprocessor.py +666 -0
- {docling-2.12.0.dist-info → docling-2.13.0.dist-info}/METADATA +2 -2
- {docling-2.12.0.dist-info → docling-2.13.0.dist-info}/RECORD +20 -18
- docling/utils/layout_utils.py +0 -812
- {docling-2.12.0.dist-info → docling-2.13.0.dist-info}/LICENSE +0 -0
- {docling-2.12.0.dist-info → docling-2.13.0.dist-info}/WHEEL +0 -0
- {docling-2.12.0.dist-info → docling-2.13.0.dist-info}/entry_points.txt +0 -0
docling/models/layout_model.py
CHANGED
@@ -7,9 +7,8 @@ from typing import Iterable, List
|
|
7
7
|
|
8
8
|
from docling_core.types.doc import CoordOrigin, DocItemLabel
|
9
9
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
10
|
-
from PIL import ImageDraw
|
10
|
+
from PIL import Image, ImageDraw, ImageFont
|
11
11
|
|
12
|
-
import docling.utils.layout_utils as lu
|
13
12
|
from docling.datamodel.base_models import (
|
14
13
|
BoundingBox,
|
15
14
|
Cell,
|
@@ -22,6 +21,7 @@ from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOpt
|
|
22
21
|
from docling.datamodel.settings import settings
|
23
22
|
from docling.models.base_model import BasePageModel
|
24
23
|
from docling.utils.accelerator_utils import decide_device
|
24
|
+
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
25
25
|
from docling.utils.profiling import TimeRecorder
|
26
26
|
|
27
27
|
_log = logging.getLogger(__name__)
|
@@ -44,9 +44,10 @@ class LayoutModel(BasePageModel):
|
|
44
44
|
]
|
45
45
|
PAGE_HEADER_LABELS = [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]
|
46
46
|
|
47
|
-
|
47
|
+
TABLE_LABELS = [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
|
48
48
|
FIGURE_LABEL = DocItemLabel.PICTURE
|
49
49
|
FORMULA_LABEL = DocItemLabel.FORMULA
|
50
|
+
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
|
50
51
|
|
51
52
|
def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
|
52
53
|
device = decide_device(accelerator_options.device)
|
@@ -55,234 +56,127 @@ class LayoutModel(BasePageModel):
|
|
55
56
|
artifact_path=str(artifacts_path),
|
56
57
|
device=device,
|
57
58
|
num_threads=accelerator_options.num_threads,
|
58
|
-
base_threshold=0.6,
|
59
|
-
blacklist_classes={"Form", "Key-Value Region"},
|
60
59
|
)
|
61
60
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
DocItemLabel.
|
73
|
-
DocItemLabel.
|
74
|
-
DocItemLabel.
|
75
|
-
DocItemLabel.
|
76
|
-
DocItemLabel.
|
77
|
-
DocItemLabel.
|
78
|
-
DocItemLabel.
|
79
|
-
DocItemLabel.
|
80
|
-
DocItemLabel.
|
81
|
-
|
61
|
+
def draw_clusters_and_cells_side_by_side(
|
62
|
+
self, conv_res, page, clusters, mode_prefix: str, show: bool = False
|
63
|
+
):
|
64
|
+
"""
|
65
|
+
Draws a page image side by side with clusters filtered into two categories:
|
66
|
+
- Left: Clusters excluding FORM, KEY_VALUE_REGION, and PICTURE.
|
67
|
+
- Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
|
68
|
+
Includes label names and confidence scores for each cluster.
|
69
|
+
"""
|
70
|
+
label_to_color = {
|
71
|
+
DocItemLabel.TEXT: (255, 255, 153), # Light Yellow
|
72
|
+
DocItemLabel.CAPTION: (255, 204, 153), # Light Orange
|
73
|
+
DocItemLabel.LIST_ITEM: (153, 153, 255), # Light Purple
|
74
|
+
DocItemLabel.FORMULA: (192, 192, 192), # Gray
|
75
|
+
DocItemLabel.TABLE: (255, 204, 204), # Light Pink
|
76
|
+
DocItemLabel.PICTURE: (255, 204, 164), # Light Beige
|
77
|
+
DocItemLabel.SECTION_HEADER: (255, 153, 153), # Light Red
|
78
|
+
DocItemLabel.PAGE_HEADER: (204, 255, 204), # Light Green
|
79
|
+
DocItemLabel.PAGE_FOOTER: (
|
80
|
+
204,
|
81
|
+
255,
|
82
|
+
204,
|
83
|
+
), # Light Green (same as Page-Header)
|
84
|
+
DocItemLabel.TITLE: (255, 153, 153), # Light Red (same as Section-Header)
|
85
|
+
DocItemLabel.FOOTNOTE: (200, 200, 255), # Light Blue
|
86
|
+
DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), # Light Gray
|
87
|
+
DocItemLabel.CODE: (125, 125, 125), # Gray
|
88
|
+
DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), # Pale Green
|
89
|
+
DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), # Light Pink
|
90
|
+
DocItemLabel.FORM: (200, 255, 255), # Light Cyan
|
91
|
+
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14), # Rusty orange
|
82
92
|
}
|
83
|
-
|
84
|
-
|
85
|
-
DocItemLabel.
|
86
|
-
DocItemLabel.
|
93
|
+
# Filter clusters for left and right images
|
94
|
+
exclude_labels = {
|
95
|
+
DocItemLabel.FORM,
|
96
|
+
DocItemLabel.KEY_VALUE_REGION,
|
97
|
+
DocItemLabel.PICTURE,
|
87
98
|
}
|
99
|
+
left_clusters = [c for c in clusters if c.label not in exclude_labels]
|
100
|
+
right_clusters = [c for c in clusters if c.label in exclude_labels]
|
101
|
+
# Create a deep copy of the original image for both sides
|
102
|
+
left_image = copy.deepcopy(page.image)
|
103
|
+
right_image = copy.deepcopy(page.image)
|
104
|
+
|
105
|
+
# Function to draw clusters on an image
|
106
|
+
def draw_clusters(image, clusters):
|
107
|
+
draw = ImageDraw.Draw(image, "RGBA")
|
108
|
+
# Create a smaller font for the labels
|
109
|
+
try:
|
110
|
+
font = ImageFont.truetype("arial.ttf", 12)
|
111
|
+
except OSError:
|
112
|
+
# Fallback to default font if arial is not available
|
113
|
+
font = ImageFont.load_default()
|
114
|
+
for c_tl in clusters:
|
115
|
+
all_clusters = [c_tl, *c_tl.children]
|
116
|
+
for c in all_clusters:
|
117
|
+
# Draw cells first (underneath)
|
118
|
+
cell_color = (0, 0, 0, 40) # Transparent black for cells
|
119
|
+
for tc in c.cells:
|
120
|
+
cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
|
121
|
+
draw.rectangle(
|
122
|
+
[(cx0, cy0), (cx1, cy1)],
|
123
|
+
outline=None,
|
124
|
+
fill=cell_color,
|
125
|
+
)
|
126
|
+
# Draw cluster rectangle
|
127
|
+
x0, y0, x1, y1 = c.bbox.as_tuple()
|
128
|
+
cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
|
129
|
+
cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
|
130
|
+
draw.rectangle(
|
131
|
+
[(x0, y0), (x1, y1)],
|
132
|
+
outline=cluster_outline_color,
|
133
|
+
fill=cluster_fill_color,
|
134
|
+
)
|
135
|
+
# Add label name and confidence
|
136
|
+
label_text = f"{c.label.name} ({c.confidence:.2f})"
|
137
|
+
# Create semi-transparent background for text
|
138
|
+
text_bbox = draw.textbbox((x0, y0), label_text, font=font)
|
139
|
+
text_bg_padding = 2
|
140
|
+
draw.rectangle(
|
141
|
+
[
|
142
|
+
(
|
143
|
+
text_bbox[0] - text_bg_padding,
|
144
|
+
text_bbox[1] - text_bg_padding,
|
145
|
+
),
|
146
|
+
(
|
147
|
+
text_bbox[2] + text_bg_padding,
|
148
|
+
text_bbox[3] + text_bg_padding,
|
149
|
+
),
|
150
|
+
],
|
151
|
+
fill=(255, 255, 255, 180), # Semi-transparent white
|
152
|
+
)
|
153
|
+
# Draw text
|
154
|
+
draw.text(
|
155
|
+
(x0, y0),
|
156
|
+
label_text,
|
157
|
+
fill=(0, 0, 0, 255), # Solid black
|
158
|
+
font=font,
|
159
|
+
)
|
88
160
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
# map to dictionary clusters and cells, with bottom left origin
|
106
|
-
clusters_orig = [
|
107
|
-
{
|
108
|
-
"id": c.id,
|
109
|
-
"bbox": list(
|
110
|
-
c.bbox.to_bottom_left_origin(page_height).as_tuple()
|
111
|
-
), # TODO
|
112
|
-
"confidence": c.confidence,
|
113
|
-
"cell_ids": [],
|
114
|
-
"type": c.label,
|
115
|
-
}
|
116
|
-
for c in clusters_in
|
117
|
-
]
|
118
|
-
|
119
|
-
clusters_out = [
|
120
|
-
{
|
121
|
-
"id": c.id,
|
122
|
-
"bbox": list(
|
123
|
-
c.bbox.to_bottom_left_origin(page_height).as_tuple()
|
124
|
-
), # TODO
|
125
|
-
"confidence": c.confidence,
|
126
|
-
"created_by": "high_conf_pred",
|
127
|
-
"cell_ids": [],
|
128
|
-
"type": c.label,
|
129
|
-
}
|
130
|
-
for c in clusters_mod
|
131
|
-
]
|
132
|
-
|
133
|
-
del clusters_mod
|
134
|
-
|
135
|
-
raw_cells = [
|
136
|
-
{
|
137
|
-
"id": c.id,
|
138
|
-
"bbox": list(
|
139
|
-
c.bbox.to_bottom_left_origin(page_height).as_tuple()
|
140
|
-
), # TODO
|
141
|
-
"text": c.text,
|
142
|
-
}
|
143
|
-
for c in cells
|
144
|
-
]
|
145
|
-
cell_count = len(raw_cells)
|
146
|
-
|
147
|
-
_log.debug("---- 0. Treat cluster overlaps ------")
|
148
|
-
clusters_out = lu.remove_cluster_duplicates_by_conf(clusters_out, 0.8)
|
149
|
-
|
150
|
-
_log.debug(
|
151
|
-
"---- 1. Initially assign cells to clusters based on minimum intersection ------"
|
152
|
-
)
|
153
|
-
## Check for cells included in or touched by clusters:
|
154
|
-
clusters_out = lu.assigning_cell_ids_to_clusters(
|
155
|
-
clusters_out, raw_cells, MIN_INTERSECTION
|
156
|
-
)
|
157
|
-
|
158
|
-
_log.debug("---- 2. Assign Orphans with Low Confidence Detections")
|
159
|
-
# Creates a map of cell_id->cluster_id
|
160
|
-
(
|
161
|
-
clusters_around_cells,
|
162
|
-
orphan_cell_indices,
|
163
|
-
ambiguous_cell_indices,
|
164
|
-
) = lu.cell_id_state_map(clusters_out, cell_count)
|
165
|
-
|
166
|
-
# Assign orphan cells with lower confidence predictions
|
167
|
-
clusters_out, orphan_cell_indices = lu.assign_orphans_with_low_conf_pred(
|
168
|
-
clusters_out, clusters_orig, raw_cells, orphan_cell_indices
|
169
|
-
)
|
170
|
-
|
171
|
-
# Refresh the cell_ids assignment, after creating new clusters using low conf predictions
|
172
|
-
clusters_out = lu.assigning_cell_ids_to_clusters(
|
173
|
-
clusters_out, raw_cells, MIN_INTERSECTION
|
174
|
-
)
|
175
|
-
|
176
|
-
_log.debug("---- 3. Settle Ambigous Cells")
|
177
|
-
# Creates an update map after assignment of cell_id->cluster_id
|
178
|
-
(
|
179
|
-
clusters_around_cells,
|
180
|
-
orphan_cell_indices,
|
181
|
-
ambiguous_cell_indices,
|
182
|
-
) = lu.cell_id_state_map(clusters_out, cell_count)
|
183
|
-
|
184
|
-
# Settle pdf cells that belong to multiple clusters
|
185
|
-
clusters_out, ambiguous_cell_indices = lu.remove_ambigous_pdf_cell_by_conf(
|
186
|
-
clusters_out, raw_cells, ambiguous_cell_indices
|
187
|
-
)
|
188
|
-
|
189
|
-
_log.debug("---- 4. Set Orphans as Text")
|
190
|
-
(
|
191
|
-
clusters_around_cells,
|
192
|
-
orphan_cell_indices,
|
193
|
-
ambiguous_cell_indices,
|
194
|
-
) = lu.cell_id_state_map(clusters_out, cell_count)
|
195
|
-
|
196
|
-
clusters_out, orphan_cell_indices = lu.set_orphan_as_text(
|
197
|
-
clusters_out, clusters_orig, raw_cells, orphan_cell_indices
|
198
|
-
)
|
199
|
-
|
200
|
-
_log.debug("---- 5. Merge Cells & and adapt the bounding boxes")
|
201
|
-
# Merge cells orphan cells
|
202
|
-
clusters_out = lu.merge_cells(clusters_out)
|
203
|
-
|
204
|
-
# Clean up clusters that remain from merged and unreasonable clusters
|
205
|
-
clusters_out = lu.clean_up_clusters(
|
206
|
-
clusters_out,
|
207
|
-
raw_cells,
|
208
|
-
merge_cells=True,
|
209
|
-
img_table=True,
|
210
|
-
one_cell_table=True,
|
211
|
-
)
|
212
|
-
|
213
|
-
new_clusters = lu.adapt_bboxes(raw_cells, clusters_out, orphan_cell_indices)
|
214
|
-
clusters_out = new_clusters
|
215
|
-
|
216
|
-
## We first rebuild where every cell is now:
|
217
|
-
## Now we write into a prediction cells list, not into the raw cells list.
|
218
|
-
## As we don't need previous labels, we best overwrite any old list, because that might
|
219
|
-
## have been sorted differently.
|
220
|
-
(
|
221
|
-
clusters_around_cells,
|
222
|
-
orphan_cell_indices,
|
223
|
-
ambiguous_cell_indices,
|
224
|
-
) = lu.cell_id_state_map(clusters_out, cell_count)
|
225
|
-
|
226
|
-
target_cells = []
|
227
|
-
for ix, cell in enumerate(raw_cells):
|
228
|
-
new_cell = {
|
229
|
-
"id": ix,
|
230
|
-
"rawcell_id": ix,
|
231
|
-
"label": "None",
|
232
|
-
"bbox": cell["bbox"],
|
233
|
-
"text": cell["text"],
|
234
|
-
}
|
235
|
-
for cluster_index in clusters_around_cells[
|
236
|
-
ix
|
237
|
-
]: # By previous analysis, this is always 1 cluster.
|
238
|
-
new_cell["label"] = clusters_out[cluster_index]["type"]
|
239
|
-
target_cells.append(new_cell)
|
240
|
-
# _log.debug("New label of cell " + str(ix) + " is " + str(new_cell["label"]))
|
241
|
-
cells_out = target_cells
|
242
|
-
|
243
|
-
## -------------------------------
|
244
|
-
## Sort clusters into reasonable reading order, and sort the cells inside each cluster
|
245
|
-
_log.debug("---- 5. Sort clusters in reading order ------")
|
246
|
-
sorted_clusters = lu.produce_reading_order(
|
247
|
-
clusters_out, "raw_cell_ids", "raw_cell_ids", True
|
248
|
-
)
|
249
|
-
clusters_out = sorted_clusters
|
250
|
-
|
251
|
-
# end_time = timer()
|
252
|
-
_log.debug("---- End of postprocessing function ------")
|
253
|
-
end_time = time.time() - start_time
|
254
|
-
_log.debug(f"Finished post processing in seconds={end_time:.3f}")
|
255
|
-
|
256
|
-
cells_out_new = [
|
257
|
-
Cell(
|
258
|
-
id=c["id"], # type: ignore
|
259
|
-
bbox=BoundingBox.from_tuple(
|
260
|
-
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT # type: ignore
|
261
|
-
).to_top_left_origin(page_height),
|
262
|
-
text=c["text"], # type: ignore
|
263
|
-
)
|
264
|
-
for c in cells_out
|
265
|
-
]
|
266
|
-
|
267
|
-
del cells_out
|
268
|
-
|
269
|
-
clusters_out_new = []
|
270
|
-
for c in clusters_out:
|
271
|
-
cluster_cells = [
|
272
|
-
ccell for ccell in cells_out_new if ccell.id in c["cell_ids"] # type: ignore
|
273
|
-
]
|
274
|
-
c_new = Cluster(
|
275
|
-
id=c["id"], # type: ignore
|
276
|
-
bbox=BoundingBox.from_tuple(
|
277
|
-
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT # type: ignore
|
278
|
-
).to_top_left_origin(page_height),
|
279
|
-
confidence=c["confidence"], # type: ignore
|
280
|
-
label=DocItemLabel(c["type"]),
|
281
|
-
cells=cluster_cells,
|
161
|
+
# Draw clusters on both images
|
162
|
+
draw_clusters(left_image, left_clusters)
|
163
|
+
draw_clusters(right_image, right_clusters)
|
164
|
+
# Combine the images side by side
|
165
|
+
combined_width = left_image.width * 2
|
166
|
+
combined_height = left_image.height
|
167
|
+
combined_image = Image.new("RGB", (combined_width, combined_height))
|
168
|
+
combined_image.paste(left_image, (0, 0))
|
169
|
+
combined_image.paste(right_image, (left_image.width, 0))
|
170
|
+
if show:
|
171
|
+
combined_image.show()
|
172
|
+
else:
|
173
|
+
out_path: Path = (
|
174
|
+
Path(settings.debug.debug_output_path)
|
175
|
+
/ f"debug_{conv_res.input.file.stem}"
|
282
176
|
)
|
283
|
-
|
284
|
-
|
285
|
-
|
177
|
+
out_path.mkdir(parents=True, exist_ok=True)
|
178
|
+
out_file = out_path / f"{mode_prefix}_layout_page_{page.page_no:05}.png"
|
179
|
+
combined_image.save(str(out_file), format="png")
|
286
180
|
|
287
181
|
def __call__(
|
288
182
|
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
@@ -315,66 +209,26 @@ class LayoutModel(BasePageModel):
|
|
315
209
|
)
|
316
210
|
clusters.append(cluster)
|
317
211
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
if not cell.bbox.area() > 0:
|
323
|
-
overlap_frac = 0.0
|
324
|
-
else:
|
325
|
-
overlap_frac = (
|
326
|
-
cell.bbox.intersection_area_with(cluster.bbox)
|
327
|
-
/ cell.bbox.area()
|
328
|
-
)
|
329
|
-
|
330
|
-
if overlap_frac > 0.5:
|
331
|
-
cluster.cells.append(cell)
|
332
|
-
|
333
|
-
# Pre-sort clusters
|
334
|
-
# clusters = self.sort_clusters_by_cell_order(clusters)
|
335
|
-
|
336
|
-
# DEBUG code:
|
337
|
-
def draw_clusters_and_cells(show: bool = False):
|
338
|
-
image = copy.deepcopy(page.image)
|
339
|
-
if image is not None:
|
340
|
-
draw = ImageDraw.Draw(image)
|
341
|
-
for c in clusters:
|
342
|
-
x0, y0, x1, y1 = c.bbox.as_tuple()
|
343
|
-
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
344
|
-
|
345
|
-
cell_color = (
|
346
|
-
random.randint(30, 140),
|
347
|
-
random.randint(30, 140),
|
348
|
-
random.randint(30, 140),
|
349
|
-
)
|
350
|
-
for tc in c.cells: # [:1]:
|
351
|
-
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
352
|
-
draw.rectangle(
|
353
|
-
[(x0, y0), (x1, y1)], outline=cell_color
|
354
|
-
)
|
355
|
-
if show:
|
356
|
-
image.show()
|
357
|
-
else:
|
358
|
-
out_path: Path = (
|
359
|
-
Path(settings.debug.debug_output_path)
|
360
|
-
/ f"debug_{conv_res.input.file.stem}"
|
361
|
-
)
|
362
|
-
out_path.mkdir(parents=True, exist_ok=True)
|
212
|
+
if settings.debug.visualize_raw_layout:
|
213
|
+
self.draw_clusters_and_cells_side_by_side(
|
214
|
+
conv_res, page, clusters, mode_prefix="raw"
|
215
|
+
)
|
363
216
|
|
364
|
-
|
365
|
-
out_path / f"layout_page_{page.page_no:05}.png"
|
366
|
-
)
|
367
|
-
image.save(str(out_file), format="png")
|
217
|
+
# Apply postprocessing
|
368
218
|
|
369
|
-
|
219
|
+
processed_clusters, processed_cells = LayoutPostprocessor(
|
220
|
+
page.cells, clusters, page.size
|
221
|
+
).postprocess()
|
222
|
+
# processed_clusters, processed_cells = clusters, page.cells
|
370
223
|
|
371
|
-
|
372
|
-
|
224
|
+
page.cells = processed_cells
|
225
|
+
page.predictions.layout = LayoutPrediction(
|
226
|
+
clusters=processed_clusters
|
373
227
|
)
|
374
228
|
|
375
|
-
page.predictions.layout = LayoutPrediction(clusters=clusters)
|
376
|
-
|
377
229
|
if settings.debug.visualize_layout:
|
378
|
-
|
230
|
+
self.draw_clusters_and_cells_side_by_side(
|
231
|
+
conv_res, page, processed_clusters, mode_prefix="postprocessed"
|
232
|
+
)
|
379
233
|
|
380
234
|
yield page
|
@@ -6,6 +6,7 @@ from pydantic import BaseModel
|
|
6
6
|
|
7
7
|
from docling.datamodel.base_models import (
|
8
8
|
AssembledUnit,
|
9
|
+
ContainerElement,
|
9
10
|
FigureElement,
|
10
11
|
Page,
|
11
12
|
PageElement,
|
@@ -94,7 +95,7 @@ class PageAssembleModel(BasePageModel):
|
|
94
95
|
headers.append(text_el)
|
95
96
|
else:
|
96
97
|
body.append(text_el)
|
97
|
-
elif cluster.label
|
98
|
+
elif cluster.label in LayoutModel.TABLE_LABELS:
|
98
99
|
tbl = None
|
99
100
|
if page.predictions.tablestructure:
|
100
101
|
tbl = page.predictions.tablestructure.table_map.get(
|
@@ -159,6 +160,15 @@ class PageAssembleModel(BasePageModel):
|
|
159
160
|
)
|
160
161
|
elements.append(equation)
|
161
162
|
body.append(equation)
|
163
|
+
elif cluster.label in LayoutModel.CONTAINER_LABELS:
|
164
|
+
container_el = ContainerElement(
|
165
|
+
label=cluster.label,
|
166
|
+
id=cluster.id,
|
167
|
+
page_no=page.page_no,
|
168
|
+
cluster=cluster,
|
169
|
+
)
|
170
|
+
elements.append(container_el)
|
171
|
+
body.append(container_el)
|
162
172
|
|
163
173
|
page.assembled = AssembledUnit(
|
164
174
|
elements=elements, headers=headers, body=body
|
@@ -76,6 +76,10 @@ class TableStructureModel(BasePageModel):
|
|
76
76
|
x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
|
77
77
|
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
78
78
|
|
79
|
+
for cell in table_element.cluster.cells:
|
80
|
+
x0, y0, x1, y1 = cell.bbox.as_tuple()
|
81
|
+
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
82
|
+
|
79
83
|
for tc in table_element.table_cells:
|
80
84
|
if tc.bbox is not None:
|
81
85
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
@@ -89,7 +93,6 @@ class TableStructureModel(BasePageModel):
|
|
89
93
|
text=f"{tc.start_row_offset_idx}, {tc.start_col_offset_idx}",
|
90
94
|
fill="black",
|
91
95
|
)
|
92
|
-
|
93
96
|
if show:
|
94
97
|
image.show()
|
95
98
|
else:
|
@@ -135,47 +138,40 @@ class TableStructureModel(BasePageModel):
|
|
135
138
|
],
|
136
139
|
)
|
137
140
|
for cluster in page.predictions.layout.clusters
|
138
|
-
if cluster.label
|
141
|
+
if cluster.label
|
142
|
+
in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
|
139
143
|
]
|
140
144
|
if not len(in_tables):
|
141
145
|
yield page
|
142
146
|
continue
|
143
147
|
|
144
|
-
tokens = []
|
145
|
-
for c in page.cells:
|
146
|
-
for cluster, _ in in_tables:
|
147
|
-
if c.bbox.area() > 0:
|
148
|
-
if (
|
149
|
-
c.bbox.intersection_area_with(cluster.bbox)
|
150
|
-
/ c.bbox.area()
|
151
|
-
> 0.2
|
152
|
-
):
|
153
|
-
# Only allow non empty stings (spaces) into the cells of a table
|
154
|
-
if len(c.text.strip()) > 0:
|
155
|
-
new_cell = copy.deepcopy(c)
|
156
|
-
new_cell.bbox = new_cell.bbox.scaled(
|
157
|
-
scale=self.scale
|
158
|
-
)
|
159
|
-
|
160
|
-
tokens.append(new_cell.model_dump())
|
161
|
-
|
162
148
|
page_input = {
|
163
|
-
"tokens": tokens,
|
164
149
|
"width": page.size.width * self.scale,
|
165
150
|
"height": page.size.height * self.scale,
|
151
|
+
"image": numpy.asarray(page.get_image(scale=self.scale)),
|
166
152
|
}
|
167
|
-
page_input["image"] = numpy.asarray(
|
168
|
-
page.get_image(scale=self.scale)
|
169
|
-
)
|
170
153
|
|
171
154
|
table_clusters, table_bboxes = zip(*in_tables)
|
172
155
|
|
173
156
|
if len(table_bboxes):
|
174
|
-
|
175
|
-
|
176
|
-
|
157
|
+
for table_cluster, tbl_box in in_tables:
|
158
|
+
|
159
|
+
tokens = []
|
160
|
+
for c in table_cluster.cells:
|
161
|
+
# Only allow non empty stings (spaces) into the cells of a table
|
162
|
+
if len(c.text.strip()) > 0:
|
163
|
+
new_cell = copy.deepcopy(c)
|
164
|
+
new_cell.bbox = new_cell.bbox.scaled(
|
165
|
+
scale=self.scale
|
166
|
+
)
|
177
167
|
|
178
|
-
|
168
|
+
tokens.append(new_cell.model_dump())
|
169
|
+
page_input["tokens"] = tokens
|
170
|
+
|
171
|
+
tf_output = self.tf_predictor.multi_table_predict(
|
172
|
+
page_input, [tbl_box], do_matching=self.do_cell_matching
|
173
|
+
)
|
174
|
+
table_out = tf_output[0]
|
179
175
|
table_cells = []
|
180
176
|
for element in table_out["tf_responses"]:
|
181
177
|
|
@@ -208,7 +204,7 @@ class TableStructureModel(BasePageModel):
|
|
208
204
|
id=table_cluster.id,
|
209
205
|
page_no=page.page_no,
|
210
206
|
cluster=table_cluster,
|
211
|
-
label=
|
207
|
+
label=table_cluster.label,
|
212
208
|
)
|
213
209
|
|
214
210
|
page.predictions.tablestructure.table_map[
|
@@ -168,7 +168,9 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|
168
168
|
|
169
169
|
except Exception as e:
|
170
170
|
conv_res.status = ConversionStatus.FAILURE
|
171
|
-
trace = "\n".join(
|
171
|
+
trace = "\n".join(
|
172
|
+
traceback.format_exception(type(e), e, e.__traceback__)
|
173
|
+
)
|
172
174
|
_log.warning(
|
173
175
|
f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
|
174
176
|
f"{trace}"
|
docling/utils/glm_utils.py
CHANGED
@@ -169,6 +169,8 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
|
|
169
169
|
current_list = None
|
170
170
|
text = ""
|
171
171
|
caption_refs = []
|
172
|
+
item_label = DocItemLabel(pelem["name"])
|
173
|
+
|
172
174
|
for caption in obj["captions"]:
|
173
175
|
text += caption["text"]
|
174
176
|
|
@@ -254,12 +256,18 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
|
|
254
256
|
),
|
255
257
|
)
|
256
258
|
|
257
|
-
tbl = doc.add_table(data=tbl_data, prov=prov)
|
259
|
+
tbl = doc.add_table(data=tbl_data, prov=prov, label=item_label)
|
258
260
|
tbl.captions.extend(caption_refs)
|
259
261
|
|
260
|
-
elif ptype in [
|
262
|
+
elif ptype in [DocItemLabel.FORM.value, DocItemLabel.KEY_VALUE_REGION.value]:
|
261
263
|
label = DocItemLabel(ptype)
|
262
|
-
|
264
|
+
group_label = GroupLabel.UNSPECIFIED
|
265
|
+
if label == DocItemLabel.FORM:
|
266
|
+
group_label = GroupLabel.FORM_AREA
|
267
|
+
elif label == DocItemLabel.KEY_VALUE_REGION:
|
268
|
+
group_label = GroupLabel.KEY_VALUE_AREA
|
269
|
+
|
270
|
+
container_el = doc.add_group(label=group_label)
|
263
271
|
|
264
272
|
_add_child_elements(container_el, doc, obj, pelem)
|
265
273
|
|