docling-ibm-models 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/tableformer/data_management/matching_post_processor.py +44 -24
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +23 -17
- docling_ibm_models/tableformer/data_management/tf_predictor.py +39 -10
- docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +5 -0
- docling_ibm_models/tableformer/otsl.py +3 -0
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.7.dist-info}/METADATA +1 -1
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.7.dist-info}/RECORD +9 -9
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.7.dist-info}/LICENSE +0 -0
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.7.dist-info}/WHEEL +0 -0
@@ -4,6 +4,7 @@
|
|
4
4
|
#
|
5
5
|
import json
|
6
6
|
import logging
|
7
|
+
import math
|
7
8
|
import statistics
|
8
9
|
|
9
10
|
import docling_ibm_models.tableformer.settings as s
|
@@ -403,45 +404,63 @@ class MatchingPostProcessor:
|
|
403
404
|
# Push horizontally
|
404
405
|
if x1_min < x2_min:
|
405
406
|
# Move box1 to the left and box2 to the right
|
406
|
-
box1["bbox"][2] -= overlap_x
|
407
|
-
box2["bbox"][0] += overlap_x
|
407
|
+
box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
408
|
+
box2["bbox"][0] += math.floor(overlap_x / 2)
|
408
409
|
else:
|
409
410
|
# Move box2 to the left and box1 to the right
|
410
|
-
box2["bbox"][2] -= overlap_x
|
411
|
-
box1["bbox"][0] += overlap_x
|
411
|
+
box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
412
|
+
box1["bbox"][0] += math.floor(overlap_x / 2)
|
412
413
|
else:
|
413
414
|
# Push vertically
|
414
415
|
if y1_min < y2_min:
|
415
416
|
# Move box1 up and box2 down
|
416
|
-
box1["bbox"][3] -= overlap_y
|
417
|
-
box2["bbox"][1] += overlap_y
|
417
|
+
box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
418
|
+
box2["bbox"][1] += math.floor(overlap_y / 2)
|
418
419
|
else:
|
419
420
|
# Move box2 up and box1 down
|
420
|
-
box2["bbox"][3] -= overlap_y
|
421
|
-
box1["bbox"][1] += overlap_y
|
421
|
+
box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
422
|
+
box1["bbox"][1] += math.floor(overlap_y / 2)
|
423
|
+
|
424
|
+
# Will flip coordinates in proper order, if previous operations reversed it
|
425
|
+
box1["bbox"] = [
|
426
|
+
min(box1["bbox"][0], box1["bbox"][2]),
|
427
|
+
min(box1["bbox"][1], box1["bbox"][3]),
|
428
|
+
max(box1["bbox"][0], box1["bbox"][2]),
|
429
|
+
max(box1["bbox"][1], box1["bbox"][3]),
|
430
|
+
]
|
431
|
+
box2["bbox"] = [
|
432
|
+
min(box2["bbox"][0], box2["bbox"][2]),
|
433
|
+
min(box2["bbox"][1], box2["bbox"][3]),
|
434
|
+
max(box2["bbox"][0], box2["bbox"][2]),
|
435
|
+
max(box2["bbox"][1], box2["bbox"][3]),
|
436
|
+
]
|
422
437
|
|
423
438
|
return box1, box2
|
424
439
|
|
425
440
|
def do_boxes_overlap(box1, box2):
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
441
|
+
B1 = box1["bbox"]
|
442
|
+
B2 = box2["bbox"]
|
443
|
+
if (
|
444
|
+
(B1[0] >= B2[2])
|
445
|
+
or (B1[2] <= B2[0])
|
446
|
+
or (B1[3] <= B2[1])
|
447
|
+
or (B1[1] >= B2[3])
|
448
|
+
):
|
432
449
|
return False
|
433
|
-
|
434
|
-
|
435
|
-
return False
|
436
|
-
return True
|
450
|
+
else:
|
451
|
+
return True
|
437
452
|
|
438
453
|
def find_overlapping_pairs_indexes(bboxes):
|
439
454
|
overlapping_indexes = []
|
440
455
|
# Compare each box with every other box (combinations)
|
441
456
|
for i in range(len(bboxes)):
|
442
457
|
for j in range(i + 1, len(bboxes)):
|
443
|
-
if
|
444
|
-
bboxes[i]
|
458
|
+
if i != j:
|
459
|
+
if bboxes[i] != bboxes[j]:
|
460
|
+
if do_boxes_overlap(bboxes[i], bboxes[j]):
|
461
|
+
bboxes[i], bboxes[j] = correct_overlap(
|
462
|
+
bboxes[i], bboxes[j]
|
463
|
+
)
|
445
464
|
|
446
465
|
return overlapping_indexes, bboxes
|
447
466
|
|
@@ -1144,7 +1163,7 @@ class MatchingPostProcessor:
|
|
1144
1163
|
new_pdf_cells.append(pdf_cells[i])
|
1145
1164
|
return new_pdf_cells
|
1146
1165
|
|
1147
|
-
def process(self, matching_details):
|
1166
|
+
def process(self, matching_details, correct_overlapping_cells=False):
|
1148
1167
|
r"""
|
1149
1168
|
Do post processing, see details in the comments below
|
1150
1169
|
|
@@ -1348,9 +1367,10 @@ class MatchingPostProcessor:
|
|
1348
1367
|
table_cells_wo = po2
|
1349
1368
|
max_cell_id = po3
|
1350
1369
|
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1370
|
+
if correct_overlapping_cells:
|
1371
|
+
# As the last step - correct cell bboxes in a way that they don't overlap:
|
1372
|
+
if len(table_cells_wo) <= 300: # For performance reasons
|
1373
|
+
table_cells_wo = self._find_overlapping(table_cells_wo)
|
1354
1374
|
|
1355
1375
|
self._log().debug("*** final_matches_wo")
|
1356
1376
|
self._log().debug(final_matches_wo)
|
@@ -127,13 +127,14 @@ class CellMatcher:
|
|
127
127
|
Dictionary with all details about the mathings between the table and pdf cells
|
128
128
|
"""
|
129
129
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
130
|
-
|
131
|
-
word
|
132
|
-
word["bbox"][
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
130
|
+
if len(pdf_cells) > 0:
|
131
|
+
for word in pdf_cells:
|
132
|
+
word["bbox"] = [
|
133
|
+
word["bbox"]["l"],
|
134
|
+
word["bbox"]["t"],
|
135
|
+
word["bbox"]["r"],
|
136
|
+
word["bbox"]["b"],
|
137
|
+
]
|
137
138
|
table_bboxes = prediction["bboxes"]
|
138
139
|
table_classes = prediction["classes"]
|
139
140
|
# BBOXES transformed...
|
@@ -145,9 +146,13 @@ class CellMatcher:
|
|
145
146
|
table_cells = self._build_table_cells(
|
146
147
|
html_seq, otsl_seq, table_bboxes_page, table_classes
|
147
148
|
)
|
148
|
-
|
149
|
-
|
150
|
-
|
149
|
+
|
150
|
+
matches = {}
|
151
|
+
matches_counter = 0
|
152
|
+
if len(pdf_cells) > 0:
|
153
|
+
matches, matches_counter = self._intersection_over_pdf_match(
|
154
|
+
table_cells, pdf_cells
|
155
|
+
)
|
151
156
|
|
152
157
|
self._log().debug("matches_counter: {}".format(matches_counter))
|
153
158
|
|
@@ -188,13 +193,14 @@ class CellMatcher:
|
|
188
193
|
Dictionary with all details about the mathings between the table and pdf cells
|
189
194
|
"""
|
190
195
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
191
|
-
|
192
|
-
word
|
193
|
-
word["bbox"][
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
196
|
+
if len(pdf_cells) > 0:
|
197
|
+
for word in pdf_cells:
|
198
|
+
word["bbox"] = [
|
199
|
+
word["bbox"]["l"],
|
200
|
+
word["bbox"]["t"],
|
201
|
+
word["bbox"]["r"],
|
202
|
+
word["bbox"]["b"],
|
203
|
+
]
|
198
204
|
|
199
205
|
table_bboxes = prediction["bboxes"]
|
200
206
|
table_classes = prediction["classes"]
|
@@ -523,8 +523,9 @@ class TFPredictor:
|
|
523
523
|
# return the resized image
|
524
524
|
return resized, sf
|
525
525
|
|
526
|
-
def multi_table_predict(
|
527
|
-
|
526
|
+
def multi_table_predict(
|
527
|
+
self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
|
528
|
+
):
|
528
529
|
multi_tf_output = []
|
529
530
|
page_image = iocr_page["image"]
|
530
531
|
|
@@ -546,7 +547,12 @@ class TFPredictor:
|
|
546
547
|
# Predict
|
547
548
|
if do_matching:
|
548
549
|
tf_responses, predict_details = self.predict(
|
549
|
-
iocr_page,
|
550
|
+
iocr_page,
|
551
|
+
table_bbox,
|
552
|
+
table_image,
|
553
|
+
scale_factor,
|
554
|
+
None,
|
555
|
+
correct_overlapping_cells,
|
550
556
|
)
|
551
557
|
else:
|
552
558
|
tf_responses, predict_details = self.predict_dummy(
|
@@ -696,7 +702,12 @@ class TFPredictor:
|
|
696
702
|
prediction["bboxes"] = corrected_bboxes
|
697
703
|
|
698
704
|
# Match the cells
|
699
|
-
matching_details = {
|
705
|
+
matching_details = {
|
706
|
+
"table_cells": [],
|
707
|
+
"matches": {},
|
708
|
+
"pdf_cells": [],
|
709
|
+
"prediction_bboxes_page": [],
|
710
|
+
}
|
700
711
|
|
701
712
|
# Table bbox upscaling will scale predicted bboxes too within cell matcher
|
702
713
|
scaled_table_bbox = [
|
@@ -728,7 +739,13 @@ class TFPredictor:
|
|
728
739
|
return tf_output, matching_details
|
729
740
|
|
730
741
|
def predict(
|
731
|
-
self,
|
742
|
+
self,
|
743
|
+
iocr_page,
|
744
|
+
table_bbox,
|
745
|
+
table_image,
|
746
|
+
scale_factor,
|
747
|
+
eval_res_preds=None,
|
748
|
+
correct_overlapping_cells=False,
|
732
749
|
):
|
733
750
|
r"""
|
734
751
|
Predict the table out of an image in memory
|
@@ -739,6 +756,8 @@ class TFPredictor:
|
|
739
756
|
Docling provided table data
|
740
757
|
eval_res_preds : dict
|
741
758
|
Ready predictions provided by the evaluation results
|
759
|
+
correct_overlapping_cells : boolean
|
760
|
+
Enables or disables last post-processing step, that fixes cell bboxes to remove overlap
|
742
761
|
|
743
762
|
Returns
|
744
763
|
-------
|
@@ -803,7 +822,12 @@ class TFPredictor:
|
|
803
822
|
prediction["bboxes"] = corrected_bboxes
|
804
823
|
|
805
824
|
# Match the cells
|
806
|
-
matching_details = {
|
825
|
+
matching_details = {
|
826
|
+
"table_cells": [],
|
827
|
+
"matches": {},
|
828
|
+
"pdf_cells": [],
|
829
|
+
"prediction_bboxes_page": [],
|
830
|
+
}
|
807
831
|
|
808
832
|
# Table bbox upscaling will scale predicted bboxes too within cell matcher
|
809
833
|
scaled_table_bbox = [
|
@@ -819,10 +843,15 @@ class TFPredictor:
|
|
819
843
|
)
|
820
844
|
# Post-processing
|
821
845
|
if len(prediction["bboxes"]) > 0:
|
822
|
-
if
|
823
|
-
|
824
|
-
|
825
|
-
|
846
|
+
if (
|
847
|
+
len(iocr_page["tokens"]) > 0
|
848
|
+
): # There are at least some pdf cells to match with
|
849
|
+
if self.enable_post_process:
|
850
|
+
AggProfiler().begin("post_process", self._prof)
|
851
|
+
matching_details = self._post_processor.process(
|
852
|
+
matching_details, correct_overlapping_cells
|
853
|
+
)
|
854
|
+
AggProfiler().end("post_process", self._prof)
|
826
855
|
|
827
856
|
# Generate the expected Docling responses
|
828
857
|
AggProfiler().begin("generate_docling_response", self._prof)
|
@@ -157,7 +157,12 @@ class BBoxDecoder(nn.Module):
|
|
157
157
|
predictions_classes.append(self._class_embed(h))
|
158
158
|
if len(predictions_bboxes) > 0:
|
159
159
|
predictions_bboxes = torch.stack([x[0] for x in predictions_bboxes])
|
160
|
+
else:
|
161
|
+
predictions_bboxes = torch.empty(0)
|
162
|
+
|
160
163
|
if len(predictions_classes) > 0:
|
161
164
|
predictions_classes = torch.stack([x[0] for x in predictions_classes])
|
165
|
+
else:
|
166
|
+
predictions_classes = torch.empty(0)
|
162
167
|
|
163
168
|
return predictions_classes, predictions_bboxes
|
@@ -123,6 +123,9 @@ def otsl_check_right(rs_split, x, y):
|
|
123
123
|
|
124
124
|
|
125
125
|
def otsl_to_html(rs_list, logdebug):
|
126
|
+
if len(rs_list) == 0:
|
127
|
+
return []
|
128
|
+
|
126
129
|
if rs_list[0] not in ["fcel", "ched", "rhed", "srow", "ecel"]:
|
127
130
|
# Most likely already HTML...
|
128
131
|
return rs_list
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.1.5
|
3
|
+
Version: 1.1.7
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -4,20 +4,20 @@ docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO
|
|
4
4
|
docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
|
6
6
|
docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
|
7
|
-
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256
|
8
|
-
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=
|
7
|
+
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
|
8
|
+
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=kzOjSmXkYrxc0de8wHbDJMvwKXelxYf4OccHTRqnpco,21081
|
9
9
|
docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
|
10
|
-
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=
|
10
|
+
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=Ha--59Rfs3V78p3q__q5cuEoewrTld18qhX8VqAQrYc,39730
|
11
11
|
docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
|
12
12
|
docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
|
15
15
|
docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=
|
16
|
+
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
|
17
17
|
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
|
18
18
|
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=7iGkrTNLzjC1yn1zuA3N6DvBvbrcO_BR5tmHG3RKmXs,12159
|
19
19
|
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=4106qxxH0w92CVOFzFuCb87tRMvqAUP3X3F1WT5Z47A,6371
|
20
|
-
docling_ibm_models/tableformer/otsl.py,sha256=
|
20
|
+
docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
|
21
21
|
docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
|
22
22
|
docling_ibm_models/tableformer/test_dataset_cache.py,sha256=zvVJvUnYz4GxAQfPUmLTHUbqj0Yhi2vwgOBnsRgt1rI,818
|
23
23
|
docling_ibm_models/tableformer/test_prepare_image.py,sha256=oPmU93-yWIkCeUYulGQ1p676Vq-zcjw2EX24WA5lspA,3155
|
@@ -26,7 +26,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=13dvwo5byzfP2ejqGBFw
|
|
26
26
|
docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
|
27
27
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
28
28
|
docling_ibm_models/tableformer/utils/variance.py,sha256=USjRwaMsCmzvc6PeWskaAJnUjbliRVd_MqNKLjMDQw8,4675
|
29
|
-
docling_ibm_models-1.1.
|
30
|
-
docling_ibm_models-1.1.
|
31
|
-
docling_ibm_models-1.1.
|
32
|
-
docling_ibm_models-1.1.
|
29
|
+
docling_ibm_models-1.1.7.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
30
|
+
docling_ibm_models-1.1.7.dist-info/METADATA,sha256=o2f2zLxzqrkoDZ0gdBXDoCJNNJ3FyACeKIMemFE0LBs,7172
|
31
|
+
docling_ibm_models-1.1.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
32
|
+
docling_ibm_models-1.1.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|