docling-ibm-models 1.1.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/tableformer/data_management/matching_post_processor.py +72 -1
- docling_ibm_models/tableformer/models/common/base_model.py +3 -1
- docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +1 -1
- {docling_ibm_models-1.1.2.dist-info → docling_ibm_models-1.1.3.dist-info}/METADATA +2 -2
- {docling_ibm_models-1.1.2.dist-info → docling_ibm_models-1.1.3.dist-info}/RECORD +7 -7
- {docling_ibm_models-1.1.2.dist-info → docling_ibm_models-1.1.3.dist-info}/LICENSE +0 -0
- {docling_ibm_models-1.1.2.dist-info → docling_ibm_models-1.1.3.dist-info}/WHEEL +0 -0
@@ -383,6 +383,71 @@ class MatchingPostProcessor:
|
|
383
383
|
clean_matches = json.loads(clean_matches_string)
|
384
384
|
return clean_matches
|
385
385
|
|
386
|
+
def _find_overlapping(self, table_cells):
|
387
|
+
|
388
|
+
def correct_overlap(box1, box2):
|
389
|
+
# Extract coordinates from the bounding boxes
|
390
|
+
x1_min, y1_min, x1_max, y1_max = box1["bbox"]
|
391
|
+
x2_min, y2_min, x2_max, y2_max = box2["bbox"]
|
392
|
+
|
393
|
+
# Calculate the overlap in both x and y directions
|
394
|
+
overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
|
395
|
+
overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)
|
396
|
+
|
397
|
+
# If there is no overlap, return the original boxes
|
398
|
+
if overlap_x <= 0 or overlap_y <= 0:
|
399
|
+
return box1, box2
|
400
|
+
|
401
|
+
# Decide how to push the boxes apart
|
402
|
+
if overlap_x < overlap_y:
|
403
|
+
# Push horizontally
|
404
|
+
if x1_min < x2_min:
|
405
|
+
# Move box1 to the left and box2 to the right
|
406
|
+
box1["bbox"][2] -= overlap_x
|
407
|
+
box2["bbox"][0] += overlap_x
|
408
|
+
else:
|
409
|
+
# Move box2 to the left and box1 to the right
|
410
|
+
box2["bbox"][2] -= overlap_x
|
411
|
+
box1["bbox"][0] += overlap_x
|
412
|
+
else:
|
413
|
+
# Push vertically
|
414
|
+
if y1_min < y2_min:
|
415
|
+
# Move box1 up and box2 down
|
416
|
+
box1["bbox"][3] -= overlap_y
|
417
|
+
box2["bbox"][1] += overlap_y
|
418
|
+
else:
|
419
|
+
# Move box2 up and box1 down
|
420
|
+
box2["bbox"][3] -= overlap_y
|
421
|
+
box1["bbox"][1] += overlap_y
|
422
|
+
|
423
|
+
return box1, box2
|
424
|
+
|
425
|
+
def do_boxes_overlap(box1, box2):
|
426
|
+
# print("{} - {}".format(box1["bbox"], box2["bbox"]))
|
427
|
+
# Extract coordinates from the bounding boxes
|
428
|
+
x1_min, y1_min, x1_max, y1_max = box1["bbox"]
|
429
|
+
x2_min, y2_min, x2_max, y2_max = box2["bbox"]
|
430
|
+
# Check if one box is to the left of the other
|
431
|
+
if x1_max < x2_min or x2_max < x1_min:
|
432
|
+
return False
|
433
|
+
# Check if one box is above the other
|
434
|
+
if y1_max < y2_min or y2_max < y1_min:
|
435
|
+
return False
|
436
|
+
return True
|
437
|
+
|
438
|
+
def find_overlapping_pairs_indexes(bboxes):
|
439
|
+
overlapping_indexes = []
|
440
|
+
# Compare each box with every other box (combinations)
|
441
|
+
for i in range(len(bboxes)):
|
442
|
+
for j in range(i + 1, len(bboxes)):
|
443
|
+
if do_boxes_overlap(bboxes[i], bboxes[j]):
|
444
|
+
bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
|
445
|
+
|
446
|
+
return overlapping_indexes, bboxes
|
447
|
+
|
448
|
+
overlapping_indexes, table_cells = find_overlapping_pairs_indexes(table_cells)
|
449
|
+
return table_cells
|
450
|
+
|
386
451
|
def _align_table_cells_to_pdf(self, table_cells, pdf_cells, matches):
|
387
452
|
r"""
|
388
453
|
USED in 8.a step
|
@@ -1261,7 +1326,9 @@ class MatchingPostProcessor:
|
|
1261
1326
|
dedupl_table_cells, key=lambda k: k["cell_id"]
|
1262
1327
|
)
|
1263
1328
|
|
1264
|
-
if
|
1329
|
+
if (
|
1330
|
+
len(pdf_cells) > 300
|
1331
|
+
): # For performance, skip this step if there are too many pdf_cells
|
1265
1332
|
aligned_table_cells2 = dedupl_table_cells_sorted
|
1266
1333
|
else:
|
1267
1334
|
aligned_table_cells2 = self._align_table_cells_to_pdf(
|
@@ -1281,6 +1348,10 @@ class MatchingPostProcessor:
|
|
1281
1348
|
table_cells_wo = po2
|
1282
1349
|
max_cell_id = po3
|
1283
1350
|
|
1351
|
+
# As the last step - correct cell bboxes in a way that they don't overlap:
|
1352
|
+
if len(table_cells_wo) <= 300: # For performance reasons
|
1353
|
+
table_cells_wo = self._find_overlapping(table_cells_wo)
|
1354
|
+
|
1284
1355
|
self._log().debug("*** final_matches_wo")
|
1285
1356
|
self._log().debug(final_matches_wo)
|
1286
1357
|
self._log().debug("*** table_cells_wo")
|
@@ -257,7 +257,9 @@ class BaseModel(ABC):
|
|
257
257
|
self._log().info(
|
258
258
|
"Loading model checkpoint file: {}".format(checkpoint_file)
|
259
259
|
)
|
260
|
-
saved_model = torch.load(
|
260
|
+
saved_model = torch.load(
|
261
|
+
checkpoint_file, map_location=self._device, weights_only=False
|
262
|
+
)
|
261
263
|
return saved_model, checkpoint_file
|
262
264
|
except RuntimeError:
|
263
265
|
self._log().error("Cannot load file: {}".format(checkpoint_file))
|
@@ -30,7 +30,7 @@ class Encoder04(nn.Module):
|
|
30
30
|
self.enc_image_size = enc_image_size
|
31
31
|
self._encoder_dim = enc_dim
|
32
32
|
|
33
|
-
resnet = torchvision.models.resnet18(
|
33
|
+
resnet = torchvision.models.resnet18()
|
34
34
|
modules = list(resnet.children())[:-3]
|
35
35
|
|
36
36
|
self._resnet = nn.Sequential(*modules)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.1.2
|
3
|
+
Version: 1.1.3
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -43,7 +43,7 @@ Description-Content-Type: text/markdown
|
|
43
43
|
|
44
44
|
# Docling IBM models
|
45
45
|
|
46
|
-
AI modules to support the
|
46
|
+
AI modules to support the Docling PDF document conversion project.
|
47
47
|
|
48
48
|
- TableFormer is an AI module that recognizes the structure of a table and the bounding boxes of the table content.
|
49
49
|
- Layout model is an AI model that provides among other things ability to detect tables on the page. This package contains inference code for Layout model.
|
@@ -4,17 +4,17 @@ docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO
|
|
4
4
|
docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
|
6
6
|
docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
|
7
|
-
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256
|
7
|
+
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=-82B4xUJ9uxMDcsX2DJINTy3J0OB9rKXzHKtf-J3GHI,57205
|
8
8
|
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=DFu428Cr84maT9WehdoZkpkJKeahwe5JlclvTC6fuVY,20870
|
9
9
|
docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
|
10
10
|
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=LxRme9AWLZhQw7xP5Tpxwa5XFTY66m5IFS5v9VC30GA,38978
|
11
11
|
docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
|
12
12
|
docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
docling_ibm_models/tableformer/models/common/base_model.py,sha256=
|
14
|
+
docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
|
15
15
|
docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=mMZSGk0PjQ4_fhuP44_WJVzfxyLky6S3zlVxCrNKRgc,5991
|
17
|
-
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=
|
17
|
+
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
|
18
18
|
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=7iGkrTNLzjC1yn1zuA3N6DvBvbrcO_BR5tmHG3RKmXs,12159
|
19
19
|
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=4106qxxH0w92CVOFzFuCb87tRMvqAUP3X3F1WT5Z47A,6371
|
20
20
|
docling_ibm_models/tableformer/otsl.py,sha256=k8l1hYWvcCkcnWbLxuBUYEcigYBFTRqiM2GBAHcUDok,21024
|
@@ -26,7 +26,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=13dvwo5byzfP2ejqGBFw
|
|
26
26
|
docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
|
27
27
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
28
28
|
docling_ibm_models/tableformer/utils/variance.py,sha256=USjRwaMsCmzvc6PeWskaAJnUjbliRVd_MqNKLjMDQw8,4675
|
29
|
-
docling_ibm_models-1.1.2.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
30
|
-
docling_ibm_models-1.1.
|
31
|
-
docling_ibm_models-1.1.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
32
|
-
docling_ibm_models-1.1.2.dist-info/RECORD,,
|
29
|
+
docling_ibm_models-1.1.3.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
30
|
+
docling_ibm_models-1.1.3.dist-info/METADATA,sha256=omUhpVOQHmnNTPqIyHfFWDlzelyrLvDumi6bCu_tCNA,6931
|
31
|
+
docling_ibm_models-1.1.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
32
|
+
docling_ibm_models-1.1.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|