docling-ibm-models 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -383,6 +383,71 @@ class MatchingPostProcessor:
383
383
  clean_matches = json.loads(clean_matches_string)
384
384
  return clean_matches
385
385
 
386
+ def _find_overlapping(self, table_cells):
387
+
388
+ def correct_overlap(box1, box2):
389
+ # Extract coordinates from the bounding boxes
390
+ x1_min, y1_min, x1_max, y1_max = box1["bbox"]
391
+ x2_min, y2_min, x2_max, y2_max = box2["bbox"]
392
+
393
+ # Calculate the overlap in both x and y directions
394
+ overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
395
+ overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)
396
+
397
+ # If there is no overlap, return the original boxes
398
+ if overlap_x <= 0 or overlap_y <= 0:
399
+ return box1, box2
400
+
401
+ # Decide how to push the boxes apart
402
+ if overlap_x < overlap_y:
403
+ # Push horizontally
404
+ if x1_min < x2_min:
405
+ # Move box1 to the left and box2 to the right
406
+ box1["bbox"][2] -= overlap_x
407
+ box2["bbox"][0] += overlap_x
408
+ else:
409
+ # Move box2 to the left and box1 to the right
410
+ box2["bbox"][2] -= overlap_x
411
+ box1["bbox"][0] += overlap_x
412
+ else:
413
+ # Push vertically
414
+ if y1_min < y2_min:
415
+ # Move box1 up and box2 down
416
+ box1["bbox"][3] -= overlap_y
417
+ box2["bbox"][1] += overlap_y
418
+ else:
419
+ # Move box2 up and box1 down
420
+ box2["bbox"][3] -= overlap_y
421
+ box1["bbox"][1] += overlap_y
422
+
423
+ return box1, box2
424
+
425
+ def do_boxes_overlap(box1, box2):
426
+ # print("{} - {}".format(box1["bbox"], box2["bbox"]))
427
+ # Extract coordinates from the bounding boxes
428
+ x1_min, y1_min, x1_max, y1_max = box1["bbox"]
429
+ x2_min, y2_min, x2_max, y2_max = box2["bbox"]
430
+ # Check if one box is to the left of the other
431
+ if x1_max < x2_min or x2_max < x1_min:
432
+ return False
433
+ # Check if one box is above the other
434
+ if y1_max < y2_min or y2_max < y1_min:
435
+ return False
436
+ return True
437
+
438
+ def find_overlapping_pairs_indexes(bboxes):
439
+ overlapping_indexes = []
440
+ # Compare each box with every other box (combinations)
441
+ for i in range(len(bboxes)):
442
+ for j in range(i + 1, len(bboxes)):
443
+ if do_boxes_overlap(bboxes[i], bboxes[j]):
444
+ bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
445
+
446
+ return overlapping_indexes, bboxes
447
+
448
+ overlapping_indexes, table_cells = find_overlapping_pairs_indexes(table_cells)
449
+ return table_cells
450
+
386
451
  def _align_table_cells_to_pdf(self, table_cells, pdf_cells, matches):
387
452
  r"""
388
453
  USED in 8.a step
@@ -1261,7 +1326,9 @@ class MatchingPostProcessor:
1261
1326
  dedupl_table_cells, key=lambda k: k["cell_id"]
1262
1327
  )
1263
1328
 
1264
- if len(pdf_cells) > 300:
1329
+ if (
1330
+ len(pdf_cells) > 300
1331
+ ): # For performance, skip this step if there are too many pdf_cells
1265
1332
  aligned_table_cells2 = dedupl_table_cells_sorted
1266
1333
  else:
1267
1334
  aligned_table_cells2 = self._align_table_cells_to_pdf(
@@ -1281,6 +1348,10 @@ class MatchingPostProcessor:
1281
1348
  table_cells_wo = po2
1282
1349
  max_cell_id = po3
1283
1350
 
1351
+ # As the last step - correct cell bboxes in a way that they don't overlap:
1352
+ if len(table_cells_wo) <= 300: # For performance reasons
1353
+ table_cells_wo = self._find_overlapping(table_cells_wo)
1354
+
1284
1355
  self._log().debug("*** final_matches_wo")
1285
1356
  self._log().debug(final_matches_wo)
1286
1357
  self._log().debug("*** table_cells_wo")
@@ -99,8 +99,13 @@ class TFPredictor:
99
99
  Table predictions for the in-memory Docling API
100
100
  """
101
101
 
102
- def __init__(self, config):
102
+ def __init__(self, config, num_threads: int = None):
103
103
  r"""
104
+ The number of threads is decided, in the following order, by:
105
+ 1. The init method parameter `num_threads`, if it is set.
106
+ 2. The envvar "OMP_NUM_THREADS", if it is set.
107
+ 3. The default value 4.
108
+
104
109
  Parameters
105
110
  ----------
106
111
  config : dict
@@ -123,6 +128,13 @@ class TFPredictor:
123
128
  self._post_processor = MatchingPostProcessor(config)
124
129
 
125
130
  self._init_word_map()
131
+
132
+ # Set the number of torch threads
133
+ if num_threads is None:
134
+ num_threads = int(os.environ.get("OMP_NUM_THREADS", 4))
135
+ self._num_threads = num_threads
136
+ torch.set_num_threads(num_threads)
137
+
126
138
  # Load the model
127
139
  self._model = self._load_model()
128
140
  self._model.eval()
@@ -257,7 +257,9 @@ class BaseModel(ABC):
257
257
  self._log().info(
258
258
  "Loading model checkpoint file: {}".format(checkpoint_file)
259
259
  )
260
- saved_model = torch.load(checkpoint_file, map_location=self._device)
260
+ saved_model = torch.load(
261
+ checkpoint_file, map_location=self._device, weights_only=False
262
+ )
261
263
  return saved_model, checkpoint_file
262
264
  except RuntimeError:
263
265
  self._log().error("Cannot load file: {}".format(checkpoint_file))
@@ -30,7 +30,7 @@ class Encoder04(nn.Module):
30
30
  self.enc_image_size = enc_image_size
31
31
  self._encoder_dim = enc_dim
32
32
 
33
- resnet = torchvision.models.resnet18(pretrained=False)
33
+ resnet = torchvision.models.resnet18()
34
34
  modules = list(resnet.children())[:-3]
35
35
 
36
36
  self._resnet = nn.Sequential(*modules)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 1.1.1
3
+ Version: 1.1.3
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -26,8 +26,7 @@ Requires-Dist: lxml (>=4.9.1,<5.0.0)
26
26
  Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
27
27
  Requires-Dist: numpy (>=1.24.4,<2.0.0)
28
28
  Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
29
- Requires-Dist: opencv-python (>=4.9.0.80,<5.0.0.0) ; sys_platform != "linux"
30
- Requires-Dist: opencv-python-headless (>=4.9.0.80,<5.0.0.0) ; sys_platform == "linux"
29
+ Requires-Dist: opencv-python-headless (>=4.9.0.80,<5.0.0.0)
31
30
  Requires-Dist: torch (>=2.2.2,<3.0.0)
32
31
  Requires-Dist: torchvision (>=0.17.2)
33
32
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
@@ -44,7 +43,7 @@ Description-Content-Type: text/markdown
44
43
 
45
44
  # Docling IBM models
46
45
 
47
- AI modules to support the Dockling PDF document conversion project.
46
+ AI modules to support the Docling PDF document conversion project.
48
47
 
49
48
  - TableFormer is an AI module that recognizes the structure of a table and the bounding boxes of the table content.
50
49
  - Layout model is an AI model that provides among other things ability to detect tables on the page. This package contains inference code for Layout model.
@@ -4,17 +4,17 @@ docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO
4
4
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
6
6
  docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
7
- docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=HYG-wx5PQC38hTQfyXr3zUnZ6--aSs55x8g9PdsROGU,54207
7
+ docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=-82B4xUJ9uxMDcsX2DJINTy3J0OB9rKXzHKtf-J3GHI,57205
8
8
  docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=DFu428Cr84maT9WehdoZkpkJKeahwe5JlclvTC6fuVY,20870
9
9
  docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
10
- docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=3e7ICgs3r9YclOLWuoOvqSveQHcERvZYmOBMtmslJqU,38502
10
+ docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=LxRme9AWLZhQw7xP5Tpxwa5XFTY66m5IFS5v9VC30GA,38978
11
11
  docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
12
12
  docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- docling_ibm_models/tableformer/models/common/base_model.py,sha256=_Pn6hjIx49DVTU-po6qsR788RhD7Q4FhVyBqaGl0tMw,9972
14
+ docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
15
15
  docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=mMZSGk0PjQ4_fhuP44_WJVzfxyLky6S3zlVxCrNKRgc,5991
17
- docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=vlDW890mCIzHdgaGXFZ4avlnCmuUCPHUaJ30P9N6eWo,1991
17
+ docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
18
18
  docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=7iGkrTNLzjC1yn1zuA3N6DvBvbrcO_BR5tmHG3RKmXs,12159
19
19
  docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=4106qxxH0w92CVOFzFuCb87tRMvqAUP3X3F1WT5Z47A,6371
20
20
  docling_ibm_models/tableformer/otsl.py,sha256=k8l1hYWvcCkcnWbLxuBUYEcigYBFTRqiM2GBAHcUDok,21024
@@ -26,7 +26,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=13dvwo5byzfP2ejqGBFw
26
26
  docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
27
27
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
28
28
  docling_ibm_models/tableformer/utils/variance.py,sha256=USjRwaMsCmzvc6PeWskaAJnUjbliRVd_MqNKLjMDQw8,4675
29
- docling_ibm_models-1.1.1.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
30
- docling_ibm_models-1.1.1.dist-info/METADATA,sha256=u1QQMa4RVoxKF7rW30Aa7WeY-JIll0fWxQtLUZNJbCU,7035
31
- docling_ibm_models-1.1.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
- docling_ibm_models-1.1.1.dist-info/RECORD,,
29
+ docling_ibm_models-1.1.3.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
30
+ docling_ibm_models-1.1.3.dist-info/METADATA,sha256=omUhpVOQHmnNTPqIyHfFWDlzelyrLvDumi6bCu_tCNA,6931
31
+ docling_ibm_models-1.1.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
+ docling_ibm_models-1.1.3.dist-info/RECORD,,