docling-ibm-models 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,13 +127,14 @@ class CellMatcher:
127
127
  Dictionary with all details about the matchings between the table and pdf cells
128
128
  """
129
129
  pdf_cells = copy.deepcopy(iocr_page["tokens"])
130
- for word in pdf_cells:
131
- word["bbox"] = [
132
- word["bbox"]["l"],
133
- word["bbox"]["t"],
134
- word["bbox"]["r"],
135
- word["bbox"]["b"],
136
- ]
130
+ if len(pdf_cells) > 0:
131
+ for word in pdf_cells:
132
+ word["bbox"] = [
133
+ word["bbox"]["l"],
134
+ word["bbox"]["t"],
135
+ word["bbox"]["r"],
136
+ word["bbox"]["b"],
137
+ ]
137
138
  table_bboxes = prediction["bboxes"]
138
139
  table_classes = prediction["classes"]
139
140
  # BBOXES transformed...
@@ -145,9 +146,13 @@ class CellMatcher:
145
146
  table_cells = self._build_table_cells(
146
147
  html_seq, otsl_seq, table_bboxes_page, table_classes
147
148
  )
148
- matches, matches_counter = self._intersection_over_pdf_match(
149
- table_cells, pdf_cells
150
- )
149
+
150
+ matches = {}
151
+ matches_counter = 0
152
+ if len(pdf_cells) > 0:
153
+ matches, matches_counter = self._intersection_over_pdf_match(
154
+ table_cells, pdf_cells
155
+ )
151
156
 
152
157
  self._log().debug("matches_counter: {}".format(matches_counter))
153
158
 
@@ -188,13 +193,14 @@ class CellMatcher:
188
193
  Dictionary with all details about the matchings between the table and pdf cells
189
194
  """
190
195
  pdf_cells = copy.deepcopy(iocr_page["tokens"])
191
- for word in pdf_cells:
192
- word["bbox"] = [
193
- word["bbox"]["l"],
194
- word["bbox"]["t"],
195
- word["bbox"]["r"],
196
- word["bbox"]["b"],
197
- ]
196
+ if len(pdf_cells) > 0:
197
+ for word in pdf_cells:
198
+ word["bbox"] = [
199
+ word["bbox"]["l"],
200
+ word["bbox"]["t"],
201
+ word["bbox"]["r"],
202
+ word["bbox"]["b"],
203
+ ]
198
204
 
199
205
  table_bboxes = prediction["bboxes"]
200
206
  table_classes = prediction["classes"]
@@ -696,7 +696,12 @@ class TFPredictor:
696
696
  prediction["bboxes"] = corrected_bboxes
697
697
 
698
698
  # Match the cells
699
- matching_details = {"table_cells": [], "matches": {}}
699
+ matching_details = {
700
+ "table_cells": [],
701
+ "matches": {},
702
+ "pdf_cells": [],
703
+ "prediction_bboxes_page": [],
704
+ }
700
705
 
701
706
  # Table bbox upscaling will scale predicted bboxes too within cell matcher
702
707
  scaled_table_bbox = [
@@ -803,7 +808,12 @@ class TFPredictor:
803
808
  prediction["bboxes"] = corrected_bboxes
804
809
 
805
810
  # Match the cells
806
- matching_details = {"table_cells": [], "matches": {}}
811
+ matching_details = {
812
+ "table_cells": [],
813
+ "matches": {},
814
+ "pdf_cells": [],
815
+ "prediction_bboxes_page": [],
816
+ }
807
817
 
808
818
  # Table bbox upscaling will scale predicted bboxes too within cell matcher
809
819
  scaled_table_bbox = [
@@ -819,10 +829,13 @@ class TFPredictor:
819
829
  )
820
830
  # Post-processing
821
831
  if len(prediction["bboxes"]) > 0:
822
- if self.enable_post_process:
823
- AggProfiler().begin("post_process", self._prof)
824
- matching_details = self._post_processor.process(matching_details)
825
- AggProfiler().end("post_process", self._prof)
832
+ if (
833
+ len(iocr_page["tokens"]) > 0
834
+ ): # There are at least some pdf cells to match with
835
+ if self.enable_post_process:
836
+ AggProfiler().begin("post_process", self._prof)
837
+ matching_details = self._post_processor.process(matching_details)
838
+ AggProfiler().end("post_process", self._prof)
826
839
 
827
840
  # Generate the expected Docling responses
828
841
  AggProfiler().begin("generate_docling_response", self._prof)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 1.1.4
3
+ Version: 1.1.6
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -18,9 +18,7 @@ Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
19
  Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
- Requires-Dist: Distance (>=0.1.3,<0.2.0)
22
21
  Requires-Dist: Pillow (>=10.0.0,<11.0.0)
23
- Requires-Dist: apted (>=1.0.3,<2.0.0)
24
22
  Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
25
23
  Requires-Dist: lxml (>=4.9.1,<5.0.0)
26
24
  Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
@@ -5,9 +5,9 @@ docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_
5
5
  docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
6
6
  docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
7
7
  docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=-82B4xUJ9uxMDcsX2DJINTy3J0OB9rKXzHKtf-J3GHI,57205
8
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=DFu428Cr84maT9WehdoZkpkJKeahwe5JlclvTC6fuVY,20870
8
+ docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=kzOjSmXkYrxc0de8wHbDJMvwKXelxYf4OccHTRqnpco,21081
9
9
  docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
10
- docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=LxRme9AWLZhQw7xP5Tpxwa5XFTY66m5IFS5v9VC30GA,38978
10
+ docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=O0cDcOWJU39aKoQi1oDKAdLmilhmNBoTZzt1TWdJ8m4,39334
11
11
  docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
12
12
  docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,7 +26,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=13dvwo5byzfP2ejqGBFw
26
26
  docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
27
27
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
28
28
  docling_ibm_models/tableformer/utils/variance.py,sha256=USjRwaMsCmzvc6PeWskaAJnUjbliRVd_MqNKLjMDQw8,4675
29
- docling_ibm_models-1.1.4.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
30
- docling_ibm_models-1.1.4.dist-info/METADATA,sha256=l9nER_iz8jIVbACT8dVjW9nVvXo9pOTqQ9g8ojH5-I8,7251
31
- docling_ibm_models-1.1.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
- docling_ibm_models-1.1.4.dist-info/RECORD,,
29
+ docling_ibm_models-1.1.6.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
30
+ docling_ibm_models-1.1.6.dist-info/METADATA,sha256=3TIbhffZPFPwIH8oaKnEviMergfpg4Wppoas7bC36yU,7172
31
+ docling_ibm_models-1.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
+ docling_ibm_models-1.1.6.dist-info/RECORD,,