docling-ibm-models 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +23 -17
- docling_ibm_models/tableformer/data_management/tf_predictor.py +19 -6
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.6.dist-info}/METADATA +1 -1
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.6.dist-info}/RECORD +6 -6
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.6.dist-info}/LICENSE +0 -0
- {docling_ibm_models-1.1.5.dist-info → docling_ibm_models-1.1.6.dist-info}/WHEEL +0 -0
@@ -127,13 +127,14 @@ class CellMatcher:
|
|
127
127
|
Dictionary with all details about the mathings between the table and pdf cells
|
128
128
|
"""
|
129
129
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
130
|
-
|
131
|
-
word
|
132
|
-
word["bbox"][
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
130
|
+
if len(pdf_cells) > 0:
|
131
|
+
for word in pdf_cells:
|
132
|
+
word["bbox"] = [
|
133
|
+
word["bbox"]["l"],
|
134
|
+
word["bbox"]["t"],
|
135
|
+
word["bbox"]["r"],
|
136
|
+
word["bbox"]["b"],
|
137
|
+
]
|
137
138
|
table_bboxes = prediction["bboxes"]
|
138
139
|
table_classes = prediction["classes"]
|
139
140
|
# BBOXES transformed...
|
@@ -145,9 +146,13 @@ class CellMatcher:
|
|
145
146
|
table_cells = self._build_table_cells(
|
146
147
|
html_seq, otsl_seq, table_bboxes_page, table_classes
|
147
148
|
)
|
148
|
-
|
149
|
-
|
150
|
-
|
149
|
+
|
150
|
+
matches = {}
|
151
|
+
matches_counter = 0
|
152
|
+
if len(pdf_cells) > 0:
|
153
|
+
matches, matches_counter = self._intersection_over_pdf_match(
|
154
|
+
table_cells, pdf_cells
|
155
|
+
)
|
151
156
|
|
152
157
|
self._log().debug("matches_counter: {}".format(matches_counter))
|
153
158
|
|
@@ -188,13 +193,14 @@ class CellMatcher:
|
|
188
193
|
Dictionary with all details about the mathings between the table and pdf cells
|
189
194
|
"""
|
190
195
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
191
|
-
|
192
|
-
word
|
193
|
-
word["bbox"][
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
196
|
+
if len(pdf_cells) > 0:
|
197
|
+
for word in pdf_cells:
|
198
|
+
word["bbox"] = [
|
199
|
+
word["bbox"]["l"],
|
200
|
+
word["bbox"]["t"],
|
201
|
+
word["bbox"]["r"],
|
202
|
+
word["bbox"]["b"],
|
203
|
+
]
|
198
204
|
|
199
205
|
table_bboxes = prediction["bboxes"]
|
200
206
|
table_classes = prediction["classes"]
|
@@ -696,7 +696,12 @@ class TFPredictor:
|
|
696
696
|
prediction["bboxes"] = corrected_bboxes
|
697
697
|
|
698
698
|
# Match the cells
|
699
|
-
matching_details = {
|
699
|
+
matching_details = {
|
700
|
+
"table_cells": [],
|
701
|
+
"matches": {},
|
702
|
+
"pdf_cells": [],
|
703
|
+
"prediction_bboxes_page": [],
|
704
|
+
}
|
700
705
|
|
701
706
|
# Table bbox upscaling will scale predicted bboxes too within cell matcher
|
702
707
|
scaled_table_bbox = [
|
@@ -803,7 +808,12 @@ class TFPredictor:
|
|
803
808
|
prediction["bboxes"] = corrected_bboxes
|
804
809
|
|
805
810
|
# Match the cells
|
806
|
-
matching_details = {
|
811
|
+
matching_details = {
|
812
|
+
"table_cells": [],
|
813
|
+
"matches": {},
|
814
|
+
"pdf_cells": [],
|
815
|
+
"prediction_bboxes_page": [],
|
816
|
+
}
|
807
817
|
|
808
818
|
# Table bbox upscaling will scale predicted bboxes too within cell matcher
|
809
819
|
scaled_table_bbox = [
|
@@ -819,10 +829,13 @@ class TFPredictor:
|
|
819
829
|
)
|
820
830
|
# Post-processing
|
821
831
|
if len(prediction["bboxes"]) > 0:
|
822
|
-
if
|
823
|
-
|
824
|
-
|
825
|
-
|
832
|
+
if (
|
833
|
+
len(iocr_page["tokens"]) > 0
|
834
|
+
): # There are at least some pdf cells to match with
|
835
|
+
if self.enable_post_process:
|
836
|
+
AggProfiler().begin("post_process", self._prof)
|
837
|
+
matching_details = self._post_processor.process(matching_details)
|
838
|
+
AggProfiler().end("post_process", self._prof)
|
826
839
|
|
827
840
|
# Generate the expected Docling responses
|
828
841
|
AggProfiler().begin("generate_docling_response", self._prof)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.1.5
|
3
|
+
Version: 1.1.6
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -5,9 +5,9 @@ docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_
|
|
5
5
|
docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
|
6
6
|
docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
|
7
7
|
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=-82B4xUJ9uxMDcsX2DJINTy3J0OB9rKXzHKtf-J3GHI,57205
|
8
|
-
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=
|
8
|
+
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=kzOjSmXkYrxc0de8wHbDJMvwKXelxYf4OccHTRqnpco,21081
|
9
9
|
docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
|
10
|
-
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=
|
10
|
+
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=O0cDcOWJU39aKoQi1oDKAdLmilhmNBoTZzt1TWdJ8m4,39334
|
11
11
|
docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
|
12
12
|
docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -26,7 +26,7 @@ docling_ibm_models/tableformer/utils/app_profiler.py,sha256=13dvwo5byzfP2ejqGBFw
|
|
26
26
|
docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
|
27
27
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
28
28
|
docling_ibm_models/tableformer/utils/variance.py,sha256=USjRwaMsCmzvc6PeWskaAJnUjbliRVd_MqNKLjMDQw8,4675
|
29
|
-
docling_ibm_models-1.1.
|
30
|
-
docling_ibm_models-1.1.
|
31
|
-
docling_ibm_models-1.1.
|
32
|
-
docling_ibm_models-1.1.
|
29
|
+
docling_ibm_models-1.1.6.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
30
|
+
docling_ibm_models-1.1.6.dist-info/METADATA,sha256=3TIbhffZPFPwIH8oaKnEviMergfpg4Wppoas7bC36yU,7172
|
31
|
+
docling_ibm_models-1.1.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
32
|
+
docling_ibm_models-1.1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|