docling-ibm-models 1.1.3__tar.gz → 1.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/LICENSE +1 -1
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/PKG-INFO +10 -12
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/README.md +3 -6
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/layoutmodel/layout_predictor.py +33 -25
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py +0 -94
- docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py +97 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +44 -24
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +26 -17
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_predictor.py +135 -139
- docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/transforms.py +91 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +5 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +5 -1
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -4
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/otsl.py +3 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/app_profiler.py +12 -1
- docling_ibm_models-1.3.2/docling_ibm_models/tableformer/utils/mem_monitor.py +175 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/pyproject.toml +11 -6
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -504
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/data_management/functional.py +0 -574
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -1233
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/data_management/transforms.py +0 -396
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/test_dataset_cache.py +0 -37
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/test_prepare_image.py +0 -99
- docling_ibm_models-1.1.3/docling_ibm_models/tableformer/utils/variance.py +0 -175
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/settings.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
- {docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.3.2
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -18,17 +18,18 @@ Classifier: Programming Language :: Python :: 3.10
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.11
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
20
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
21
|
-
Requires-Dist: Distance (>=0.1.3,<0.2.0)
|
22
21
|
Requires-Dist: Pillow (>=10.0.0,<11.0.0)
|
23
|
-
Requires-Dist:
|
22
|
+
Requires-Dist: huggingface_hub (>=0.23,<1)
|
24
23
|
Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
|
25
24
|
Requires-Dist: lxml (>=4.9.1,<5.0.0)
|
26
25
|
Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
|
27
26
|
Requires-Dist: numpy (>=1.24.4,<2.0.0)
|
28
27
|
Requires-Dist: onnxruntime (>=1.16.2,<2.0.0)
|
29
|
-
Requires-Dist: opencv-python-headless (>=4.
|
30
|
-
Requires-Dist: torch (>=2.2.2,<3.0
|
31
|
-
Requires-Dist:
|
28
|
+
Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
|
29
|
+
Requires-Dist: torch (>=2.2.2,<2.3.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
30
|
+
Requires-Dist: torch (>=2.2.2,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
31
|
+
Requires-Dist: torchvision (>=0,<1) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
32
|
+
Requires-Dist: torchvision (>=0.17.2,<0.18.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
32
33
|
Requires-Dist: tqdm (>=4.64.0,<5.0.0)
|
33
34
|
Description-Content-Type: text/markdown
|
34
35
|
|
@@ -110,7 +111,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
|
|
110
111
|
|
111
112
|
## Configuration file
|
112
113
|
|
113
|
-
Example configuration can be
|
114
|
+
Example configuration can be found inside test `tests/test_tf_predictor.py`
|
114
115
|
These are the main sections of the configuration file:
|
115
116
|
|
116
117
|
- `dataset`: The directory for prepared data and the parameters used during the data loading.
|
@@ -128,16 +129,13 @@ You can download the model weights and config files from the links:
|
|
128
129
|
- [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
|
129
130
|
- [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
|
130
131
|
|
131
|
-
Place the downloaded files into `tests/test_data/model_artifacts/` directory.
|
132
|
-
|
133
132
|
|
134
133
|
## Inference Tests
|
135
134
|
|
136
|
-
|
135
|
+
You can run the inference tests for the models with:
|
137
136
|
|
138
|
-
First download the model weights (see above), then run:
|
139
137
|
```
|
140
|
-
|
138
|
+
python -m pytest tests/
|
141
139
|
```
|
142
140
|
|
143
141
|
This will also generate prediction and matching visualizations that can be found here:
|
@@ -76,7 +76,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
|
|
76
76
|
|
77
77
|
## Configuration file
|
78
78
|
|
79
|
-
Example configuration can be
|
79
|
+
Example configuration can be found inside test `tests/test_tf_predictor.py`
|
80
80
|
These are the main sections of the configuration file:
|
81
81
|
|
82
82
|
- `dataset`: The directory for prepared data and the parameters used during the data loading.
|
@@ -94,16 +94,13 @@ You can download the model weights and config files from the links:
|
|
94
94
|
- [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
|
95
95
|
- [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
|
96
96
|
|
97
|
-
Place the downloaded files into `tests/test_data/model_artifacts/` directory.
|
98
|
-
|
99
97
|
|
100
98
|
## Inference Tests
|
101
99
|
|
102
|
-
|
100
|
+
You can run the inference tests for the models with:
|
103
101
|
|
104
|
-
First download the model weights (see above), then run:
|
105
102
|
```
|
106
|
-
|
103
|
+
python -m pytest tests/
|
107
104
|
```
|
108
105
|
|
109
106
|
This will also generate prediction and matching visualizations that can be found here:
|
@@ -14,29 +14,6 @@ MODEL_CHECKPOINT_FN = "model.pt"
|
|
14
14
|
DEFAULT_NUM_THREADS = 4
|
15
15
|
|
16
16
|
|
17
|
-
# Classes:
|
18
|
-
CLASSES_MAP = {
|
19
|
-
0: "background",
|
20
|
-
1: "Caption",
|
21
|
-
2: "Footnote",
|
22
|
-
3: "Formula",
|
23
|
-
4: "List-item",
|
24
|
-
5: "Page-footer",
|
25
|
-
6: "Page-header",
|
26
|
-
7: "Picture",
|
27
|
-
8: "Section-header",
|
28
|
-
9: "Table",
|
29
|
-
10: "Text",
|
30
|
-
11: "Title",
|
31
|
-
12: "Document Index",
|
32
|
-
13: "Code",
|
33
|
-
14: "Checkbox-Selected",
|
34
|
-
15: "Checkbox-Unselected",
|
35
|
-
16: "Form",
|
36
|
-
17: "Key-Value Region",
|
37
|
-
}
|
38
|
-
|
39
|
-
|
40
17
|
class LayoutPredictor:
|
41
18
|
r"""
|
42
19
|
Document layout prediction using ONNX
|
@@ -69,6 +46,31 @@ class LayoutPredictor:
|
|
69
46
|
------
|
70
47
|
FileNotFoundError when the model's ONNX file is missing
|
71
48
|
"""
|
49
|
+
# Initialize classes map:
|
50
|
+
self._classes_map = {
|
51
|
+
0: "background",
|
52
|
+
1: "Caption",
|
53
|
+
2: "Footnote",
|
54
|
+
3: "Formula",
|
55
|
+
4: "List-item",
|
56
|
+
5: "Page-footer",
|
57
|
+
6: "Page-header",
|
58
|
+
7: "Picture",
|
59
|
+
8: "Section-header",
|
60
|
+
9: "Table",
|
61
|
+
10: "Text",
|
62
|
+
11: "Title",
|
63
|
+
12: "Document Index",
|
64
|
+
13: "Code",
|
65
|
+
14: "Checkbox-Selected",
|
66
|
+
15: "Checkbox-Unselected",
|
67
|
+
16: "Form",
|
68
|
+
17: "Key-Value Region",
|
69
|
+
}
|
70
|
+
|
71
|
+
# Blacklisted classes
|
72
|
+
self._black_classes = set(["Form", "Key-Value Region"])
|
73
|
+
|
72
74
|
# Set basic params
|
73
75
|
self._threshold = 0.6 # Score threshold
|
74
76
|
self._image_size = 640
|
@@ -159,13 +161,19 @@ class LayoutPredictor:
|
|
159
161
|
)
|
160
162
|
|
161
163
|
# Yield output
|
162
|
-
for
|
164
|
+
for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
|
165
|
+
# Filter out blacklisted classes
|
166
|
+
label = self._classes_map[label_idx]
|
167
|
+
if label in self._black_classes:
|
168
|
+
continue
|
169
|
+
|
170
|
+
# Check against threshold
|
163
171
|
if score > self._threshold:
|
164
172
|
yield {
|
165
173
|
"l": box[0] / self._image_size * w,
|
166
174
|
"t": box[1] / self._image_size * h,
|
167
175
|
"r": box[2] / self._image_size * w,
|
168
176
|
"b": box[3] / self._image_size * h,
|
169
|
-
"label":
|
177
|
+
"label": label,
|
170
178
|
"confidence": score,
|
171
179
|
}
|
{docling_ibm_models-1.1.3 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py
RENAMED
@@ -48,32 +48,6 @@ def validate_config(config):
|
|
48
48
|
return True
|
49
49
|
|
50
50
|
|
51
|
-
def parse_arguments():
|
52
|
-
r"""
|
53
|
-
Parse the input arguments
|
54
|
-
A ValueError exception will be thrown in case the config file is invalid
|
55
|
-
"""
|
56
|
-
parser = argparse.ArgumentParser(description="Train the TableModel")
|
57
|
-
parser.add_argument(
|
58
|
-
"-c", "--config", required=True, default=None, help="configuration file (JSON)"
|
59
|
-
)
|
60
|
-
args = parser.parse_args()
|
61
|
-
config_filename = args.config
|
62
|
-
|
63
|
-
assert os.path.isfile(config_filename), "FAILURE: Config file not found."
|
64
|
-
return read_config(config_filename)
|
65
|
-
|
66
|
-
|
67
|
-
def read_config(config_filename):
|
68
|
-
with open(config_filename, "r") as fd:
|
69
|
-
config = json.load(fd)
|
70
|
-
|
71
|
-
# Validate the config file
|
72
|
-
validate_config(config)
|
73
|
-
|
74
|
-
return config
|
75
|
-
|
76
|
-
|
77
51
|
def safe_get_parameter(input_dict, index_path, default=None, required=False):
|
78
52
|
r"""
|
79
53
|
Safe get parameter from a nested dictionary.
|
@@ -130,71 +104,3 @@ def get_prepared_data_filename(prepared_data_part, dataset_name):
|
|
130
104
|
if "<POSTFIX>" in template:
|
131
105
|
template = template.replace("<POSTFIX>", dataset_name)
|
132
106
|
return template
|
133
|
-
|
134
|
-
|
135
|
-
def create_dataset_and_model(config, purpose, fixed_padding=False):
|
136
|
-
r"""
|
137
|
-
Gets a model from configuration
|
138
|
-
|
139
|
-
Parameters
|
140
|
-
---------
|
141
|
-
config : Dictionary
|
142
|
-
The configuration of the model
|
143
|
-
purpose : string
|
144
|
-
One of "train", "eval", "predict"
|
145
|
-
fixed_padding : bool
|
146
|
-
Parameter passed to the constructor of the DataLoader
|
147
|
-
|
148
|
-
Returns
|
149
|
-
-------
|
150
|
-
In case a Model cannot be initialized return None, None, None. Otherwise:
|
151
|
-
|
152
|
-
device : selected device
|
153
|
-
dataset : Instance of the DataLoader
|
154
|
-
model : Instance of the model
|
155
|
-
"""
|
156
|
-
from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
|
157
|
-
|
158
|
-
model_type = config["model"]["type"]
|
159
|
-
model = None
|
160
|
-
|
161
|
-
# Get env vars:
|
162
|
-
use_cpu_only = os.environ.get("USE_CPU_ONLY", False)
|
163
|
-
use_cuda_only = not use_cpu_only
|
164
|
-
|
165
|
-
# Use the cpu for the evaluation
|
166
|
-
device = "cpu" # Default, run on CPU
|
167
|
-
num_gpus = torch.cuda.device_count() # Check if GPU is available
|
168
|
-
if use_cuda_only:
|
169
|
-
device = "cuda:0" if num_gpus > 0 else "cpu" # Run on first available GPU
|
170
|
-
else:
|
171
|
-
device = "cpu"
|
172
|
-
|
173
|
-
# Create the DataLoader
|
174
|
-
# loader = DataLoader(config, purpose, fixed_padding=fixed_padding)
|
175
|
-
dataset = TFDataset(config, purpose, fixed_padding=fixed_padding)
|
176
|
-
dataset.set_device(device)
|
177
|
-
dataset_val = None
|
178
|
-
if config["train"]["validation"] and purpose == "train":
|
179
|
-
dataset_val = TFDataset(config, "val", fixed_padding=fixed_padding)
|
180
|
-
dataset_val.set_device(device)
|
181
|
-
if model_type == "TableModel04_rs":
|
182
|
-
from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa: F401
|
183
|
-
TableModel04_rs,
|
184
|
-
)
|
185
|
-
# Find the model class and create an instance of it
|
186
|
-
for candidate in BaseModel.__subclasses__():
|
187
|
-
if candidate.__name__ == model_type:
|
188
|
-
init_data = dataset.get_init_data()
|
189
|
-
model = candidate(config, init_data, purpose, device)
|
190
|
-
|
191
|
-
if model is None:
|
192
|
-
logger.warn("Not found model: " + str(model_type))
|
193
|
-
return None, None, None
|
194
|
-
|
195
|
-
logger.info("Found model: " + str(model_type))
|
196
|
-
|
197
|
-
if purpose == s.PREDICT_PURPOSE:
|
198
|
-
return device, dataset, model
|
199
|
-
else:
|
200
|
-
return device, dataset, dataset_val, model
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#
|
2
|
+
# Copyright IBM Corp. 2024 - 2024
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
#
|
5
|
+
import numbers
|
6
|
+
from collections.abc import Iterable, Sequence
|
7
|
+
|
8
|
+
import cv2
|
9
|
+
import numpy as np
|
10
|
+
import torch
|
11
|
+
from torchvision.transforms import functional
|
12
|
+
|
13
|
+
cv2.setNumThreads(0)
|
14
|
+
cv2.ocl.setUseOpenCL(False)
|
15
|
+
|
16
|
+
INTER_MODE = {
|
17
|
+
"NEAREST": cv2.INTER_NEAREST,
|
18
|
+
"BILINEAR": cv2.INTER_LINEAR,
|
19
|
+
"BICUBIC": cv2.INTER_CUBIC,
|
20
|
+
}
|
21
|
+
|
22
|
+
PAD_MOD = {
|
23
|
+
"constant": cv2.BORDER_CONSTANT,
|
24
|
+
"edge": cv2.BORDER_REPLICATE,
|
25
|
+
"reflect": cv2.BORDER_DEFAULT,
|
26
|
+
"symmetric": cv2.BORDER_REFLECT,
|
27
|
+
}
|
28
|
+
|
29
|
+
|
30
|
+
def _is_tensor_image(img):
|
31
|
+
return torch.is_tensor(img) and img.ndimension() == 3
|
32
|
+
|
33
|
+
|
34
|
+
def _is_numpy_image(img):
|
35
|
+
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
|
36
|
+
|
37
|
+
|
38
|
+
def normalize(tensor, mean, std):
|
39
|
+
"""Normalize a tensor image with mean and standard deviation.
|
40
|
+
See ``Normalize`` for more details.
|
41
|
+
Args:
|
42
|
+
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
|
43
|
+
mean (sequence): Sequence of means for each channel.
|
44
|
+
std (sequence): Sequence of standard deviations for each channely.
|
45
|
+
Returns:
|
46
|
+
Tensor: Normalized Tensor image.
|
47
|
+
"""
|
48
|
+
if _is_tensor_image(tensor):
|
49
|
+
for t, m, s in zip(tensor, mean, std, strict=False):
|
50
|
+
t.sub_(m).div_(s)
|
51
|
+
return tensor
|
52
|
+
elif _is_numpy_image(tensor):
|
53
|
+
return (tensor.astype(np.float32) - 255.0 * np.array(mean)) / np.array(std)
|
54
|
+
else:
|
55
|
+
raise RuntimeError("Undefined type")
|
56
|
+
|
57
|
+
|
58
|
+
def resize(img, size, interpolation="BILINEAR"):
|
59
|
+
"""Resize the input CV Image to the given size.
|
60
|
+
Args:
|
61
|
+
img (np.ndarray): Image to be resized.
|
62
|
+
size (tuple or int): Desired output size. If size is a sequence like
|
63
|
+
(h, w), the output size will be matched to this. If size is an int,
|
64
|
+
the smaller edge of the image will be matched to this number maintaing
|
65
|
+
the aspect ratio. i.e, if height > width, then image will be rescaled to
|
66
|
+
(size * height / width, size)
|
67
|
+
interpolation (str, optional): Desired interpolation. Default is ``BILINEAR``
|
68
|
+
Returns:
|
69
|
+
cv Image: Resized image.
|
70
|
+
"""
|
71
|
+
if not _is_numpy_image(img):
|
72
|
+
raise TypeError("img should be CV Image. Got {}".format(type(img)))
|
73
|
+
if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
|
74
|
+
raise TypeError("Got inappropriate size arg: {}".format(size))
|
75
|
+
|
76
|
+
# TODO(Nikos): Try to remove the opencv dependency
|
77
|
+
if isinstance(size, int):
|
78
|
+
h, w, c = img.shape
|
79
|
+
if (w <= h and w == size) or (h <= w and h == size):
|
80
|
+
return img
|
81
|
+
if w < h:
|
82
|
+
ow = size
|
83
|
+
oh = int(size * h / w)
|
84
|
+
return cv2.resize(
|
85
|
+
img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
|
86
|
+
)
|
87
|
+
else:
|
88
|
+
oh = size
|
89
|
+
ow = int(size * w / h)
|
90
|
+
return cv2.resize(
|
91
|
+
img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
|
92
|
+
)
|
93
|
+
else:
|
94
|
+
oh, ow = size
|
95
|
+
return cv2.resize(
|
96
|
+
img, dsize=(int(ow), int(oh)), interpolation=INTER_MODE[interpolation]
|
97
|
+
)
|
@@ -4,6 +4,7 @@
|
|
4
4
|
#
|
5
5
|
import json
|
6
6
|
import logging
|
7
|
+
import math
|
7
8
|
import statistics
|
8
9
|
|
9
10
|
import docling_ibm_models.tableformer.settings as s
|
@@ -403,45 +404,63 @@ class MatchingPostProcessor:
|
|
403
404
|
# Push horizontally
|
404
405
|
if x1_min < x2_min:
|
405
406
|
# Move box1 to the left and box2 to the right
|
406
|
-
box1["bbox"][2] -= overlap_x
|
407
|
-
box2["bbox"][0] += overlap_x
|
407
|
+
box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
408
|
+
box2["bbox"][0] += math.floor(overlap_x / 2)
|
408
409
|
else:
|
409
410
|
# Move box2 to the left and box1 to the right
|
410
|
-
box2["bbox"][2] -= overlap_x
|
411
|
-
box1["bbox"][0] += overlap_x
|
411
|
+
box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
412
|
+
box1["bbox"][0] += math.floor(overlap_x / 2)
|
412
413
|
else:
|
413
414
|
# Push vertically
|
414
415
|
if y1_min < y2_min:
|
415
416
|
# Move box1 up and box2 down
|
416
|
-
box1["bbox"][3] -= overlap_y
|
417
|
-
box2["bbox"][1] += overlap_y
|
417
|
+
box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
418
|
+
box2["bbox"][1] += math.floor(overlap_y / 2)
|
418
419
|
else:
|
419
420
|
# Move box2 up and box1 down
|
420
|
-
box2["bbox"][3] -= overlap_y
|
421
|
-
box1["bbox"][1] += overlap_y
|
421
|
+
box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
422
|
+
box1["bbox"][1] += math.floor(overlap_y / 2)
|
423
|
+
|
424
|
+
# Will flip coordinates in proper order, if previous operations reversed it
|
425
|
+
box1["bbox"] = [
|
426
|
+
min(box1["bbox"][0], box1["bbox"][2]),
|
427
|
+
min(box1["bbox"][1], box1["bbox"][3]),
|
428
|
+
max(box1["bbox"][0], box1["bbox"][2]),
|
429
|
+
max(box1["bbox"][1], box1["bbox"][3]),
|
430
|
+
]
|
431
|
+
box2["bbox"] = [
|
432
|
+
min(box2["bbox"][0], box2["bbox"][2]),
|
433
|
+
min(box2["bbox"][1], box2["bbox"][3]),
|
434
|
+
max(box2["bbox"][0], box2["bbox"][2]),
|
435
|
+
max(box2["bbox"][1], box2["bbox"][3]),
|
436
|
+
]
|
422
437
|
|
423
438
|
return box1, box2
|
424
439
|
|
425
440
|
def do_boxes_overlap(box1, box2):
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
441
|
+
B1 = box1["bbox"]
|
442
|
+
B2 = box2["bbox"]
|
443
|
+
if (
|
444
|
+
(B1[0] >= B2[2])
|
445
|
+
or (B1[2] <= B2[0])
|
446
|
+
or (B1[3] <= B2[1])
|
447
|
+
or (B1[1] >= B2[3])
|
448
|
+
):
|
432
449
|
return False
|
433
|
-
|
434
|
-
|
435
|
-
return False
|
436
|
-
return True
|
450
|
+
else:
|
451
|
+
return True
|
437
452
|
|
438
453
|
def find_overlapping_pairs_indexes(bboxes):
|
439
454
|
overlapping_indexes = []
|
440
455
|
# Compare each box with every other box (combinations)
|
441
456
|
for i in range(len(bboxes)):
|
442
457
|
for j in range(i + 1, len(bboxes)):
|
443
|
-
if
|
444
|
-
bboxes[i]
|
458
|
+
if i != j:
|
459
|
+
if bboxes[i] != bboxes[j]:
|
460
|
+
if do_boxes_overlap(bboxes[i], bboxes[j]):
|
461
|
+
bboxes[i], bboxes[j] = correct_overlap(
|
462
|
+
bboxes[i], bboxes[j]
|
463
|
+
)
|
445
464
|
|
446
465
|
return overlapping_indexes, bboxes
|
447
466
|
|
@@ -1144,7 +1163,7 @@ class MatchingPostProcessor:
|
|
1144
1163
|
new_pdf_cells.append(pdf_cells[i])
|
1145
1164
|
return new_pdf_cells
|
1146
1165
|
|
1147
|
-
def process(self, matching_details):
|
1166
|
+
def process(self, matching_details, correct_overlapping_cells=False):
|
1148
1167
|
r"""
|
1149
1168
|
Do post processing, see details in the comments below
|
1150
1169
|
|
@@ -1348,9 +1367,10 @@ class MatchingPostProcessor:
|
|
1348
1367
|
table_cells_wo = po2
|
1349
1368
|
max_cell_id = po3
|
1350
1369
|
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1370
|
+
if correct_overlapping_cells:
|
1371
|
+
# As the last step - correct cell bboxes in a way that they don't overlap:
|
1372
|
+
if len(table_cells_wo) <= 300: # For performance reasons
|
1373
|
+
table_cells_wo = self._find_overlapping(table_cells_wo)
|
1354
1374
|
|
1355
1375
|
self._log().debug("*** final_matches_wo")
|
1356
1376
|
self._log().debug(final_matches_wo)
|
@@ -127,13 +127,17 @@ class CellMatcher:
|
|
127
127
|
Dictionary with all details about the mathings between the table and pdf cells
|
128
128
|
"""
|
129
129
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
130
|
-
|
131
|
-
word
|
132
|
-
word["bbox"]
|
133
|
-
|
134
|
-
word["bbox"]
|
135
|
-
|
136
|
-
|
130
|
+
if len(pdf_cells) > 0:
|
131
|
+
for word in pdf_cells:
|
132
|
+
if isinstance(word["bbox"], list):
|
133
|
+
continue
|
134
|
+
elif isinstance(word["bbox"], dict):
|
135
|
+
word["bbox"] = [
|
136
|
+
word["bbox"]["l"],
|
137
|
+
word["bbox"]["t"],
|
138
|
+
word["bbox"]["r"],
|
139
|
+
word["bbox"]["b"],
|
140
|
+
]
|
137
141
|
table_bboxes = prediction["bboxes"]
|
138
142
|
table_classes = prediction["classes"]
|
139
143
|
# BBOXES transformed...
|
@@ -145,9 +149,13 @@ class CellMatcher:
|
|
145
149
|
table_cells = self._build_table_cells(
|
146
150
|
html_seq, otsl_seq, table_bboxes_page, table_classes
|
147
151
|
)
|
148
|
-
|
149
|
-
|
150
|
-
|
152
|
+
|
153
|
+
matches = {}
|
154
|
+
matches_counter = 0
|
155
|
+
if len(pdf_cells) > 0:
|
156
|
+
matches, matches_counter = self._intersection_over_pdf_match(
|
157
|
+
table_cells, pdf_cells
|
158
|
+
)
|
151
159
|
|
152
160
|
self._log().debug("matches_counter: {}".format(matches_counter))
|
153
161
|
|
@@ -188,13 +196,14 @@ class CellMatcher:
|
|
188
196
|
Dictionary with all details about the mathings between the table and pdf cells
|
189
197
|
"""
|
190
198
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
191
|
-
|
192
|
-
word
|
193
|
-
word["bbox"][
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
199
|
+
if len(pdf_cells) > 0:
|
200
|
+
for word in pdf_cells:
|
201
|
+
word["bbox"] = [
|
202
|
+
word["bbox"]["l"],
|
203
|
+
word["bbox"]["t"],
|
204
|
+
word["bbox"]["r"],
|
205
|
+
word["bbox"]["b"],
|
206
|
+
]
|
198
207
|
|
199
208
|
table_bboxes = prediction["bboxes"]
|
200
209
|
table_classes = prediction["classes"]
|