docling-ibm-models 1.3.1__tar.gz → 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/PKG-INFO +1 -1
  2. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py +0 -94
  3. docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py +97 -0
  4. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -71
  5. docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/transforms.py +91 -0
  6. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +1 -1
  7. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/pyproject.toml +1 -1
  8. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -504
  9. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/functional.py +0 -574
  10. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -1233
  11. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/data_management/transforms.py +0 -396
  12. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/test_dataset_cache.py +0 -37
  13. docling_ibm_models-1.3.1/docling_ibm_models/tableformer/test_prepare_image.py +0 -99
  14. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/LICENSE +0 -0
  15. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/README.md +0 -0
  16. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  17. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/__init__.py +0 -0
  18. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  19. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  20. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  21. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  22. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  23. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  24. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  25. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  26. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  27. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  28. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/otsl.py +0 -0
  29. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/settings.py +0 -0
  30. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  31. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  32. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  33. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  34. {docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/utils/utils.py +0 -0
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.3.1
+Version: 1.3.2
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/common.py
@@ -48,32 +48,6 @@ def validate_config(config):
     return True


-def parse_arguments():
-    r"""
-    Parse the input arguments
-    A ValueError exception will be thrown in case the config file is invalid
-    """
-    parser = argparse.ArgumentParser(description="Train the TableModel")
-    parser.add_argument(
-        "-c", "--config", required=True, default=None, help="configuration file (JSON)"
-    )
-    args = parser.parse_args()
-    config_filename = args.config
-
-    assert os.path.isfile(config_filename), "FAILURE: Config file not found."
-    return read_config(config_filename)
-
-
-def read_config(config_filename):
-    with open(config_filename, "r") as fd:
-        config = json.load(fd)
-
-    # Validate the config file
-    validate_config(config)
-
-    return config
-
-
 def safe_get_parameter(input_dict, index_path, default=None, required=False):
     r"""
     Safe get parameter from a nested dictionary.
@@ -130,71 +104,3 @@ def get_prepared_data_filename(prepared_data_part, dataset_name):
     if "<POSTFIX>" in template:
         template = template.replace("<POSTFIX>", dataset_name)
     return template
-
-
-def create_dataset_and_model(config, purpose, fixed_padding=False):
-    r"""
-    Gets a model from configuration
-
-    Parameters
-    ---------
-    config : Dictionary
-        The configuration of the model
-    purpose : string
-        One of "train", "eval", "predict"
-    fixed_padding : bool
-        Parameter passed to the constructor of the DataLoader
-
-    Returns
-    -------
-    In case a Model cannot be initialized return None, None, None. Otherwise:
-
-    device : selected device
-    dataset : Instance of the DataLoader
-    model : Instance of the model
-    """
-    from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
-
-    model_type = config["model"]["type"]
-    model = None
-
-    # Get env vars:
-    use_cpu_only = os.environ.get("USE_CPU_ONLY", False)
-    use_cuda_only = not use_cpu_only
-
-    # Use the cpu for the evaluation
-    device = "cpu"  # Default, run on CPU
-    num_gpus = torch.cuda.device_count()  # Check if GPU is available
-    if use_cuda_only:
-        device = "cuda:0" if num_gpus > 0 else "cpu"  # Run on first available GPU
-    else:
-        device = "cpu"
-
-    # Create the DataLoader
-    # loader = DataLoader(config, purpose, fixed_padding=fixed_padding)
-    dataset = TFDataset(config, purpose, fixed_padding=fixed_padding)
-    dataset.set_device(device)
-    dataset_val = None
-    if config["train"]["validation"] and purpose == "train":
-        dataset_val = TFDataset(config, "val", fixed_padding=fixed_padding)
-        dataset_val.set_device(device)
-    if model_type == "TableModel04_rs":
-        from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa: F401
-            TableModel04_rs,
-        )
-    # Find the model class and create an instance of it
-    for candidate in BaseModel.__subclasses__():
-        if candidate.__name__ == model_type:
-            init_data = dataset.get_init_data()
-            model = candidate(config, init_data, purpose, device)
-
-    if model is None:
-        logger.warn("Not found model: " + str(model_type))
-        return None, None, None
-
-    logger.info("Found model: " + str(model_type))
-
-    if purpose == s.PREDICT_PURPOSE:
-        return device, dataset, model
-    else:
-        return device, dataset, dataset_val, model
docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/functional.py
@@ -0,0 +1,97 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import numbers
+from collections.abc import Iterable, Sequence
+
+import cv2
+import numpy as np
+import torch
+from torchvision.transforms import functional
+
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+
+INTER_MODE = {
+    "NEAREST": cv2.INTER_NEAREST,
+    "BILINEAR": cv2.INTER_LINEAR,
+    "BICUBIC": cv2.INTER_CUBIC,
+}
+
+PAD_MOD = {
+    "constant": cv2.BORDER_CONSTANT,
+    "edge": cv2.BORDER_REPLICATE,
+    "reflect": cv2.BORDER_DEFAULT,
+    "symmetric": cv2.BORDER_REFLECT,
+}
+
+
+def _is_tensor_image(img):
+    return torch.is_tensor(img) and img.ndimension() == 3
+
+
+def _is_numpy_image(img):
+    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
+
+
+def normalize(tensor, mean, std):
+    """Normalize a tensor image with mean and standard deviation.
+    See ``Normalize`` for more details.
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if _is_tensor_image(tensor):
+        for t, m, s in zip(tensor, mean, std, strict=False):
+            t.sub_(m).div_(s)
+        return tensor
+    elif _is_numpy_image(tensor):
+        return (tensor.astype(np.float32) - 255.0 * np.array(mean)) / np.array(std)
+    else:
+        raise RuntimeError("Undefined type")
+
+
+def resize(img, size, interpolation="BILINEAR"):
+    """Resize the input CV Image to the given size.
+    Args:
+        img (np.ndarray): Image to be resized.
+        size (tuple or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio. i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (str, optional): Desired interpolation. Default is ``BILINEAR``
+    Returns:
+        cv Image: Resized image.
+    """
+    if not _is_numpy_image(img):
+        raise TypeError("img should be CV Image. Got {}".format(type(img)))
+    if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
+        raise TypeError("Got inappropriate size arg: {}".format(size))
+
+    # TODO(Nikos): Try to remove the opencv dependency
+    if isinstance(size, int):
+        h, w, c = img.shape
+        if (w <= h and w == size) or (h <= w and h == size):
+            return img
+        if w < h:
+            ow = size
+            oh = int(size * h / w)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+        else:
+            oh = size
+            ow = int(size * w / h)
+            return cv2.resize(
+                img, dsize=(ow, oh), interpolation=INTER_MODE[interpolation]
+            )
+    else:
+        oh, ow = size
+        return cv2.resize(
+            img, dsize=(int(ow), int(oh)), interpolation=INTER_MODE[interpolation]
+        )
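A minimal usage sketch of the new functional helpers (not part of the package diff; the dummy image size and the mean/std values below are illustrative assumptions):

import numpy as np

from docling_ibm_models.tableformer.data_management import functional as F

# Dummy 3-channel uint8 image, 300 x 200 (H x W)
img = np.random.randint(0, 256, size=(300, 200, 3), dtype=np.uint8)

# Integer size: the smaller edge (here the width) is scaled to 448 pixels
# and the aspect ratio is preserved, so the result is 672 x 448
resized = F.resize(img, 448, interpolation="BILINEAR")
print(resized.shape)  # (672, 448, 3)

# The numpy branch of normalize() subtracts 255 * mean and divides by std
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
normalized = F.normalize(resized, mean, std)
print(normalized.shape)  # (672, 448, 3)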
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/data_management/tf_predictor.py
@@ -12,7 +12,6 @@ import numpy as np
 import torch

 import docling_ibm_models.tableformer.common as c
-import docling_ibm_models.tableformer.data_management.functional as F
 import docling_ibm_models.tableformer.data_management.transforms as T
 import docling_ibm_models.tableformer.settings as s
 import docling_ibm_models.tableformer.utils.utils as u
@@ -21,6 +20,9 @@ from docling_ibm_models.tableformer.data_management.matching_post_processor impo
 )
 from docling_ibm_models.tableformer.data_management.tf_cell_matcher import CellMatcher
 from docling_ibm_models.tableformer.models.common.base_model import BaseModel
+from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (
+    TableModel04_rs,
+)
 from docling_ibm_models.tableformer.otsl import otsl_to_html
 from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler

@@ -187,16 +189,7 @@ class TFPredictor:
         """

         self._model_type = self._config["model"]["type"]
-        # Added import here to avoid loading turbotransformer library unnecessarily
-        if self._model_type == "TableModel04_rs":
-            from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (  # noqa
-                TableModel04_rs,
-            )
-        for candidate in BaseModel.__subclasses__():
-            if candidate.__name__ == self._model_type:
-                model = candidate(
-                    self._config, self._init_data, s.PREDICT_PURPOSE, self._device
-                )
+        model = TableModel04_rs(self._config, self._init_data, self._device)

         if model is None:
             err_msg = "Not able to initiate a model for {}".format(self._model_type)
@@ -376,66 +369,6 @@ class TFPredictor:

         return new_bboxes

-    def _pad_image(self, iocr_page):
-        r"""
-        Adds padding to the image
-
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-
-        Returns
-        -------
-        new_im: PIL image
-            new, padded image
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-        _, old_iw, old_ih = iocr_page["image"].shape
-
-        margin_i = self._padding_size  # pixels
-
-        desired_iw = old_iw + (margin_i * 2)
-        desired_ih = old_ih + (margin_i * 2)
-
-        # Ratio of new image size to the original image size
-        new_image_ratio = desired_iw / old_iw
-
-        bcolor = (255, 255, 255)
-        # Create empty canvas of background color and desired size
-        padded_image = F.pad(
-            iocr_page["image"],
-            (desired_iw, desired_ih, desired_iw, desired_ih),
-            fill=bcolor,
-        )
-        return padded_image, new_image_ratio
-
-    def _pre_process_image(self, iocr_page):
-        r"""
-        Pre-process table image in memory, before doing prediction
-        Currently just removes from the image separate PDF cells that only contain "$" sign
-        This is done to remove model confusion when dealing with financial reports
-
-        Parameters
-        ----------
-        iocr_page : dict
-            Docling provided table data
-
-        Returns
-        -------
-        iocr_page["image"] : PIL image
-            updated table image with "$" repainted
-        new_image_ratio : float
-            Ratio of padded image size to the original image size
-        """
-
-        new_image_ratio = 1.0
-
-        ic, iw, ih = iocr_page["image"].shape
-
-        return iocr_page["image"], new_image_ratio
-
     def _merge_tf_output(self, docling_output, pdf_cells):
         tf_output = []
         tf_cells_map = {}
@@ -519,6 +452,7 @@ class TFPredictor:
             sf = r
             dim = (width, int(h * r))
         # resize the image
+        # TODO(Nikos): Try to remove cv2 dependency
        resized = cv2.resize(image, dim, interpolation=inter)
        # return the resized image
        return resized, sf
docling_ibm_models-1.3.2/docling_ibm_models/tableformer/data_management/transforms.py
@@ -0,0 +1,91 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+from __future__ import division
+
+import collections
+import numbers
+import random
+
+import torch
+
+from docling_ibm_models.tableformer.data_management import functional as F
+
+
+class Normalize(object):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
+    will normalize each channel of the input ``torch.*Tensor`` i.e.
+    ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+    """
+
+    def __init__(self, mean, std):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, tensor, target=None):
+        """
+        Args:
+            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return F.normalize(tensor, self.mean, self.std), target
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class Resize(object):
+    """Resize the input PIL Image to the given size.
+    Args:
+        size (sequence or int): Desired output size. If size is a sequence like
+            (h, w), output size will be matched to this. If size is an int,
+            smaller edge of the image will be matched to this number.
+            i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (int, optional): Desired interpolation. Default is
+            ``BILINEAR``
+    """
+
+    def __init__(self, size, interpolation="BILINEAR"):
+        self.size = size
+        self.interpolation = interpolation
+
+    def __call__(self, img, target=None):
+        """
+        Args:
+            img (np.ndarray): Image to be scaled.
+        Returns:
+            np.ndarray: Rescaled image.
+        """
+        # Resize bboxes (in pixels)
+        x_scale = 0
+        y_scale = 0
+
+        if img.shape[1] > 0:
+            x_scale = self.size[0] / img.shape[1]
+        if img.shape[0] > 0:
+            y_scale = self.size[1] / img.shape[0]
+
+        # loop over bboxes
+        if target is not None:
+            if target["boxes"] is not None:
+                target_ = target.copy()
+                target_["boxes"][:, 0] = x_scale * target_["boxes"][:, 0]
+                target_["boxes"][:, 1] = y_scale * target_["boxes"][:, 1]
+                target_["boxes"][:, 2] = x_scale * target_["boxes"][:, 2]
+                target_["boxes"][:, 3] = y_scale * target_["boxes"][:, 3]
+        return F.resize(img, self.size, self.interpolation), target
+
+    def __repr__(self):
+        interpolate_str = self.interpolation
+        return self.__class__.__name__ + "(size={0}, interpolation={1})".format(
+            self.size, interpolate_str
+        )
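A minimal sketch of how the new transforms chain together (not part of the package diff; the dummy image, the box values and the square 448 x 448 target size are assumptions made for illustration):

import numpy as np
import torch

import docling_ibm_models.tableformer.data_management.transforms as T

img = np.random.randint(0, 256, size=(300, 200, 3), dtype=np.uint8)
# One box in (x1, y1, x2, y2) pixel coordinates
target = {"boxes": torch.tensor([[10.0, 20.0, 150.0, 250.0]])}

# Resize to a square 448 x 448 output; the box coordinates are rescaled
# by the same x/y factors as the image
img_resized, target_resized = T.Resize((448, 448))(img, target)

# Normalize expects a (C, H, W) tensor, so convert the numpy image first
tensor = torch.from_numpy(img_resized).permute(2, 0, 1).float().div(255.0)
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
tensor_norm, _ = normalize(tensor, target_resized)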
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
@@ -26,7 +26,7 @@ class TableModel04_rs(BaseModel, nn.Module):
     TableNet04Model encoder, dual-decoder model with OTSL+ support
     """

-    def __init__(self, config, init_data, purpose, device):
+    def __init__(self, config, init_data, device):
        super(TableModel04_rs, self).__init__(config, init_data, device)

        self._prof = config["predict"].get("profiling", False)
{docling_ibm_models-1.3.1 → docling_ibm_models-1.3.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "1.3.1" # DO NOT EDIT, updated automatically
+version = "1.3.2" # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"