docling-ibm-models 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries and as they appear in their respective public registries. It is provided for informational purposes only.
@@ -12,7 +12,6 @@ import numpy as np
12
12
  import torch
13
13
 
14
14
  import docling_ibm_models.tableformer.common as c
15
- import docling_ibm_models.tableformer.data_management.functional as F
16
15
  import docling_ibm_models.tableformer.data_management.transforms as T
17
16
  import docling_ibm_models.tableformer.settings as s
18
17
  import docling_ibm_models.tableformer.utils.utils as u
@@ -21,6 +20,9 @@ from docling_ibm_models.tableformer.data_management.matching_post_processor impo
21
20
  )
22
21
  from docling_ibm_models.tableformer.data_management.tf_cell_matcher import CellMatcher
23
22
  from docling_ibm_models.tableformer.models.common.base_model import BaseModel
23
+ from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import (
24
+ TableModel04_rs,
25
+ )
24
26
  from docling_ibm_models.tableformer.otsl import otsl_to_html
25
27
  from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler
26
28
 
@@ -187,16 +189,7 @@ class TFPredictor:
187
189
  """
188
190
 
189
191
  self._model_type = self._config["model"]["type"]
190
- # Added import here to avoid loading turbotransformer library unnecessarily
191
- if self._model_type == "TableModel04_rs":
192
- from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa
193
- TableModel04_rs,
194
- )
195
- for candidate in BaseModel.__subclasses__():
196
- if candidate.__name__ == self._model_type:
197
- model = candidate(
198
- self._config, self._init_data, s.PREDICT_PURPOSE, self._device
199
- )
192
+ model = TableModel04_rs(self._config, self._init_data, self._device)
200
193
 
201
194
  if model is None:
202
195
  err_msg = "Not able to initiate a model for {}".format(self._model_type)
@@ -376,66 +369,6 @@ class TFPredictor:
376
369
 
377
370
  return new_bboxes
378
371
 
379
- def _pad_image(self, iocr_page):
380
- r"""
381
- Adds padding to the image
382
-
383
- Parameters
384
- ----------
385
- iocr_page : dict
386
- Docling provided table data
387
-
388
- Returns
389
- -------
390
- new_im: PIL image
391
- new, padded image
392
- new_image_ratio : float
393
- Ratio of padded image size to the original image size
394
- """
395
- _, old_iw, old_ih = iocr_page["image"].shape
396
-
397
- margin_i = self._padding_size # pixels
398
-
399
- desired_iw = old_iw + (margin_i * 2)
400
- desired_ih = old_ih + (margin_i * 2)
401
-
402
- # Ratio of new image size to the original image size
403
- new_image_ratio = desired_iw / old_iw
404
-
405
- bcolor = (255, 255, 255)
406
- # Create empty canvas of background color and desired size
407
- padded_image = F.pad(
408
- iocr_page["image"],
409
- (desired_iw, desired_ih, desired_iw, desired_ih),
410
- fill=bcolor,
411
- )
412
- return padded_image, new_image_ratio
413
-
414
- def _pre_process_image(self, iocr_page):
415
- r"""
416
- Pre-process table image in memory, before doing prediction
417
- Currently just removes from the image separate PDF cells that only contain "$" sign
418
- This is done to remove model confusion when dealing with financial reports
419
-
420
- Parameters
421
- ----------
422
- iocr_page : dict
423
- Docling provided table data
424
-
425
- Returns
426
- -------
427
- iocr_page["image"] : PIL image
428
- updated table image with "$" repainted
429
- new_image_ratio : float
430
- Ratio of padded image size to the original image size
431
- """
432
-
433
- new_image_ratio = 1.0
434
-
435
- ic, iw, ih = iocr_page["image"].shape
436
-
437
- return iocr_page["image"], new_image_ratio
438
-
439
372
  def _merge_tf_output(self, docling_output, pdf_cells):
440
373
  tf_output = []
441
374
  tf_cells_map = {}
@@ -519,6 +452,7 @@ class TFPredictor:
519
452
  sf = r
520
453
  dim = (width, int(h * r))
521
454
  # resize the image
455
+ # TODO(Nikos): Try to remove cv2 dependency
522
456
  resized = cv2.resize(image, dim, interpolation=inter)
523
457
  # return the resized image
524
458
  return resized, sf
@@ -13,248 +13,6 @@ import torch
13
13
  from docling_ibm_models.tableformer.data_management import functional as F
14
14
 
15
15
 
16
- def box_cxcywh_to_xyxy(x):
17
- x_c, y_c, w, h = x.unbind(-1)
18
- b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
19
- return torch.stack(b, dim=-1)
20
-
21
-
22
- def box_xyxy_to_cxcywh(x):
23
- x0, y0, x1, y1 = x.unbind(-1)
24
- b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
25
- return torch.stack(b, dim=-1)
26
-
27
-
28
- class Lambda(object):
29
- """Apply a user-defined lambda as a transform.
30
- Attention: The multiprocessing used in dataloader of pytorch
31
- is not friendly with lambda function in Windows
32
- Args:
33
- lambd (function): Lambda/function to be used for transform.
34
- """
35
-
36
- def __init__(self, lambd):
37
- # assert isinstance(lambd, types.LambdaType)
38
- self.lambd = lambd
39
- # if 'Windows' in platform.system():
40
- # raise RuntimeError("Can't pickle lambda funciton in windows system")
41
-
42
- def __call__(self, img):
43
- return self.lambd(img)
44
-
45
- def __repr__(self):
46
- return self.__class__.__name__ + "()"
47
-
48
-
49
- class RandomTransforms(object):
50
- """Base class for a list of transformations with randomness
51
- Args:
52
- transforms (list or tuple): list of transformations
53
- """
54
-
55
- def __init__(self, transforms):
56
- assert isinstance(transforms, (list, tuple))
57
- self.transforms = transforms
58
-
59
- def __call__(self, *args, **kwargs):
60
- raise NotImplementedError()
61
-
62
- def __repr__(self):
63
- format_string = self.__class__.__name__ + "("
64
- for t in self.transforms:
65
- format_string += "\n"
66
- format_string += " {0}".format(t)
67
- format_string += "\n)"
68
- return format_string
69
-
70
-
71
- class RandomChoice(RandomTransforms):
72
- """Apply single transformation randomly picked from a list"""
73
-
74
- def __call__(self, img, target):
75
- t = random.choice(self.transforms)
76
- return t(img, target)
77
-
78
-
79
- class RandomCrop(object):
80
- def __init__(self, size, margin_crop):
81
- self.size = list(size)
82
- self.margin_crop = list(margin_crop)
83
- # margin_crop: w, h
84
-
85
- def __call__(self, img, target):
86
- # img (w,h,ch)
87
- image_height, image_width = img.shape[0], img.shape[1]
88
- """
89
- img (np.ndarray): Image to be cropped.
90
- x: Upper pixel coordinate.
91
- y: Left pixel coordinate.
92
- h: Height of the cropped image.
93
- w: Width of the cropped image.
94
- """
95
- if image_width > 0 and image_height > 0:
96
- cropped_image = F.crop(
97
- img,
98
- self.margin_crop[1],
99
- self.margin_crop[0],
100
- image_height - (self.margin_crop[1] * 2),
101
- image_width - (self.margin_crop[0] * 2),
102
- )
103
-
104
- target_ = target.copy()
105
- target_["boxes"][:, 0] = target_["boxes"][:, 0] - self.margin_crop[0]
106
- target_["boxes"][:, 1] = target_["boxes"][:, 1] - self.margin_crop[1]
107
- target_["boxes"][:, 2] = target_["boxes"][:, 2] - self.margin_crop[0]
108
- target_["boxes"][:, 3] = target_["boxes"][:, 3] - self.margin_crop[1]
109
- else:
110
- cropped_image = img
111
- return cropped_image, target_
112
-
113
-
114
- class RandomPad(object):
115
- def __init__(self, max_pad):
116
- self.max_pad = max_pad
117
-
118
- def __call__(self, img, target):
119
- pad_x = random.randint(0, self.max_pad)
120
- pad_y = random.randint(0, self.max_pad)
121
- pad_x1 = random.randint(0, self.max_pad)
122
- pad_y1 = random.randint(0, self.max_pad)
123
- img = img.copy()
124
- padded_image = F.pad(img, (pad_x, pad_y, pad_x1, pad_y1), fill=(255, 255, 255))
125
- target_ = target.copy()
126
- if target["boxes"] is not None:
127
- target_["boxes"][:, 0] = target_["boxes"][:, 0] + pad_x
128
- target_["boxes"][:, 1] = target_["boxes"][:, 1] + pad_y
129
- target_["boxes"][:, 2] = target_["boxes"][:, 2] + pad_x
130
- target_["boxes"][:, 3] = target_["boxes"][:, 3] + pad_y
131
- return padded_image, target_
132
-
133
-
134
- class ColorJitter(object):
135
- """Randomly change the brightness, contrast and saturation of an image.
136
- Args:
137
- brightness (float): How much to jitter brightness. brightness_factor
138
- is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
139
- contrast (float): How much to jitter contrast. contrast_factor
140
- is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
141
- saturation (float): How much to jitter saturation. saturation_factor
142
- is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
143
- hue(float): How much to jitter hue. hue_factor is chosen uniformly from
144
- [-hue, hue]. Should be >=0 and <= 0.5.
145
- """
146
-
147
- def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
148
-
149
- assert isinstance(brightness, float) or (
150
- isinstance(brightness, collections.Iterable) and len(brightness) == 2
151
- )
152
- assert isinstance(contrast, float) or (
153
- isinstance(contrast, collections.Iterable) and len(contrast) == 2
154
- )
155
- assert isinstance(saturation, float) or (
156
- isinstance(saturation, collections.Iterable) and len(saturation) == 2
157
- )
158
- assert isinstance(hue, float) or (
159
- isinstance(hue, collections.Iterable) and len(hue) == 2
160
- )
161
-
162
- self.brightness = brightness
163
- self.contrast = contrast
164
- self.saturation = saturation
165
- self.hue = hue
166
-
167
- @staticmethod
168
- def get_params(brightness, contrast, saturation, hue):
169
- """Get a randomized transform to be applied on image.
170
- Arguments are same as that of __init__.
171
- Returns:
172
- Transform which randomly adjusts brightness, contrast and
173
- saturation in a random order.
174
- """
175
- transforms = []
176
-
177
- if isinstance(brightness, numbers.Number):
178
-
179
- if brightness > 0:
180
- brightness_factor = random.uniform(
181
- max(0, 1 - brightness), 1 + brightness
182
- )
183
- transforms.append(
184
- Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
185
- )
186
-
187
- if contrast > 0:
188
- contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
189
- transforms.append(
190
- Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
191
- )
192
-
193
- if saturation > 0:
194
- saturation_factor = random.uniform(
195
- max(0, 1 - saturation), 1 + saturation
196
- )
197
- transforms.append(
198
- Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
199
- )
200
-
201
- if hue > 0:
202
- hue_factor = random.uniform(-hue, hue)
203
- transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
204
-
205
- else:
206
-
207
- if brightness[0] > 0 and brightness[1] > 0:
208
-
209
- brightness_factor = random.uniform(brightness[0], brightness[1])
210
- transforms.append(
211
- Lambda(lambda img: F.adjust_brightness(img, brightness_factor))
212
- )
213
-
214
- if contrast[0] > 0 and contrast[1] > 0:
215
-
216
- contrast_factor = random.uniform(contrast[0], contrast[1])
217
- transforms.append(
218
- Lambda(lambda img: F.adjust_contrast(img, contrast_factor))
219
- )
220
-
221
- if saturation[0] > 0 and saturation[1] > 0:
222
-
223
- saturation_factor = random.uniform(saturation[0], saturation[1])
224
- transforms.append(
225
- Lambda(lambda img: F.adjust_saturation(img, saturation_factor))
226
- )
227
-
228
- if hue[0] > 0 and hue[1] > 0:
229
- hue_factor = random.uniform(hue[0], hue[1])
230
- transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
231
-
232
- random.shuffle(transforms)
233
- transform = ComposeSingle(transforms)
234
-
235
- return transform
236
-
237
- def __call__(self, img, target):
238
- """
239
- Args:
240
- img (np.ndarray): Input image.
241
- Returns:
242
- np.ndarray: Color jittered image.
243
- """
244
- transform = self.get_params(
245
- self.brightness, self.contrast, self.saturation, self.hue
246
- )
247
- return transform(img), target
248
-
249
- def __repr__(self):
250
- format_string = self.__class__.__name__ + "("
251
- format_string += "brightness={0}".format(self.brightness)
252
- format_string += ", contrast={0}".format(self.contrast)
253
- format_string += ", saturation={0}".format(self.saturation)
254
- format_string += ", hue={0})".format(self.hue)
255
- return format_string
256
-
257
-
258
16
  class Normalize(object):
259
17
  """Normalize a tensor image with mean and standard deviation.
260
18
  Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
@@ -284,69 +42,6 @@ class Normalize(object):
284
42
  )
285
43
 
286
44
 
287
- class NoTransformation(object):
288
- """Do Nothing"""
289
-
290
- def __call__(self, img, target):
291
- return img, target
292
-
293
-
294
- class Compose(object):
295
- """Composes several transforms together.
296
- Args:
297
- transforms (list of ``Transform`` objects): list of transforms to compose.
298
- Example:
299
- >>> transforms.Compose([
300
- >>> transforms.CenterCrop(10),
301
- >>> transforms.ToTensor(),
302
- >>> ])
303
- """
304
-
305
- def __init__(self, transforms):
306
- self.transforms = transforms
307
-
308
- def __call__(self, img, target):
309
- for t in self.transforms:
310
- img, target = t(img, target)
311
- return img, target
312
-
313
- def __repr__(self):
314
- format_string = self.__class__.__name__ + "("
315
- for t in self.transforms:
316
- format_string += "\n"
317
- format_string += " {0}".format(t)
318
- format_string += "\n)"
319
- return format_string
320
-
321
-
322
- class ComposeSingle(object):
323
- """Composes several transforms together.
324
- Args:
325
- transforms (list of ``Transform`` objects): list of transforms to compose.
326
- Example:
327
- >>> transforms.Compose([
328
- >>> transforms.CenterCrop(10),
329
- >>> transforms.ToTensor(),
330
- >>> ])
331
- """
332
-
333
- def __init__(self, transforms):
334
- self.transforms = transforms
335
-
336
- def __call__(self, img):
337
- for t in self.transforms:
338
- img = t(img)
339
- return img
340
-
341
- def __repr__(self):
342
- format_string = self.__class__.__name__ + "("
343
- for t in self.transforms:
344
- format_string += "\n"
345
- format_string += " {0}".format(t)
346
- format_string += "\n)"
347
- return format_string
348
-
349
-
350
45
  class Resize(object):
351
46
  """Resize the input PIL Image to the given size.
352
47
  Args:
@@ -26,7 +26,7 @@ class TableModel04_rs(BaseModel, nn.Module):
26
26
  TableNet04Model encoder, dual-decoder model with OTSL+ support
27
27
  """
28
28
 
29
- def __init__(self, config, init_data, purpose, device):
29
+ def __init__(self, config, init_data, device):
30
30
  super(TableModel04_rs, self).__init__(config, init_data, device)
31
31
 
32
32
  self._prof = config["predict"].get("profiling", False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 1.3.1
3
+ Version: 1.3.2
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -1,32 +1,28 @@
1
1
  docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
2
2
  docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO9DhOcbvPS5k,6023
3
+ docling_ibm_models/tableformer/common.py,sha256=VLBQ_9JWl4EsmBMSftyooIXId8FN4iTVqTIho4eNZrg,3041
4
4
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
6
- docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
5
+ docling_ibm_models/tableformer/data_management/functional.py,sha256=kJntHEXFz2SP7obEcHyjAqZNZC9qh-U75MwUJALLADI,3143
7
6
  docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
8
7
  docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=GaBW5px3xX9JaHVASZArKiQ-qfrzX0oj-E_6P3-OvuU,21238
9
- docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
10
- docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=32rox4--vqFddCG6oJ1_RQpIoc8nmq4ADvPpgphVR60,40959
11
- docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
8
+ docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=J_AjIGnpT0SkSV12comBlDa8Ga86WnsyJvKkIok4ohs,38834
9
+ docling_ibm_models/tableformer/data_management/transforms.py,sha256=NNaz_7GI7FCVmu_rJuenqH5VfzRSljJHUHpNQQ8Mq3Q,2983
12
10
  docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
11
  docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
12
  docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
15
13
  docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
14
  docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
17
15
  docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
18
- docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=FtmWZNOKjQFLG5GtBCvvU23rWrIsDu3gqfcfl68soPg,12275
16
+ docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
19
17
  docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=nhnYFlXT5KyJMdB4qMo5r8GimWXVy0lcqcmoHPEl-KE,6416
20
18
  docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
21
19
  docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
22
- docling_ibm_models/tableformer/test_dataset_cache.py,sha256=zvVJvUnYz4GxAQfPUmLTHUbqj0Yhi2vwgOBnsRgt1rI,818
23
- docling_ibm_models/tableformer/test_prepare_image.py,sha256=oPmU93-yWIkCeUYulGQ1p676Vq-zcjw2EX24WA5lspA,3155
24
20
  docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
21
  docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
26
22
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
27
23
  docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
28
24
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
29
- docling_ibm_models-1.3.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
30
- docling_ibm_models-1.3.1.dist-info/METADATA,sha256=W7euvW9ItpwNBZbYXelzFY_O1sseHNe0HO4sJdO3Hbo,7088
31
- docling_ibm_models-1.3.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
- docling_ibm_models-1.3.1.dist-info/RECORD,,
25
+ docling_ibm_models-1.3.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
+ docling_ibm_models-1.3.2.dist-info/METADATA,sha256=JoQmgI44L9riX-SDDwu0w9rWu9l4hsuKoaGok5cnoHE,7088
27
+ docling_ibm_models-1.3.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
+ docling_ibm_models-1.3.2.dist-info/RECORD,,