docling-ibm-models 1.3.0__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +0,0 @@
1
- #
2
- # Copyright IBM Corp. 2024 - 2024
3
- # SPDX-License-Identifier: MIT
4
- #
5
- import logging
6
-
7
- import docling_ibm_models.tableformer.common as c
8
- from docling_ibm_models.tableformer.data_management.tf_dataset import TFDataset
9
-
10
- LOG_LEVEL = logging.INFO
11
- # LOG_LEVEL = logging.DEBUG
12
-
13
-
14
def dataset_test(config):
    r"""
    Build a TFDataset with the "train" argument and iterate over it once,
    printing the index of every batch that is produced.

    Parameters
    ----------
    config : dictionary
             The configuration settings
    """
    # The dataset is pinned to the CPU for this smoke test
    train_set = TFDataset(config, "train")
    train_set.set_device("cpu")

    # Reset and shuffle before walking through the batches
    train_set.reset()
    train_set.shuffle()
    for batch_idx, _ in enumerate(train_set):
        # Only the index is reported; the batch content itself is discarded
        print("Loading batch: {}".format(batch_idx))
33
-
34
-
35
if __name__ == "__main__":
    # Script entry point: parse the configuration and run the dataset loop
    dataset_test(c.parse_arguments())
@@ -1,99 +0,0 @@
1
- #
2
- # Copyright IBM Corp. 2024 - 2024
3
- # SPDX-License-Identifier: MIT
4
- #
5
- import glob
6
- import os
7
-
8
- import numpy as np
9
- from PIL import Image
10
-
11
- import docling_ibm_models.tableformer.common as c
12
- from docling_ibm_models.tableformer.data_management.data_transformer import (
13
- DataTransformer,
14
- )
15
-
16
-
17
def dump_np(img_np: np.ndarray, fn, n=6):
    """Write an image array to a text file, one image row per line.

    The array must have the shape [channels, rows, columns] with 1 to 4
    channels. Every value is right-aligned in a field of ``n + 6`` characters.
    The channel values of one pixel are separated by a single space and
    consecutive pixels are written back to back, so the output layout is
    unchanged from the previous implementation.

    Parameters
    ----------
    img_np : np.ndarray
        Image data of dtype float32 or uint8.
    fn : str
        Path of the text file to write.
    n : int
        Number of decimals used for float32 values.

    Returns
    -------
    bool
        True when the file has been written. False when the shape or the
        dtype is not supported; in that case the file is not touched.
    """
    img_np = np.asarray(img_np)
    shape = img_np.shape
    # Check the rank first: indexing shape[0] on a 0-d array would raise
    if len(shape) != 3 or shape[0] not in (1, 2, 3, 4):
        print("Image of invalid shape: {}".format(shape))
        return False

    # The dtype is uniform across the array, so pick the format spec once and
    # bail out before the output file gets created
    width = n + 6
    if img_np.dtype == np.float32:
        spec = ">{}.{}f".format(width, n)
    elif img_np.dtype == np.uint8:
        spec = ">{}".format(width)
    else:
        return False

    # Reorder to [rows, columns, channels] so each pixel is a contiguous group
    pixel_rows = np.moveaxis(img_np, 0, -1).tolist()
    with open(fn, "w") as fd:
        for pixel_row in pixel_rows:
            line = "".join(
                " ".join(format(value, spec) for value in pixel)
                for pixel in pixel_row
            )
            fd.write(line)
            fd.write("\n")
    return True
46
-
47
-
48
def dump_channels(save_dir, fn_prefix, img_np: np.ndarray):
    """Dump every channel of an image array into its own text file.

    The array is indexed as [channel, rows, columns]. Channel ``k`` is saved
    with ``np.savetxt`` as ``<save_dir>/<fn_prefix>_ch<k>.txt``. The paths of
    the written files are printed once all channels have been saved.

    Parameters
    ----------
    save_dir : str
        Directory that receives the text files.
    fn_prefix : str
        Common filename prefix of the text files.
    img_np : np.ndarray
        Image data with the channels on the first axis.
    """
    # Derive the number of channels from the data instead of assuming three
    channel_fns = [
        os.path.join(save_dir, "{}_ch{}.txt".format(fn_prefix, ch))
        for ch in range(img_np.shape[0])
    ]
    for ch, txt_fn in enumerate(channel_fns):
        np.savetxt(txt_fn, img_np[ch, :, :])
    for txt_fn in channel_fns:
        print(f"{txt_fn}")
62
-
63
-
64
def prepare_image(config):
    """Dump every PNG of the prediction directory before and after rescaling.

    For each ``*.png`` file found in ``config["predict"]["predict_dir"]`` two
    text files are written next to the image via ``dump_np``:
    ``<image>_python.txt`` holds the pixels as loaded and
    ``<image>_python_prepared.txt`` holds the output of
    ``DataTransformer.rescale_in_memory``.

    Parameters
    ----------
    config : dictionary
        The configuration settings. The normalization flag is read from
        ``config["dataset"]["image_normalization"]["state"]``.
    """
    transformer = DataTransformer(config)
    predict_dir = config["predict"]["predict_dir"]
    use_normalization = config["dataset"]["image_normalization"]["state"]

    for img_fn in glob.glob(os.path.join(predict_dir, "*.png")):
        print(f"img_fn: {img_fn}")

        with Image.open(img_fn) as img:
            # Dump the image as loaded, moving axis 2 to the front so that
            # the array has the layout dump_np expects
            loaded = np.array(img)
            channels_first = np.moveaxis(loaded, 2, 0)
            print(
                "orig. img_np.shape: {}, reshaped image: {}".format(
                    loaded.shape, channels_first.shape
                )
            )
            dump_np(channels_first, img_fn + "_python.txt")

            # Dump the tensor produced by the transformer for the same image
            rescaled = transformer.rescale_in_memory(img, use_normalization)
            print("npimgc: {} - {}".format(rescaled.type(), rescaled.size()))
            dump_np(rescaled.numpy(), img_fn + "_python_prepared.txt")
95
-
96
-
97
if __name__ == "__main__":
    # Script entry point: parse the configuration and dump the images
    prepare_image(c.parse_arguments())