docling-ibm-models 1.1.6__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/layoutmodel/layout_predictor.py +33 -25
- docling_ibm_models/tableformer/data_management/matching_post_processor.py +44 -24
- docling_ibm_models/tableformer/data_management/tf_predictor.py +21 -5
- docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +5 -0
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -4
- docling_ibm_models/tableformer/otsl.py +3 -0
- docling_ibm_models/tableformer/utils/app_profiler.py +12 -1
- docling_ibm_models/tableformer/utils/mem_monitor.py +175 -0
- {docling_ibm_models-1.1.6.dist-info → docling_ibm_models-1.2.0.dist-info}/LICENSE +1 -1
- {docling_ibm_models-1.1.6.dist-info → docling_ibm_models-1.2.0.dist-info}/METADATA +1 -1
- {docling_ibm_models-1.1.6.dist-info → docling_ibm_models-1.2.0.dist-info}/RECORD +12 -12
- docling_ibm_models/tableformer/utils/variance.py +0 -175
- {docling_ibm_models-1.1.6.dist-info → docling_ibm_models-1.2.0.dist-info}/WHEEL +0 -0
@@ -14,29 +14,6 @@ MODEL_CHECKPOINT_FN = "model.pt"
|
|
14
14
|
DEFAULT_NUM_THREADS = 4
|
15
15
|
|
16
16
|
|
17
|
-
# Classes:
|
18
|
-
CLASSES_MAP = {
|
19
|
-
0: "background",
|
20
|
-
1: "Caption",
|
21
|
-
2: "Footnote",
|
22
|
-
3: "Formula",
|
23
|
-
4: "List-item",
|
24
|
-
5: "Page-footer",
|
25
|
-
6: "Page-header",
|
26
|
-
7: "Picture",
|
27
|
-
8: "Section-header",
|
28
|
-
9: "Table",
|
29
|
-
10: "Text",
|
30
|
-
11: "Title",
|
31
|
-
12: "Document Index",
|
32
|
-
13: "Code",
|
33
|
-
14: "Checkbox-Selected",
|
34
|
-
15: "Checkbox-Unselected",
|
35
|
-
16: "Form",
|
36
|
-
17: "Key-Value Region",
|
37
|
-
}
|
38
|
-
|
39
|
-
|
40
17
|
class LayoutPredictor:
|
41
18
|
r"""
|
42
19
|
Document layout prediction using ONNX
|
@@ -69,6 +46,31 @@ class LayoutPredictor:
|
|
69
46
|
------
|
70
47
|
FileNotFoundError when the model's ONNX file is missing
|
71
48
|
"""
|
49
|
+
# Initialize classes map:
|
50
|
+
self._classes_map = {
|
51
|
+
0: "background",
|
52
|
+
1: "Caption",
|
53
|
+
2: "Footnote",
|
54
|
+
3: "Formula",
|
55
|
+
4: "List-item",
|
56
|
+
5: "Page-footer",
|
57
|
+
6: "Page-header",
|
58
|
+
7: "Picture",
|
59
|
+
8: "Section-header",
|
60
|
+
9: "Table",
|
61
|
+
10: "Text",
|
62
|
+
11: "Title",
|
63
|
+
12: "Document Index",
|
64
|
+
13: "Code",
|
65
|
+
14: "Checkbox-Selected",
|
66
|
+
15: "Checkbox-Unselected",
|
67
|
+
16: "Form",
|
68
|
+
17: "Key-Value Region",
|
69
|
+
}
|
70
|
+
|
71
|
+
# Blacklisted classes
|
72
|
+
self._black_classes = set(["Form", "Key-Value Region"])
|
73
|
+
|
72
74
|
# Set basic params
|
73
75
|
self._threshold = 0.6 # Score threshold
|
74
76
|
self._image_size = 640
|
@@ -159,13 +161,19 @@ class LayoutPredictor:
|
|
159
161
|
)
|
160
162
|
|
161
163
|
# Yield output
|
162
|
-
for
|
164
|
+
for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
|
165
|
+
# Filter out blacklisted classes
|
166
|
+
label = self._classes_map[label_idx]
|
167
|
+
if label in self._black_classes:
|
168
|
+
continue
|
169
|
+
|
170
|
+
# Check against threshold
|
163
171
|
if score > self._threshold:
|
164
172
|
yield {
|
165
173
|
"l": box[0] / self._image_size * w,
|
166
174
|
"t": box[1] / self._image_size * h,
|
167
175
|
"r": box[2] / self._image_size * w,
|
168
176
|
"b": box[3] / self._image_size * h,
|
169
|
-
"label":
|
177
|
+
"label": label,
|
170
178
|
"confidence": score,
|
171
179
|
}
|
@@ -4,6 +4,7 @@
|
|
4
4
|
#
|
5
5
|
import json
|
6
6
|
import logging
|
7
|
+
import math
|
7
8
|
import statistics
|
8
9
|
|
9
10
|
import docling_ibm_models.tableformer.settings as s
|
@@ -403,45 +404,63 @@ class MatchingPostProcessor:
|
|
403
404
|
# Push horizontally
|
404
405
|
if x1_min < x2_min:
|
405
406
|
# Move box1 to the left and box2 to the right
|
406
|
-
box1["bbox"][2] -= overlap_x
|
407
|
-
box2["bbox"][0] += overlap_x
|
407
|
+
box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
408
|
+
box2["bbox"][0] += math.floor(overlap_x / 2)
|
408
409
|
else:
|
409
410
|
# Move box2 to the left and box1 to the right
|
410
|
-
box2["bbox"][2] -= overlap_x
|
411
|
-
box1["bbox"][0] += overlap_x
|
411
|
+
box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
|
412
|
+
box1["bbox"][0] += math.floor(overlap_x / 2)
|
412
413
|
else:
|
413
414
|
# Push vertically
|
414
415
|
if y1_min < y2_min:
|
415
416
|
# Move box1 up and box2 down
|
416
|
-
box1["bbox"][3] -= overlap_y
|
417
|
-
box2["bbox"][1] += overlap_y
|
417
|
+
box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
418
|
+
box2["bbox"][1] += math.floor(overlap_y / 2)
|
418
419
|
else:
|
419
420
|
# Move box2 up and box1 down
|
420
|
-
box2["bbox"][3] -= overlap_y
|
421
|
-
box1["bbox"][1] += overlap_y
|
421
|
+
box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
|
422
|
+
box1["bbox"][1] += math.floor(overlap_y / 2)
|
423
|
+
|
424
|
+
# Will flip coordinates in proper order, if previous operations reversed it
|
425
|
+
box1["bbox"] = [
|
426
|
+
min(box1["bbox"][0], box1["bbox"][2]),
|
427
|
+
min(box1["bbox"][1], box1["bbox"][3]),
|
428
|
+
max(box1["bbox"][0], box1["bbox"][2]),
|
429
|
+
max(box1["bbox"][1], box1["bbox"][3]),
|
430
|
+
]
|
431
|
+
box2["bbox"] = [
|
432
|
+
min(box2["bbox"][0], box2["bbox"][2]),
|
433
|
+
min(box2["bbox"][1], box2["bbox"][3]),
|
434
|
+
max(box2["bbox"][0], box2["bbox"][2]),
|
435
|
+
max(box2["bbox"][1], box2["bbox"][3]),
|
436
|
+
]
|
422
437
|
|
423
438
|
return box1, box2
|
424
439
|
|
425
440
|
def do_boxes_overlap(box1, box2):
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
441
|
+
B1 = box1["bbox"]
|
442
|
+
B2 = box2["bbox"]
|
443
|
+
if (
|
444
|
+
(B1[0] >= B2[2])
|
445
|
+
or (B1[2] <= B2[0])
|
446
|
+
or (B1[3] <= B2[1])
|
447
|
+
or (B1[1] >= B2[3])
|
448
|
+
):
|
432
449
|
return False
|
433
|
-
|
434
|
-
|
435
|
-
return False
|
436
|
-
return True
|
450
|
+
else:
|
451
|
+
return True
|
437
452
|
|
438
453
|
def find_overlapping_pairs_indexes(bboxes):
|
439
454
|
overlapping_indexes = []
|
440
455
|
# Compare each box with every other box (combinations)
|
441
456
|
for i in range(len(bboxes)):
|
442
457
|
for j in range(i + 1, len(bboxes)):
|
443
|
-
if
|
444
|
-
bboxes[i]
|
458
|
+
if i != j:
|
459
|
+
if bboxes[i] != bboxes[j]:
|
460
|
+
if do_boxes_overlap(bboxes[i], bboxes[j]):
|
461
|
+
bboxes[i], bboxes[j] = correct_overlap(
|
462
|
+
bboxes[i], bboxes[j]
|
463
|
+
)
|
445
464
|
|
446
465
|
return overlapping_indexes, bboxes
|
447
466
|
|
@@ -1144,7 +1163,7 @@ class MatchingPostProcessor:
|
|
1144
1163
|
new_pdf_cells.append(pdf_cells[i])
|
1145
1164
|
return new_pdf_cells
|
1146
1165
|
|
1147
|
-
def process(self, matching_details):
|
1166
|
+
def process(self, matching_details, correct_overlapping_cells=False):
|
1148
1167
|
r"""
|
1149
1168
|
Do post processing, see details in the comments below
|
1150
1169
|
|
@@ -1348,9 +1367,10 @@ class MatchingPostProcessor:
|
|
1348
1367
|
table_cells_wo = po2
|
1349
1368
|
max_cell_id = po3
|
1350
1369
|
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1370
|
+
if correct_overlapping_cells:
|
1371
|
+
# As the last step - correct cell bboxes in a way that they don't overlap:
|
1372
|
+
if len(table_cells_wo) <= 300: # For performance reasons
|
1373
|
+
table_cells_wo = self._find_overlapping(table_cells_wo)
|
1354
1374
|
|
1355
1375
|
self._log().debug("*** final_matches_wo")
|
1356
1376
|
self._log().debug(final_matches_wo)
|
@@ -523,8 +523,9 @@ class TFPredictor:
|
|
523
523
|
# return the resized image
|
524
524
|
return resized, sf
|
525
525
|
|
526
|
-
def multi_table_predict(
|
527
|
-
|
526
|
+
def multi_table_predict(
|
527
|
+
self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
|
528
|
+
):
|
528
529
|
multi_tf_output = []
|
529
530
|
page_image = iocr_page["image"]
|
530
531
|
|
@@ -546,7 +547,12 @@ class TFPredictor:
|
|
546
547
|
# Predict
|
547
548
|
if do_matching:
|
548
549
|
tf_responses, predict_details = self.predict(
|
549
|
-
iocr_page,
|
550
|
+
iocr_page,
|
551
|
+
table_bbox,
|
552
|
+
table_image,
|
553
|
+
scale_factor,
|
554
|
+
None,
|
555
|
+
correct_overlapping_cells,
|
550
556
|
)
|
551
557
|
else:
|
552
558
|
tf_responses, predict_details = self.predict_dummy(
|
@@ -733,7 +739,13 @@ class TFPredictor:
|
|
733
739
|
return tf_output, matching_details
|
734
740
|
|
735
741
|
def predict(
|
736
|
-
self,
|
742
|
+
self,
|
743
|
+
iocr_page,
|
744
|
+
table_bbox,
|
745
|
+
table_image,
|
746
|
+
scale_factor,
|
747
|
+
eval_res_preds=None,
|
748
|
+
correct_overlapping_cells=False,
|
737
749
|
):
|
738
750
|
r"""
|
739
751
|
Predict the table out of an image in memory
|
@@ -744,6 +756,8 @@ class TFPredictor:
|
|
744
756
|
Docling provided table data
|
745
757
|
eval_res_preds : dict
|
746
758
|
Ready predictions provided by the evaluation results
|
759
|
+
correct_overlapping_cells : boolean
|
760
|
+
Enables or disables last post-processing step, that fixes cell bboxes to remove overlap
|
747
761
|
|
748
762
|
Returns
|
749
763
|
-------
|
@@ -834,7 +848,9 @@ class TFPredictor:
|
|
834
848
|
): # There are at least some pdf cells to match with
|
835
849
|
if self.enable_post_process:
|
836
850
|
AggProfiler().begin("post_process", self._prof)
|
837
|
-
matching_details = self._post_processor.process(
|
851
|
+
matching_details = self._post_processor.process(
|
852
|
+
matching_details, correct_overlapping_cells
|
853
|
+
)
|
838
854
|
AggProfiler().end("post_process", self._prof)
|
839
855
|
|
840
856
|
# Generate the expected Docling responses
|
@@ -157,7 +157,12 @@ class BBoxDecoder(nn.Module):
|
|
157
157
|
predictions_classes.append(self._class_embed(h))
|
158
158
|
if len(predictions_bboxes) > 0:
|
159
159
|
predictions_bboxes = torch.stack([x[0] for x in predictions_bboxes])
|
160
|
+
else:
|
161
|
+
predictions_bboxes = torch.empty(0)
|
162
|
+
|
160
163
|
if len(predictions_classes) > 0:
|
161
164
|
predictions_classes = torch.stack([x[0] for x in predictions_classes])
|
165
|
+
else:
|
166
|
+
predictions_classes = torch.empty(0)
|
162
167
|
|
163
168
|
return predictions_classes, predictions_bboxes
|
@@ -149,11 +149,11 @@ class Tag_Transformer(nn.Module):
|
|
149
149
|
self._positional_encoding = PositionalEncoding(embed_dim)
|
150
150
|
self._td_encode = td_encode
|
151
151
|
|
152
|
+
encoder_layer = nn.TransformerEncoderLayer(
|
153
|
+
d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
|
154
|
+
)
|
152
155
|
self._encoder = nn.TransformerEncoder(
|
153
|
-
|
154
|
-
d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
|
155
|
-
),
|
156
|
-
num_layers=encoder_layers,
|
156
|
+
encoder_layer, num_layers=encoder_layers, enable_nested_tensor=False
|
157
157
|
)
|
158
158
|
|
159
159
|
self._decoder = TMTransformerDecoder(
|
@@ -123,6 +123,9 @@ def otsl_check_right(rs_split, x, y):
|
|
123
123
|
|
124
124
|
|
125
125
|
def otsl_to_html(rs_list, logdebug):
|
126
|
+
if len(rs_list) == 0:
|
127
|
+
return []
|
128
|
+
|
126
129
|
if rs_list[0] not in ["fcel", "ched", "rhed", "srow", "ecel"]:
|
127
130
|
# Most likely already HTML...
|
128
131
|
return rs_list
|
@@ -6,6 +6,8 @@ import time
|
|
6
6
|
from collections import deque
|
7
7
|
from statistics import mean, median
|
8
8
|
|
9
|
+
from docling_ibm_models.tableformer.utils.mem_monitor import MemMonitor
|
10
|
+
|
9
11
|
|
10
12
|
class SingletonClass(type):
|
11
13
|
r"""
|
@@ -37,11 +39,13 @@ class Profiler:
|
|
37
39
|
def __init__(self):
|
38
40
|
self._section_dts = {} # section name -> sum(section intervals)
|
39
41
|
self._section_calls = {} # section name -> number of invocations
|
40
|
-
self._section_kB = {} # section name -> max kB of used heap
|
42
|
+
self._section_kB = {} # section name -> max kB of used heap (resident set size)
|
41
43
|
|
42
44
|
# section name -> beginning of the last interval
|
43
45
|
self._last_begin = {}
|
44
46
|
|
47
|
+
self._mem_monitor = MemMonitor()
|
48
|
+
|
45
49
|
def begin(self, section_name, enable=True):
|
46
50
|
r"""
|
47
51
|
Mark the beginning of an interval
|
@@ -83,13 +87,20 @@ class Profiler:
|
|
83
87
|
if section_name not in self._last_begin:
|
84
88
|
return False
|
85
89
|
|
90
|
+
# Get memory
|
91
|
+
kB = self._mem_monitor.get_memory()
|
92
|
+
if isinstance(kB, dict):
|
93
|
+
kB = kB["resident"]
|
94
|
+
|
86
95
|
dt = time.time() - self._last_begin[section_name]
|
87
96
|
if section_name not in self._section_dts:
|
88
97
|
self._section_dts[section_name] = dt
|
89
98
|
self._section_calls[section_name] = 1
|
99
|
+
self._section_kB[section_name] = kB
|
90
100
|
else:
|
91
101
|
self._section_dts[section_name] += dt
|
92
102
|
self._section_calls[section_name] += 1
|
103
|
+
self._section_kB[section_name] = max(kB, self._section_kB[section_name])
|
93
104
|
|
94
105
|
return True
|
95
106
|
|
@@ -0,0 +1,175 @@
|
|
1
|
+
#
|
2
|
+
# Copyright IBM Corp. 2024 - 2024
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
#
|
5
|
+
import os
|
6
|
+
import platform
|
7
|
+
import re
|
8
|
+
|
9
|
+
|
10
|
+
class MemMonitor:
|
11
|
+
r"""
|
12
|
+
Memory monitor for Linux
|
13
|
+
|
14
|
+
It supports 2 approaches for extracting memory information:
|
15
|
+
- linux-native: It parse the `/proc` pseudo-files. It is available only for Linux
|
16
|
+
- psutil: Use the `psutil` library
|
17
|
+
|
18
|
+
## Linux-Native approach
|
19
|
+
|
20
|
+
The linux-native approach implements 2 methods to extract the memory fields:
|
21
|
+
|
22
|
+
1. The `get_memory()` method:
|
23
|
+
|
24
|
+
- It is very fast
|
25
|
+
- It parses the `/proc/<pid>/statm` pseudo-file
|
26
|
+
- It Contains the following fields:
|
27
|
+
size (1) total program size
|
28
|
+
(same as VmSize in /proc/[pid]/status)
|
29
|
+
resident (2) resident set size
|
30
|
+
(same as VmRSS in /proc/[pid]/status)
|
31
|
+
shared (3) number of resident shared pages (i.e., backed by a file)
|
32
|
+
(same as RssFile+RssShmem in /proc/[pid]/status)
|
33
|
+
text (4) text (code)
|
34
|
+
lib (5) library (unused since Linux 2.6; always 0)
|
35
|
+
data (6) data + stack
|
36
|
+
dt (7) dirty pages (unused since Linux 2.6; always 0)
|
37
|
+
|
38
|
+
|
39
|
+
2. The `get_memory_full()` method:
|
40
|
+
|
41
|
+
- It is slower to parse but contains more detailed information
|
42
|
+
- It uses regex to parse the `/proc/<pid>/status` pseudo-file
|
43
|
+
- It contains the following fields:
|
44
|
+
VmPeak: Peak virtual memory size.
|
45
|
+
VmSize: Virtual memory size.
|
46
|
+
VmLck: Locked memory size (see mlock(2)).
|
47
|
+
VmPin: Pinned memory size (since Linux 3.2). These are pages that can't be moved because
|
48
|
+
something needs to directly access physical memory.
|
49
|
+
VmHWM: Peak resident set size ("high water mark").
|
50
|
+
VmRSS: Resident set size. Note that the value here is the sum of RssAnon, RssFile, and
|
51
|
+
RssShmem.
|
52
|
+
RssAnon: Size of resident anonymous memory. (since Linux 4.5).
|
53
|
+
RssFile: Size of resident file mappings. (since Linux 4.5).
|
54
|
+
RssShmem: Size of resident shared memory (includes System V shared memory, mappings from
|
55
|
+
tmpfs(5), and shared anonymous mappings). (since Linux 4.5).
|
56
|
+
VmData, VmStk, VmExe: Size of data, stack, and text segments.
|
57
|
+
VmLib: Shared library code size.
|
58
|
+
VmPTE: Page table entries size (since Linux 2.6.10).
|
59
|
+
VmPMD: Size of second-level page tables (added in Linux 4.0; removed in Linux 4.15).
|
60
|
+
VmSwap: Swapped-out virtual memory size by anonymous private pages; shmem swap usage is
|
61
|
+
not included (since Linux 2.6.34).
|
62
|
+
|
63
|
+
|
64
|
+
## The psutil library
|
65
|
+
|
66
|
+
- Apparently the psutil library parses the `/proc/<pid>/statm`
|
67
|
+
- The memory_info() function returns the fields: rss, vms, shared, text, lib, data, dirty
|
68
|
+
|
69
|
+
|
70
|
+
## Field mappings
|
71
|
+
|
72
|
+
These are the fields returned by psutil memory_info() and their mapping in the /proc files:
|
73
|
+
(I put ? when I am not 100% about the mapping)
|
74
|
+
|
75
|
+
| psutil | /proc/$$/status | /proc/$$/statm |
|
76
|
+
|---------|--------------------|----------------|
|
77
|
+
| rss | VmRSS | resident |
|
78
|
+
| vms | VmSize | size |
|
79
|
+
| shared | RssFile + RssShmem | shared |
|
80
|
+
| text | VmExe ? | text |
|
81
|
+
| lib | RssShmem ? | lib |
|
82
|
+
| data | VmData + VmStk | data |
|
83
|
+
| dirty | VmSwap ? | dt |
|
84
|
+
|
85
|
+
"""
|
86
|
+
|
87
|
+
def __init__(self, enable=True):
|
88
|
+
self._enable = enable
|
89
|
+
self._pid = os.getpid()
|
90
|
+
|
91
|
+
# Create regex for each memory field of the /proc/status pseudo-file
|
92
|
+
self._status_fields = [
|
93
|
+
"VmPeak",
|
94
|
+
"VmSize",
|
95
|
+
"VmLck",
|
96
|
+
"VmPin",
|
97
|
+
"VmHWM",
|
98
|
+
"VmRSS",
|
99
|
+
"RssAnon",
|
100
|
+
"RssFile",
|
101
|
+
"RssShmem",
|
102
|
+
"VmData",
|
103
|
+
"VmStk",
|
104
|
+
"VmExe",
|
105
|
+
"VmLib",
|
106
|
+
"VmPTE",
|
107
|
+
"VmPMD",
|
108
|
+
"VmSwap",
|
109
|
+
]
|
110
|
+
self._status_regex = {}
|
111
|
+
for mem_field in self._status_fields:
|
112
|
+
regex_str = r"({}:)(\s+)(\d*)(.*)".format(mem_field)
|
113
|
+
self._status_regex[mem_field] = re.compile(regex_str)
|
114
|
+
|
115
|
+
def get_memory_full(self) -> dict:
|
116
|
+
r"""
|
117
|
+
- Parse /proc/<pid>status to get all memory info.
|
118
|
+
- The method returns a dict with the fields self._status_fields
|
119
|
+
- This method is SLOW. Unless you need the full memory info, better to use `get_memory`
|
120
|
+
|
121
|
+
The returned values are in kB
|
122
|
+
"""
|
123
|
+
if not self._enable:
|
124
|
+
return -2
|
125
|
+
if platform.system() != "Linux":
|
126
|
+
return -1
|
127
|
+
pid_fn = "/proc/{}/status".format(self._pid)
|
128
|
+
|
129
|
+
# Dict to collect all memory fields
|
130
|
+
memory = {}
|
131
|
+
with open(pid_fn, "r") as fn:
|
132
|
+
for ll in fn:
|
133
|
+
for mem_field in self._status_fields:
|
134
|
+
regex = self._status_regex[mem_field]
|
135
|
+
m = regex.match(ll)
|
136
|
+
if m is not None:
|
137
|
+
memory[mem_field] = int(m.group(3))
|
138
|
+
if len(memory) == len(self._status_fields):
|
139
|
+
break
|
140
|
+
|
141
|
+
return memory
|
142
|
+
|
143
|
+
def get_memory(self) -> dict:
|
144
|
+
r"""
|
145
|
+
- Parse /proc/<pid>statm to get the most important memory fields
|
146
|
+
- This is a fast implementation.
|
147
|
+
- The method returns a dict with the fields:
|
148
|
+
"size", "resident", "shared", "text", "lib", "data", "dt"
|
149
|
+
- Check the documentation at the top for a mapping across the various fields
|
150
|
+
|
151
|
+
The returned values are in kB
|
152
|
+
"""
|
153
|
+
if not self._enable:
|
154
|
+
return -2
|
155
|
+
if platform.system() != "Linux":
|
156
|
+
return -1
|
157
|
+
pid_fn = "/proc/{}/statm".format(self._pid)
|
158
|
+
|
159
|
+
# Dict to collect all memory fields
|
160
|
+
memory = {}
|
161
|
+
with open(pid_fn, "r") as fn:
|
162
|
+
ll = fn.read()
|
163
|
+
# The values are in pages
|
164
|
+
# Each page is 4096 bytes (4kB)
|
165
|
+
data = [int(x) << 2 for x in ll.split(" ")]
|
166
|
+
memory = {
|
167
|
+
"size": data[0],
|
168
|
+
"resident": data[1],
|
169
|
+
"shared": data[2],
|
170
|
+
"text": data[3],
|
171
|
+
"lib": data[4],
|
172
|
+
"data": data[5],
|
173
|
+
"dt": data[6],
|
174
|
+
}
|
175
|
+
return memory
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.2.0
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -1,32 +1,32 @@
|
|
1
|
-
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=
|
1
|
+
docling_ibm_models/layoutmodel/layout_predictor.py,sha256=JHZbh6HyA2fLqaN0p9Lv3Y9P9dgkeHUqQI-JyyetocE,6042
|
2
2
|
docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
docling_ibm_models/tableformer/common.py,sha256=RV2ptqgkfz1OIoN-WqiSeln0pkZ_7zTO9DhOcbvPS5k,6023
|
4
4
|
docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
docling_ibm_models/tableformer/data_management/data_transformer.py,sha256=lNKkAk0VALbixapCuDDSIQKtA0QPCGQF8AGO3D64new,18263
|
6
6
|
docling_ibm_models/tableformer/data_management/functional.py,sha256=UrXsEm4DSc1QXdUPb0tZ7nvbg7mGVjpQhX3pGL6C5bA,20633
|
7
|
-
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256
|
7
|
+
docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
|
8
8
|
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=kzOjSmXkYrxc0de8wHbDJMvwKXelxYf4OccHTRqnpco,21081
|
9
9
|
docling_ibm_models/tableformer/data_management/tf_dataset.py,sha256=6_qSsYt6qoE2JBzUNrJfCDX3Kgg7tyrv3kimGLdEQ5o,49890
|
10
|
-
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=
|
10
|
+
docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=Ha--59Rfs3V78p3q__q5cuEoewrTld18qhX8VqAQrYc,39730
|
11
11
|
docling_ibm_models/tableformer/data_management/transforms.py,sha256=_i1HXkX8LAuHbeGRrg8kF9yFNJRQZOKmWzxKt559ABQ,13268
|
12
12
|
docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
docling_ibm_models/tableformer/models/common/base_model.py,sha256=SbCjeEvDmGnyoKYhB5pYeg2LFVQdArglfrhqkuW1nUw,10030
|
15
15
|
docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=
|
16
|
+
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
|
17
17
|
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
|
18
18
|
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=7iGkrTNLzjC1yn1zuA3N6DvBvbrcO_BR5tmHG3RKmXs,12159
|
19
|
-
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=
|
20
|
-
docling_ibm_models/tableformer/otsl.py,sha256=
|
19
|
+
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=nhnYFlXT5KyJMdB4qMo5r8GimWXVy0lcqcmoHPEl-KE,6416
|
20
|
+
docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
|
21
21
|
docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
|
22
22
|
docling_ibm_models/tableformer/test_dataset_cache.py,sha256=zvVJvUnYz4GxAQfPUmLTHUbqj0Yhi2vwgOBnsRgt1rI,818
|
23
23
|
docling_ibm_models/tableformer/test_prepare_image.py,sha256=oPmU93-yWIkCeUYulGQ1p676Vq-zcjw2EX24WA5lspA,3155
|
24
24
|
docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
|
-
docling_ibm_models/tableformer/utils/app_profiler.py,sha256=
|
25
|
+
docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
|
26
|
+
docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
|
26
27
|
docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
|
27
28
|
docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
|
28
|
-
docling_ibm_models/
|
29
|
-
docling_ibm_models-1.
|
30
|
-
docling_ibm_models-1.
|
31
|
-
docling_ibm_models-1.
|
32
|
-
docling_ibm_models-1.1.6.dist-info/RECORD,,
|
29
|
+
docling_ibm_models-1.2.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
30
|
+
docling_ibm_models-1.2.0.dist-info/METADATA,sha256=j_ccZliZ-e99bOg1MVoshV2f_ZxmfqKsIE-JRW2N2tI,7172
|
31
|
+
docling_ibm_models-1.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
32
|
+
docling_ibm_models-1.2.0.dist-info/RECORD,,
|
@@ -1,175 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright IBM Corp. 2024 - 2024
|
3
|
-
# SPDX-License-Identifier: MIT
|
4
|
-
#
|
5
|
-
import logging
|
6
|
-
|
7
|
-
import numpy as np
|
8
|
-
|
9
|
-
import docling_ibm_models.tableformer.settings as s
|
10
|
-
|
11
|
-
LOG_LEVEL = logging.INFO
|
12
|
-
|
13
|
-
|
14
|
-
class MyWelford:
|
15
|
-
r"""
|
16
|
-
Running computation of the sample mean and sample variance using Welford's algorithm
|
17
|
-
"""
|
18
|
-
|
19
|
-
def __init__(self):
|
20
|
-
self._i = 0 # Running index
|
21
|
-
self._m = 0 # Running mean
|
22
|
-
self._s = 0 # (n - 1) * variance
|
23
|
-
|
24
|
-
def reset(self):
|
25
|
-
r"""
|
26
|
-
Reset the object
|
27
|
-
"""
|
28
|
-
self._i = 0
|
29
|
-
self._m = 0
|
30
|
-
self._s = 0
|
31
|
-
|
32
|
-
def add(self, xi):
|
33
|
-
r"""
|
34
|
-
Invoke add each time a new sample arrives
|
35
|
-
|
36
|
-
Inputs:
|
37
|
-
xi: The next sample of data
|
38
|
-
"""
|
39
|
-
self._i += 1
|
40
|
-
old_m = self._m
|
41
|
-
self._m = self._m + (xi - self._m) / self._i
|
42
|
-
self._s = self._s + (xi - self._m) * (xi - old_m)
|
43
|
-
|
44
|
-
def results(self):
|
45
|
-
r"""
|
46
|
-
Get the computed mean, variance and standard deviation up to now
|
47
|
-
|
48
|
-
Outputs:
|
49
|
-
m: Sample mean
|
50
|
-
v: Sample variance
|
51
|
-
std: Sample standard deviation
|
52
|
-
"""
|
53
|
-
if self._i <= 1:
|
54
|
-
return None, None, None
|
55
|
-
|
56
|
-
# v = self._s / (self._i - 1) # Sample variance
|
57
|
-
v = self._s / (self._i) # Population variance
|
58
|
-
std = np.sqrt(v)
|
59
|
-
return self._m, v, std
|
60
|
-
|
61
|
-
|
62
|
-
class MyWelfordImg(MyWelford):
|
63
|
-
r"""
|
64
|
-
Welford algorithm to calculate running mean and sample variance for images
|
65
|
-
"""
|
66
|
-
|
67
|
-
def __init__(self):
|
68
|
-
super(MyWelfordImg, self).__init__()
|
69
|
-
|
70
|
-
def add(self, img):
|
71
|
-
r"""
|
72
|
-
Input:
|
73
|
-
img: An image numpy array (channel, width, height). The only requirement is to have the
|
74
|
-
channels as the first dimension and have 3 dimensions in total
|
75
|
-
"""
|
76
|
-
channels = img.shape[0]
|
77
|
-
flat_dim = img.shape[1] * img.shape[2]
|
78
|
-
img_r = img.reshape(channels, flat_dim)
|
79
|
-
|
80
|
-
for i in range(flat_dim):
|
81
|
-
super(MyWelfordImg, self).add(img_r[:, i])
|
82
|
-
|
83
|
-
|
84
|
-
class ChanVarianceImg:
|
85
|
-
r"""
|
86
|
-
Chan's algorithm to compute a running variance with support of sub-samples
|
87
|
-
In this implementation each sub-sample is an images
|
88
|
-
|
89
|
-
Math for the original paper:
|
90
|
-
https://github.ibm.com/nli/variance_formulae
|
91
|
-
"""
|
92
|
-
|
93
|
-
def __init__(self):
|
94
|
-
r""" """
|
95
|
-
self._first = True
|
96
|
-
# Size of the calculated dataset
|
97
|
-
self._n = 0
|
98
|
-
# Sum of the samples for the 3 image channels
|
99
|
-
self._t = 0
|
100
|
-
# Sum of the square differences of the deviations of the samples from the mean
|
101
|
-
self._s = 0
|
102
|
-
|
103
|
-
def add(self, img):
|
104
|
-
r"""
|
105
|
-
Add the provided image to the computation of the dataset statistics
|
106
|
-
|
107
|
-
Input:
|
108
|
-
img: An image numpy array (channel, width, height). The only requirement is to have the
|
109
|
-
channels as the first dimension and have 3 dimensions in total
|
110
|
-
"""
|
111
|
-
ch = img.shape[0]
|
112
|
-
n = img.shape[1] * img.shape[2]
|
113
|
-
img = img.reshape(ch, n)
|
114
|
-
img_t = img.sum(axis=1)
|
115
|
-
img_t_v = img_t.reshape(ch, 1)
|
116
|
-
diff = (img - (img_t_v / n)) ** 2
|
117
|
-
img_s = diff.sum(axis=1)
|
118
|
-
|
119
|
-
if not self._first:
|
120
|
-
c = (self._n / (n * (self._n + n))) * (
|
121
|
-
((n / self._n) * self._t - img_t) ** 2
|
122
|
-
)
|
123
|
-
self._s += img_s + c
|
124
|
-
self._t += img_t
|
125
|
-
else:
|
126
|
-
self._s = img_s
|
127
|
-
self._t = img_t
|
128
|
-
self._first = False
|
129
|
-
self._n += n
|
130
|
-
|
131
|
-
def results(self):
|
132
|
-
r"""
|
133
|
-
Get the computed statistics
|
134
|
-
|
135
|
-
Output:
|
136
|
-
mean: Mean for the complete dataset
|
137
|
-
var: Population variance for the complete dataset
|
138
|
-
std: Population standard deviation for the complete dataset
|
139
|
-
"""
|
140
|
-
mean = list(self._t / self._n)
|
141
|
-
var = list(self._s / self._n) # Population variance
|
142
|
-
std = list(np.sqrt(var))
|
143
|
-
|
144
|
-
return mean, var, std
|
145
|
-
|
146
|
-
def reset(self):
|
147
|
-
r"""
|
148
|
-
Reset the object to start over again
|
149
|
-
"""
|
150
|
-
self._n = 0
|
151
|
-
self._t = 0
|
152
|
-
self._s = 0
|
153
|
-
self._first = True
|
154
|
-
|
155
|
-
|
156
|
-
if __name__ == "__main__":
|
157
|
-
logger = s.get_custom_logger("variance", LOG_LEVEL)
|
158
|
-
|
159
|
-
n = 50000
|
160
|
-
channels = 3
|
161
|
-
width = 448
|
162
|
-
height = 448
|
163
|
-
|
164
|
-
my = ChanVarianceImg()
|
165
|
-
# Generate random images
|
166
|
-
for i in range(n):
|
167
|
-
logger.info(i)
|
168
|
-
img = 255 * np.random.rand(channels, width, height)
|
169
|
-
my.add(img)
|
170
|
-
|
171
|
-
# Calculate the statistics
|
172
|
-
m, v, std = my.results()
|
173
|
-
assert m.shape == (3,), "Wrong mean dimension"
|
174
|
-
assert v.shape == (3,), "Wrong variance dimension"
|
175
|
-
assert std.shape == (3,), "Wrong std dimension"
|
File without changes
|