docling-ibm-models 1.1.7__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/LICENSE +1 -1
  2. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/PKG-INFO +1 -1
  3. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/layoutmodel/layout_predictor.py +33 -25
  4. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +9 -6
  5. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/tf_predictor.py +92 -59
  6. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +4 -0
  7. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +4 -4
  8. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/utils/app_profiler.py +12 -1
  9. docling_ibm_models-1.2.1/docling_ibm_models/tableformer/utils/mem_monitor.py +175 -0
  10. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/pyproject.toml +1 -1
  11. docling_ibm_models-1.1.7/docling_ibm_models/tableformer/utils/variance.py +0 -175
  12. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/README.md +0 -0
  13. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/__init__.py +0 -0
  14. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/common.py +0 -0
  15. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  16. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
  17. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  18. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  19. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
  20. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  21. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  22. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  23. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  24. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  25. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  26. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  27. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/otsl.py +0 -0
  28. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/settings.py +0 -0
  29. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
  30. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
  31. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  32. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  33. {docling_ibm_models-1.1.7 → docling_ibm_models-1.2.1}/docling_ibm_models/tableformer/utils/utils.py +0 -0
LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) [year] [fullname]
+Copyright (c) 2024 International Business Machines
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.1.7
+Version: 1.2.1
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
docling_ibm_models/layoutmodel/layout_predictor.py
@@ -14,29 +14,6 @@ MODEL_CHECKPOINT_FN = "model.pt"
 DEFAULT_NUM_THREADS = 4
 
 
-# Classes:
-CLASSES_MAP = {
-    0: "background",
-    1: "Caption",
-    2: "Footnote",
-    3: "Formula",
-    4: "List-item",
-    5: "Page-footer",
-    6: "Page-header",
-    7: "Picture",
-    8: "Section-header",
-    9: "Table",
-    10: "Text",
-    11: "Title",
-    12: "Document Index",
-    13: "Code",
-    14: "Checkbox-Selected",
-    15: "Checkbox-Unselected",
-    16: "Form",
-    17: "Key-Value Region",
-}
-
-
 class LayoutPredictor:
     r"""
     Document layout prediction using ONNX
@@ -69,6 +46,31 @@ class LayoutPredictor:
         ------
         FileNotFoundError when the model's ONNX file is missing
         """
+        # Initialize classes map:
+        self._classes_map = {
+            0: "background",
+            1: "Caption",
+            2: "Footnote",
+            3: "Formula",
+            4: "List-item",
+            5: "Page-footer",
+            6: "Page-header",
+            7: "Picture",
+            8: "Section-header",
+            9: "Table",
+            10: "Text",
+            11: "Title",
+            12: "Document Index",
+            13: "Code",
+            14: "Checkbox-Selected",
+            15: "Checkbox-Unselected",
+            16: "Form",
+            17: "Key-Value Region",
+        }
+
+        # Blacklisted classes
+        self._black_classes = set(["Form", "Key-Value Region"])
+
         # Set basic params
         self._threshold = 0.6  # Score threshold
         self._image_size = 640
@@ -159,13 +161,19 @@
         )
 
         # Yield output
-        for label, box, score in zip(labels[0], boxes[0], scores[0]):
+        for label_idx, box, score in zip(labels[0], boxes[0], scores[0]):
+            # Filter out blacklisted classes
+            label = self._classes_map[label_idx]
+            if label in self._black_classes:
+                continue
+
+            # Check against threshold
             if score > self._threshold:
                 yield {
                     "l": box[0] / self._image_size * w,
                     "t": box[1] / self._image_size * h,
                     "r": box[2] / self._image_size * w,
                     "b": box[3] / self._image_size * h,
-                    "label": CLASSES_MAP[label],
+                    "label": label,
                     "confidence": score,
                 }
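The net effect of the three hunks above: the class map moves from module scope into the predictor instance, and each prediction is now filtered against a blacklist before the confidence threshold is applied. A minimal standalone sketch of that filtering order, with made-up sample predictions (the names mirror the diff, the data is illustrative):

    # Hedged sketch of the new filtering logic; the sample data is hypothetical.
    classes_map = {9: "Table", 16: "Form", 17: "Key-Value Region"}
    black_classes = {"Form", "Key-Value Region"}
    threshold = 0.6

    predictions = [(9, [10, 10, 90, 40], 0.91), (16, [5, 50, 95, 80], 0.85)]
    for label_idx, box, score in predictions:
        label = classes_map[label_idx]
        if label in black_classes:  # blacklist check happens first
            continue
        if score > threshold:  # then the confidence threshold
            print(label, box, score)  # -> Table [10, 10, 90, 40] 0.91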
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py
@@ -129,12 +129,15 @@ class CellMatcher:
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
         if len(pdf_cells) > 0:
             for word in pdf_cells:
-                word["bbox"] = [
-                    word["bbox"]["l"],
-                    word["bbox"]["t"],
-                    word["bbox"]["r"],
-                    word["bbox"]["b"],
-                ]
+                if isinstance(word["bbox"], list):
+                    continue
+                elif isinstance(word["bbox"], dict):
+                    word["bbox"] = [
+                        word["bbox"]["l"],
+                        word["bbox"]["t"],
+                        word["bbox"]["r"],
+                        word["bbox"]["b"],
+                    ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
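The matcher previously assumed every token's bbox was a dict with l/t/r/b keys; it now also accepts tokens whose bbox is already a flat list. A small sketch of that normalization with hypothetical tokens:

    # Both bbox shapes end up as [l, t, r, b]; the sample tokens are made up.
    tokens = [
        {"text": "a", "bbox": {"l": 1, "t": 2, "r": 3, "b": 4}},  # dict form
        {"text": "b", "bbox": [5, 6, 7, 8]},  # already normalized
    ]
    for word in tokens:
        if isinstance(word["bbox"], dict):
            bb = word["bbox"]
            word["bbox"] = [bb["l"], bb["t"], bb["r"], bb["b"]]
    print([w["bbox"] for w in tokens])  # [[1, 2, 3, 4], [5, 6, 7, 8]]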
docling_ibm_models/tableformer/data_management/tf_predictor.py
@@ -524,7 +524,12 @@ class TFPredictor:
         return resized, sf
 
     def multi_table_predict(
-        self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
+        self,
+        iocr_page,
+        table_bboxes,
+        do_matching=True,
+        correct_overlapping_cells=False,
+        sort_row_col_indexes=True,
     ):
         multi_tf_output = []
         page_image = iocr_page["image"]
@@ -563,56 +568,70 @@ class TFPredictor:
             # PROCESS PREDICTED RESULTS, TO TURN PREDICTED COL/ROW IDs into Indexes
            # Indexes should be in increasing order, without gaps
 
-            # Fix col/row indexes
-            # Arranges all col/row indexes sequentially without gaps using input IDs
-
-            indexing_start_cols = []  # Index of original start col IDs (not indexes)
-            indexing_end_cols = []  # Index of original end col IDs (not indexes)
-            indexing_start_rows = []  # Index of original start row IDs (not indexes)
-            indexing_end_rows = []  # Index of original end row IDs (not indexes)
-
-            # First, collect all possible predicted IDs, to be used as indexes
-            # ID's returned by Tableformer are sequential, but might contain gaps
-            for tf_response_cell in tf_responses:
-                start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
-                end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
-                start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
-                end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
-
-                # Collect all possible col/row IDs:
-                if start_col_offset_idx not in indexing_start_cols:
-                    indexing_start_cols.append(start_col_offset_idx)
-                if end_col_offset_idx not in indexing_end_cols:
-                    indexing_end_cols.append(end_col_offset_idx)
-                if start_row_offset_idx not in indexing_start_rows:
-                    indexing_start_rows.append(start_row_offset_idx)
-                if end_row_offset_idx not in indexing_end_rows:
-                    indexing_end_rows.append(end_row_offset_idx)
-
-            indexing_start_cols.sort()
-            indexing_end_cols.sort()
-            indexing_start_rows.sort()
-            indexing_end_rows.sort()
-
-            # After this - put actual indexes of IDs back into predicted structure...
-            for tf_response_cell in tf_responses:
-                tf_response_cell["start_col_offset_idx"] = indexing_start_cols.index(
-                    tf_response_cell["start_col_offset_idx"]
-                )
-                tf_response_cell["end_col_offset_idx"] = (
-                    tf_response_cell["start_col_offset_idx"]
-                    + tf_response_cell["col_span"]
-                )
-                tf_response_cell["start_row_offset_idx"] = indexing_start_rows.index(
-                    tf_response_cell["start_row_offset_idx"]
-                )
-                tf_response_cell["end_row_offset_idx"] = (
-                    tf_response_cell["start_row_offset_idx"]
-                    + tf_response_cell["row_span"]
-                )
-            # Counting matched cols/rows from actual indexes (and not ids)
-            predict_details["num_cols"] = len(indexing_end_cols)
-            predict_details["num_rows"] = len(indexing_end_rows)
+            if sort_row_col_indexes:
+                # Fix col/row indexes
+                # Arranges all col/row indexes sequentially without gaps using input IDs
+
+                indexing_start_cols = (
+                    []
+                )  # Index of original start col IDs (not indexes)
+                indexing_end_cols = []  # Index of original end col IDs (not indexes)
+                indexing_start_rows = (
+                    []
+                )  # Index of original start row IDs (not indexes)
+                indexing_end_rows = []  # Index of original end row IDs (not indexes)
+
+                # First, collect all possible predicted IDs, to be used as indexes
+                # ID's returned by Tableformer are sequential, but might contain gaps
+                for tf_response_cell in tf_responses:
+                    start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
+                    end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
+                    start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
+                    end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
+
+                    # Collect all possible col/row IDs:
+                    if start_col_offset_idx not in indexing_start_cols:
+                        indexing_start_cols.append(start_col_offset_idx)
+                    if end_col_offset_idx not in indexing_end_cols:
+                        indexing_end_cols.append(end_col_offset_idx)
+                    if start_row_offset_idx not in indexing_start_rows:
+                        indexing_start_rows.append(start_row_offset_idx)
+                    if end_row_offset_idx not in indexing_end_rows:
+                        indexing_end_rows.append(end_row_offset_idx)
+
+                indexing_start_cols.sort()
+                indexing_end_cols.sort()
+                indexing_start_rows.sort()
+                indexing_end_rows.sort()
+
+                # After this - put actual indexes of IDs back into predicted structure...
+                for tf_response_cell in tf_responses:
+                    tf_response_cell["start_col_offset_idx"] = (
+                        indexing_start_cols.index(
+                            tf_response_cell["start_col_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_col_offset_idx"] = (
+                        tf_response_cell["start_col_offset_idx"]
+                        + tf_response_cell["col_span"]
+                    )
+                    tf_response_cell["start_row_offset_idx"] = (
+                        indexing_start_rows.index(
+                            tf_response_cell["start_row_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_row_offset_idx"] = (
+                        tf_response_cell["start_row_offset_idx"]
+                        + tf_response_cell["row_span"]
+                    )
+                # Counting matched cols/rows from actual indexes (and not ids)
+                predict_details["num_cols"] = len(indexing_end_cols)
+                predict_details["num_rows"] = len(indexing_end_rows)
+            else:
+                otsl_seq = predict_details["prediction"]["rs_seq"]
+                predict_details["num_cols"] = otsl_seq.index("nl")
+                predict_details["num_rows"] = otsl_seq.count("nl")
+
             # Put results into multi_tf_output
             multi_tf_output.append(
                 {"tf_responses": tf_responses, "predict_details": predict_details}
@@ -667,13 +686,20 @@ class TFPredictor:
         )
 
         if outputs_coord is not None:
-            bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
-            prediction["bboxes"] = bbox_pred.tolist()
+            if len(outputs_coord) == 0:
+                prediction["bboxes"] = []
+            else:
+                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                prediction["bboxes"] = bbox_pred.tolist()
         else:
             prediction["bboxes"] = []
+
         if outputs_class is not None:
-            result_class = torch.argmax(outputs_class, dim=1)
-            prediction["classes"] = result_class.tolist()
+            if len(outputs_class) == 0:
+                prediction["classes"] = []
+            else:
+                result_class = torch.argmax(outputs_class, dim=1)
+                prediction["classes"] = result_class.tolist()
         else:
             prediction["classes"] = []
         if self._remove_padding:
@@ -788,13 +814,20 @@ class TFPredictor:
         )
 
         if outputs_coord is not None:
-            bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
-            prediction["bboxes"] = bbox_pred.tolist()
+            if len(outputs_coord) == 0:
+                prediction["bboxes"] = []
+            else:
+                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                prediction["bboxes"] = bbox_pred.tolist()
         else:
             prediction["bboxes"] = []
+
         if outputs_class is not None:
-            result_class = torch.argmax(outputs_class, dim=1)
-            prediction["classes"] = result_class.tolist()
+            if len(outputs_class) == 0:
+                prediction["classes"] = []
+            else:
+                result_class = torch.argmax(outputs_class, dim=1)
+                prediction["classes"] = result_class.tolist()
         else:
             prediction["classes"] = []
         if self._remove_padding:
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
@@ -308,8 +308,12 @@ class TableModel04_rs(BaseModel, nn.Module):
 
         if len(outputs_coord1) > 0:
             outputs_coord1 = torch.stack(outputs_coord1)
+        else:
+            outputs_coord1 = torch.empty(0)
         if len(outputs_class1) > 0:
             outputs_class1 = torch.stack(outputs_class1)
+        else:
+            outputs_class1 = torch.empty(0)
 
         outputs_class = outputs_class1
         outputs_coord = outputs_coord1
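The model-side and predictor-side changes are two halves of one fix for empty tables: torch.stack raises on an empty list, so the model now falls back to torch.empty(0), and the predictor (patched above) checks len(...) == 0 before calling the bbox conversion or torch.argmax, which also fail on empty inputs. A short sketch of the guard, assuming only torch is installed:

    import torch

    outputs_class = torch.empty(0)  # what the model now returns for an empty table

    # torch.argmax(outputs_class, dim=1) would raise here, hence the length guard.
    if len(outputs_class) == 0:
        classes = []
    else:
        classes = torch.argmax(outputs_class, dim=1).tolist()
    print(classes)  # []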
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py
@@ -149,11 +149,11 @@ class Tag_Transformer(nn.Module):
         self._positional_encoding = PositionalEncoding(embed_dim)
         self._td_encode = td_encode
 
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
+        )
         self._encoder = nn.TransformerEncoder(
-            nn.TransformerEncoderLayer(
-                d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
-            ),
-            num_layers=encoder_layers,
+            encoder_layer, num_layers=encoder_layers, enable_nested_tensor=False
         )
 
         self._decoder = TMTransformerDecoder(
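Functionally the encoder is built the same way; the change hoists the layer into a named variable and passes enable_nested_tensor=False, which opts out of PyTorch's nested-tensor fast path for TransformerEncoder (and the warning emitted when that path cannot be used). A standalone sketch with made-up hyperparameters:

    import torch
    import torch.nn as nn

    # The hyperparameters below are illustrative, not the package's actual values.
    embed_dim, n_heads, dim_ff, encoder_layers = 512, 8, 1024, 4

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=embed_dim, nhead=n_heads, dim_feedforward=dim_ff
    )
    encoder = nn.TransformerEncoder(
        encoder_layer, num_layers=encoder_layers, enable_nested_tensor=False
    )

    x = torch.randn(10, 2, embed_dim)  # (sequence, batch, features)
    print(encoder(x).shape)  # torch.Size([10, 2, 512])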
docling_ibm_models/tableformer/utils/app_profiler.py
@@ -6,6 +6,8 @@ import time
 from collections import deque
 from statistics import mean, median
 
+from docling_ibm_models.tableformer.utils.mem_monitor import MemMonitor
+
 
 class SingletonClass(type):
     r"""
@@ -37,11 +39,13 @@ class Profiler:
     def __init__(self):
         self._section_dts = {}  # section name -> sum(section intervals)
         self._section_calls = {}  # section name -> number of invocations
-        self._section_kB = {}  # section name -> max kB of used heap
+        self._section_kB = {}  # section name -> max kB of used heap (resident set size)
 
         # section name -> beginning of the last interval
         self._last_begin = {}
 
+        self._mem_monitor = MemMonitor()
+
     def begin(self, section_name, enable=True):
         r"""
         Mark the beginning of an interval
@@ -83,13 +87,20 @@ class Profiler:
         if section_name not in self._last_begin:
             return False
 
+        # Get memory
+        kB = self._mem_monitor.get_memory()
+        if isinstance(kB, dict):
+            kB = kB["resident"]
+
         dt = time.time() - self._last_begin[section_name]
         if section_name not in self._section_dts:
             self._section_dts[section_name] = dt
             self._section_calls[section_name] = 1
+            self._section_kB[section_name] = kB
         else:
             self._section_dts[section_name] += dt
             self._section_calls[section_name] += 1
+            self._section_kB[section_name] = max(kB, self._section_kB[section_name])
 
         return True
 
docling_ibm_models/tableformer/utils/mem_monitor.py (new file in 1.2.1)
@@ -0,0 +1,175 @@
+#
+# Copyright IBM Corp. 2024 - 2024
+# SPDX-License-Identifier: MIT
+#
+import os
+import platform
+import re
+
+
+class MemMonitor:
+    r"""
+    Memory monitor for Linux
+
+    It supports 2 approaches for extracting memory information:
+    - linux-native: Parses the `/proc` pseudo-files. Available only on Linux.
+    - psutil: Uses the `psutil` library
+
+    ## Linux-native approach
+
+    The linux-native approach implements 2 methods to extract the memory fields:
+
+    1. The `get_memory()` method:
+
+    - It is very fast
+    - It parses the `/proc/<pid>/statm` pseudo-file
+    - It contains the following fields:
+        size     (1) total program size
+                 (same as VmSize in /proc/[pid]/status)
+        resident (2) resident set size
+                 (same as VmRSS in /proc/[pid]/status)
+        shared   (3) number of resident shared pages (i.e., backed by a file)
+                 (same as RssFile+RssShmem in /proc/[pid]/status)
+        text     (4) text (code)
+        lib      (5) library (unused since Linux 2.6; always 0)
+        data     (6) data + stack
+        dt       (7) dirty pages (unused since Linux 2.6; always 0)
+
+    2. The `get_memory_full()` method:
+
+    - It is slower to parse but contains more detailed information
+    - It uses regex to parse the `/proc/<pid>/status` pseudo-file
+    - It contains the following fields:
+        VmPeak: Peak virtual memory size.
+        VmSize: Virtual memory size.
+        VmLck: Locked memory size (see mlock(2)).
+        VmPin: Pinned memory size (since Linux 3.2). These are pages that can't be moved
+               because something needs to directly access physical memory.
+        VmHWM: Peak resident set size ("high water mark").
+        VmRSS: Resident set size. Note that the value here is the sum of RssAnon, RssFile,
+               and RssShmem.
+        RssAnon: Size of resident anonymous memory (since Linux 4.5).
+        RssFile: Size of resident file mappings (since Linux 4.5).
+        RssShmem: Size of resident shared memory (includes System V shared memory, mappings
+                  from tmpfs(5), and shared anonymous mappings) (since Linux 4.5).
+        VmData, VmStk, VmExe: Size of data, stack, and text segments.
+        VmLib: Shared library code size.
+        VmPTE: Page table entries size (since Linux 2.6.10).
+        VmPMD: Size of second-level page tables (added in Linux 4.0; removed in Linux 4.15).
+        VmSwap: Swapped-out virtual memory size by anonymous private pages; shmem swap usage
+                is not included (since Linux 2.6.34).
+
+    ## The psutil library
+
+    - Apparently the psutil library parses `/proc/<pid>/statm`
+    - The memory_info() function returns the fields: rss, vms, shared, text, lib, data, dirty
+
+    ## Field mappings
+
+    These are the fields returned by psutil memory_info() and their mapping in the /proc
+    files (a ? marks mappings I am not 100% sure about):
+
+    | psutil  | /proc/$$/status    | /proc/$$/statm |
+    |---------|--------------------|----------------|
+    | rss     | VmRSS              | resident       |
+    | vms     | VmSize             | size           |
+    | shared  | RssFile + RssShmem | shared         |
+    | text    | VmExe ?            | text           |
+    | lib     | RssShmem ?         | lib            |
+    | data    | VmData + VmStk     | data           |
+    | dirty   | VmSwap ?           | dt             |
+    """
+
+    def __init__(self, enable=True):
+        self._enable = enable
+        self._pid = os.getpid()
+
+        # Create a regex for each memory field of the /proc/status pseudo-file
+        self._status_fields = [
+            "VmPeak",
+            "VmSize",
+            "VmLck",
+            "VmPin",
+            "VmHWM",
+            "VmRSS",
+            "RssAnon",
+            "RssFile",
+            "RssShmem",
+            "VmData",
+            "VmStk",
+            "VmExe",
+            "VmLib",
+            "VmPTE",
+            "VmPMD",
+            "VmSwap",
+        ]
+        self._status_regex = {}
+        for mem_field in self._status_fields:
+            regex_str = r"({}:)(\s+)(\d*)(.*)".format(mem_field)
+            self._status_regex[mem_field] = re.compile(regex_str)
+
+    def get_memory_full(self) -> dict:
+        r"""
+        - Parse /proc/<pid>/status to get all memory info.
+        - The method returns a dict with the fields of self._status_fields
+        - This method is SLOW. Unless you need the full memory info, prefer `get_memory`
+
+        The returned values are in kB
+        """
+        if not self._enable:
+            return -2
+        if platform.system() != "Linux":
+            return -1
+        pid_fn = "/proc/{}/status".format(self._pid)
+
+        # Dict to collect all memory fields
+        memory = {}
+        with open(pid_fn, "r") as fn:
+            for ll in fn:
+                for mem_field in self._status_fields:
+                    regex = self._status_regex[mem_field]
+                    m = regex.match(ll)
+                    if m is not None:
+                        memory[mem_field] = int(m.group(3))
+                if len(memory) == len(self._status_fields):
+                    break
+
+        return memory
+
+    def get_memory(self) -> dict:
+        r"""
+        - Parse /proc/<pid>/statm to get the most important memory fields
+        - This is a fast implementation.
+        - The method returns a dict with the fields:
+          "size", "resident", "shared", "text", "lib", "data", "dt"
+        - Check the documentation at the top for a mapping across the various fields
+
+        The returned values are in kB
+        """
+        if not self._enable:
+            return -2
+        if platform.system() != "Linux":
+            return -1
+        pid_fn = "/proc/{}/statm".format(self._pid)
+
+        # Dict to collect all memory fields
+        memory = {}
+        with open(pid_fn, "r") as fn:
+            ll = fn.read()
+            # The values are in pages; each page is 4096 bytes (4 kB)
+            data = [int(x) << 2 for x in ll.split(" ")]
+            memory = {
+                "size": data[0],
+                "resident": data[1],
+                "shared": data[2],
+                "text": data[3],
+                "lib": data[4],
+                "data": data[5],
+                "dt": data[6],
+            }
+        return memory
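Given the class above, typical usage is a single call per sample point. On Linux, get_memory() returns a dict of kB values; on other platforms it returns -1 (or -2 when disabled), which is why the profiler change earlier checks isinstance(..., dict) before reading the resident field. A short usage sketch:

    from docling_ibm_models.tableformer.utils.mem_monitor import MemMonitor

    mm = MemMonitor()
    mem = mm.get_memory()  # dict of kB values on Linux, -1/-2 otherwise
    if isinstance(mem, dict):
        print("resident set size (kB):", mem["resident"])
    else:
        print("memory info unavailable:", mem)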
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling-ibm-models"
-version = "1.1.7" # DO NOT EDIT, updated automatically
+version = "1.2.1" # DO NOT EDIT, updated automatically
 description = "This package contains the AI models used by the Docling PDF conversion package"
 authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
docling_ibm_models/tableformer/utils/variance.py (removed in 1.2.1)
@@ -1,175 +0,0 @@
-#
-# Copyright IBM Corp. 2024 - 2024
-# SPDX-License-Identifier: MIT
-#
-import logging
-
-import numpy as np
-
-import docling_ibm_models.tableformer.settings as s
-
-LOG_LEVEL = logging.INFO
-
-
-class MyWelford:
-    r"""
-    Running computation of the sample mean and sample variance using Welford's algorithm
-    """
-
-    def __init__(self):
-        self._i = 0  # Running index
-        self._m = 0  # Running mean
-        self._s = 0  # (n - 1) * variance
-
-    def reset(self):
-        r"""
-        Reset the object
-        """
-        self._i = 0
-        self._m = 0
-        self._s = 0
-
-    def add(self, xi):
-        r"""
-        Invoke add each time a new sample arrives
-
-        Inputs:
-            xi: The next sample of data
-        """
-        self._i += 1
-        old_m = self._m
-        self._m = self._m + (xi - self._m) / self._i
-        self._s = self._s + (xi - self._m) * (xi - old_m)
-
-    def results(self):
-        r"""
-        Get the computed mean, variance and standard deviation up to now
-
-        Outputs:
-            m: Sample mean
-            v: Sample variance
-            std: Sample standard deviation
-        """
-        if self._i <= 1:
-            return None, None, None
-
-        # v = self._s / (self._i - 1)  # Sample variance
-        v = self._s / (self._i)  # Population variance
-        std = np.sqrt(v)
-        return self._m, v, std
-
-
-class MyWelfordImg(MyWelford):
-    r"""
-    Welford's algorithm to calculate the running mean and sample variance for images
-    """
-
-    def __init__(self):
-        super(MyWelfordImg, self).__init__()
-
-    def add(self, img):
-        r"""
-        Input:
-            img: An image numpy array (channel, width, height). The only requirement is to
-                 have the channels as the first dimension and 3 dimensions in total
-        """
-        channels = img.shape[0]
-        flat_dim = img.shape[1] * img.shape[2]
-        img_r = img.reshape(channels, flat_dim)
-
-        for i in range(flat_dim):
-            super(MyWelfordImg, self).add(img_r[:, i])
-
-
-class ChanVarianceImg:
-    r"""
-    Chan's algorithm to compute a running variance with support for sub-samples.
-    In this implementation each sub-sample is an image.
-
-    Math from the original paper:
-    https://github.ibm.com/nli/variance_formulae
-    """
-
-    def __init__(self):
-        r""" """
-        self._first = True
-        # Size of the calculated dataset
-        self._n = 0
-        # Sum of the samples for the 3 image channels
-        self._t = 0
-        # Sum of the square differences of the deviations of the samples from the mean
-        self._s = 0
-
-    def add(self, img):
-        r"""
-        Add the provided image to the computation of the dataset statistics
-
-        Input:
-            img: An image numpy array (channel, width, height). The only requirement is to
-                 have the channels as the first dimension and 3 dimensions in total
-        """
-        ch = img.shape[0]
-        n = img.shape[1] * img.shape[2]
-        img = img.reshape(ch, n)
-        img_t = img.sum(axis=1)
-        img_t_v = img_t.reshape(ch, 1)
-        diff = (img - (img_t_v / n)) ** 2
-        img_s = diff.sum(axis=1)
-
-        if not self._first:
-            c = (self._n / (n * (self._n + n))) * (
-                ((n / self._n) * self._t - img_t) ** 2
-            )
-            self._s += img_s + c
-            self._t += img_t
-        else:
-            self._s = img_s
-            self._t = img_t
-            self._first = False
-        self._n += n
-
-    def results(self):
-        r"""
-        Get the computed statistics
-
-        Output:
-            mean: Mean for the complete dataset
-            var: Population variance for the complete dataset
-            std: Population standard deviation for the complete dataset
-        """
-        mean = list(self._t / self._n)
-        var = list(self._s / self._n)  # Population variance
-        std = list(np.sqrt(var))
-
-        return mean, var, std
-
-    def reset(self):
-        r"""
-        Reset the object to start over again
-        """
-        self._n = 0
-        self._t = 0
-        self._s = 0
-        self._first = True
-
-
-if __name__ == "__main__":
-    logger = s.get_custom_logger("variance", LOG_LEVEL)
-
-    n = 50000
-    channels = 3
-    width = 448
-    height = 448
-
-    my = ChanVarianceImg()
-    # Generate random images
-    for i in range(n):
-        logger.info(i)
-        img = 255 * np.random.rand(channels, width, height)
-        my.add(img)
-
-    # Calculate the statistics
-    m, v, std = my.results()
-    assert m.shape == (3,), "Wrong mean dimension"
-    assert v.shape == (3,), "Wrong variance dimension"
-    assert std.shape == (3,), "Wrong std dimension"