docling-ibm-models 1.2.0.tar.gz → 1.3.0.tar.gz

Files changed (32)
  1. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/PKG-INFO +5 -7
  2. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/README.md +3 -6
  3. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +9 -6
  4. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +92 -59
  5. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +4 -0
  6. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/pyproject.toml +2 -1
  7. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/LICENSE +0 -0
  8. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  9. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/__init__.py +0 -0
  10. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/common.py +0 -0
  11. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  12. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
  13. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  14. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  15. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
  16. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  17. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  18. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  19. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  20. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  21. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  22. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  23. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  24. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/otsl.py +0 -0
  25. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/settings.py +0 -0
  26. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
  27. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
  28. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  29. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  30. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  31. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  32. {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 1.2.0
+Version: 1.3.0
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Dist: Pillow (>=10.0.0,<11.0.0)
+Requires-Dist: huggingface_hub (>=0.23,<1)
 Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: lxml (>=4.9.1,<5.0.0)
 Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
@@ -110,7 +111,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
 
 ## Configuration file
 
-Example configuration can be seen inside test `tests/test_tf_predictor.py`
+Example configuration can be found inside test `tests/test_tf_predictor.py`
 These are the main sections of the configuration file:
 
 - `dataset`: The directory for prepared data and the parameters used during the data loading.
@@ -128,16 +129,13 @@ You can download the model weights and config files from the links:
 - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
 - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
 
-Place the downloaded files into `tests/test_data/model_artifacts/` directory.
-
 
 ## Inference Tests
 
-This contains unit tests for Docling models.
+You can run the inference tests for the models with:
 
-First download the model weights (see above), then run:
 ```
-./devtools/check_code.sh
+python -m pytest tests/
 ```
 
 This will also generate prediction and matching visualizations that can be found here:
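
The removed manual-download step lines up with the new `huggingface_hub` dependency added in this release. As a hedged sketch (the exact download call Docling uses is not part of this diff), the linked artifacts could be fetched programmatically; `snapshot_download` is a real huggingface_hub API, the `repo_id` comes from the links above, and the `allow_patterns` value is an assumption based on the linked repo layout:

```
# Hedged sketch: fetch the model artifacts with huggingface_hub instead of a
# manual download. The allow_patterns filter is a guess from the repo layout
# linked in the README, not taken from this diff.
from huggingface_hub import snapshot_download

artifacts_path = snapshot_download(
    repo_id="ds4sd/docling-models",
    allow_patterns=["model_artifacts/tableformer/*"],
)
print(artifacts_path)  # local cache directory holding the downloaded files
```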
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/README.md

@@ -76,7 +76,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
 
 ## Configuration file
 
-Example configuration can be seen inside test `tests/test_tf_predictor.py`
+Example configuration can be found inside test `tests/test_tf_predictor.py`
 These are the main sections of the configuration file:
 
 - `dataset`: The directory for prepared data and the parameters used during the data loading.
@@ -94,16 +94,13 @@ You can download the model weights and config files from the links:
 - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
 - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
 
-Place the downloaded files into `tests/test_data/model_artifacts/` directory.
-
 
 ## Inference Tests
 
-This contains unit tests for Docling models.
+You can run the inference tests for the models with:
 
-First download the model weights (see above), then run:
 ```
-./devtools/check_code.sh
+python -m pytest tests/
 ```
 
 This will also generate prediction and matching visualizations that can be found here:
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py

@@ -129,12 +129,15 @@ class CellMatcher:
         pdf_cells = copy.deepcopy(iocr_page["tokens"])
         if len(pdf_cells) > 0:
            for word in pdf_cells:
-                word["bbox"] = [
-                    word["bbox"]["l"],
-                    word["bbox"]["t"],
-                    word["bbox"]["r"],
-                    word["bbox"]["b"],
-                ]
+                if isinstance(word["bbox"], list):
+                    continue
+                elif isinstance(word["bbox"], dict):
+                    word["bbox"] = [
+                        word["bbox"]["l"],
+                        word["bbox"]["t"],
+                        word["bbox"]["r"],
+                        word["bbox"]["b"],
+                    ]
         table_bboxes = prediction["bboxes"]
         table_classes = prediction["classes"]
         # BBOXES transformed...
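
The change makes the bbox normalization idempotent: a dict-style bbox is flattened to `[l, t, r, b]`, while a bbox that is already a list passes through untouched. A standalone illustration of the same logic (not the library's actual code path):

```
# Standalone illustration of the new bbox handling in tf_cell_matcher.py:
# dict-style bboxes become [l, t, r, b]; list-style bboxes pass through.
def normalize_bbox(bbox):
    if isinstance(bbox, list):
        return bbox  # already normalized, nothing to do
    if isinstance(bbox, dict):
        return [bbox["l"], bbox["t"], bbox["r"], bbox["b"]]
    raise TypeError(f"unsupported bbox type: {type(bbox).__name__}")

print(normalize_bbox({"l": 10, "t": 20, "r": 30, "b": 40}))  # [10, 20, 30, 40]
print(normalize_bbox([10, 20, 30, 40]))                      # unchanged
```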
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py

@@ -524,7 +524,12 @@ class TFPredictor:
         return resized, sf
 
     def multi_table_predict(
-        self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
+        self,
+        iocr_page,
+        table_bboxes,
+        do_matching=True,
+        correct_overlapping_cells=False,
+        sort_row_col_indexes=True,
     ):
         multi_tf_output = []
         page_image = iocr_page["image"]
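
A hypothetical call site for the extended signature; `predictor`, `iocr_page`, and `table_bboxes` stand in for objects prepared elsewhere and are not defined in this diff:

```
# Hypothetical usage of the new keyword argument (names other than
# multi_table_predict and its parameters are stand-ins, not from this diff).
outputs = predictor.multi_table_predict(
    iocr_page,
    table_bboxes,
    do_matching=True,
    correct_overlapping_cells=False,
    sort_row_col_indexes=False,  # new in 1.3.0; defaults to True
)
for table in outputs:
    details = table["predict_details"]
    print(details["num_cols"], details["num_rows"])
```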
@@ -563,56 +568,70 @@
             # PROCESS PREDICTED RESULTS, TO TURN PREDICTED COL/ROW IDs into Indexes
             # Indexes should be in increasing order, without gaps
 
-            # Fix col/row indexes
-            # Arranges all col/row indexes sequentially without gaps using input IDs
-
-            indexing_start_cols = []  # Index of original start col IDs (not indexes)
-            indexing_end_cols = []  # Index of original end col IDs (not indexes)
-            indexing_start_rows = []  # Index of original start row IDs (not indexes)
-            indexing_end_rows = []  # Index of original end row IDs (not indexes)
-
-            # First, collect all possible predicted IDs, to be used as indexes
-            # ID's returned by Tableformer are sequential, but might contain gaps
-            for tf_response_cell in tf_responses:
-                start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
-                end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
-                start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
-                end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
-
-                # Collect all possible col/row IDs:
-                if start_col_offset_idx not in indexing_start_cols:
-                    indexing_start_cols.append(start_col_offset_idx)
-                if end_col_offset_idx not in indexing_end_cols:
-                    indexing_end_cols.append(end_col_offset_idx)
-                if start_row_offset_idx not in indexing_start_rows:
-                    indexing_start_rows.append(start_row_offset_idx)
-                if end_row_offset_idx not in indexing_end_rows:
-                    indexing_end_rows.append(end_row_offset_idx)
-
-            indexing_start_cols.sort()
-            indexing_end_cols.sort()
-            indexing_start_rows.sort()
-            indexing_end_rows.sort()
-
-            # After this - put actual indexes of IDs back into predicted structure...
-            for tf_response_cell in tf_responses:
-                tf_response_cell["start_col_offset_idx"] = indexing_start_cols.index(
-                    tf_response_cell["start_col_offset_idx"]
-                )
-                tf_response_cell["end_col_offset_idx"] = (
-                    tf_response_cell["start_col_offset_idx"]
-                    + tf_response_cell["col_span"]
-                )
-                tf_response_cell["start_row_offset_idx"] = indexing_start_rows.index(
-                    tf_response_cell["start_row_offset_idx"]
-                )
-                tf_response_cell["end_row_offset_idx"] = (
-                    tf_response_cell["start_row_offset_idx"]
-                    + tf_response_cell["row_span"]
-                )
-            # Counting matched cols/rows from actual indexes (and not ids)
-            predict_details["num_cols"] = len(indexing_end_cols)
-            predict_details["num_rows"] = len(indexing_end_rows)
+            if sort_row_col_indexes:
+                # Fix col/row indexes
+                # Arranges all col/row indexes sequentially without gaps using input IDs
+
+                indexing_start_cols = (
+                    []
+                )  # Index of original start col IDs (not indexes)
+                indexing_end_cols = []  # Index of original end col IDs (not indexes)
+                indexing_start_rows = (
+                    []
+                )  # Index of original start row IDs (not indexes)
+                indexing_end_rows = []  # Index of original end row IDs (not indexes)
+
+                # First, collect all possible predicted IDs, to be used as indexes
+                # ID's returned by Tableformer are sequential, but might contain gaps
+                for tf_response_cell in tf_responses:
+                    start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
+                    end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
+                    start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
+                    end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
+
+                    # Collect all possible col/row IDs:
+                    if start_col_offset_idx not in indexing_start_cols:
+                        indexing_start_cols.append(start_col_offset_idx)
+                    if end_col_offset_idx not in indexing_end_cols:
+                        indexing_end_cols.append(end_col_offset_idx)
+                    if start_row_offset_idx not in indexing_start_rows:
+                        indexing_start_rows.append(start_row_offset_idx)
+                    if end_row_offset_idx not in indexing_end_rows:
+                        indexing_end_rows.append(end_row_offset_idx)
+
+                indexing_start_cols.sort()
+                indexing_end_cols.sort()
+                indexing_start_rows.sort()
+                indexing_end_rows.sort()
+
+                # After this - put actual indexes of IDs back into predicted structure...
+                for tf_response_cell in tf_responses:
+                    tf_response_cell["start_col_offset_idx"] = (
+                        indexing_start_cols.index(
+                            tf_response_cell["start_col_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_col_offset_idx"] = (
+                        tf_response_cell["start_col_offset_idx"]
+                        + tf_response_cell["col_span"]
+                    )
+                    tf_response_cell["start_row_offset_idx"] = (
+                        indexing_start_rows.index(
+                            tf_response_cell["start_row_offset_idx"]
+                        )
+                    )
+                    tf_response_cell["end_row_offset_idx"] = (
+                        tf_response_cell["start_row_offset_idx"]
+                        + tf_response_cell["row_span"]
+                    )
+                # Counting matched cols/rows from actual indexes (and not ids)
+                predict_details["num_cols"] = len(indexing_end_cols)
+                predict_details["num_rows"] = len(indexing_end_rows)
+            else:
+                otsl_seq = predict_details["prediction"]["rs_seq"]
+                predict_details["num_cols"] = otsl_seq.index("nl")
+                predict_details["num_rows"] = otsl_seq.count("nl")
+
             # Put results into multi_tf_output
             multi_tf_output.append(
                 {"tf_responses": tf_responses, "predict_details": predict_details}
@@ -667,13 +686,20 @@ class TFPredictor:
             )
 
             if outputs_coord is not None:
-                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
-                prediction["bboxes"] = bbox_pred.tolist()
+                if len(outputs_coord) == 0:
+                    prediction["bboxes"] = []
+                else:
+                    bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                    prediction["bboxes"] = bbox_pred.tolist()
             else:
                 prediction["bboxes"] = []
+
             if outputs_class is not None:
-                result_class = torch.argmax(outputs_class, dim=1)
-                prediction["classes"] = result_class.tolist()
+                if len(outputs_class) == 0:
+                    prediction["classes"] = []
+                else:
+                    result_class = torch.argmax(outputs_class, dim=1)
+                    prediction["classes"] = result_class.tolist()
             else:
                 prediction["classes"] = []
             if self._remove_padding:
@@ -788,13 +814,20 @@ class TFPredictor:
             )
 
             if outputs_coord is not None:
-                bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
-                prediction["bboxes"] = bbox_pred.tolist()
+                if len(outputs_coord) == 0:
+                    prediction["bboxes"] = []
+                else:
+                    bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
+                    prediction["bboxes"] = bbox_pred.tolist()
             else:
                 prediction["bboxes"] = []
+
             if outputs_class is not None:
-                result_class = torch.argmax(outputs_class, dim=1)
-                prediction["classes"] = result_class.tolist()
+                if len(outputs_class) == 0:
+                    prediction["classes"] = []
+                else:
+                    result_class = torch.argmax(outputs_class, dim=1)
+                    prediction["classes"] = result_class.tolist()
             else:
                 prediction["classes"] = []
             if self._remove_padding:
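
The repeated guard in both methods exists because `torch.argmax(..., dim=1)` raises on an empty tensor, so zero-cell predictions now short-circuit to empty lists. A minimal sketch of the failure mode being avoided:

```
# Minimal sketch of why the length guard is needed: argmax over dim=1 of an
# empty tensor raises, so empty predictions must map straight to [].
import torch

outputs_class = torch.empty(0)  # what the model now returns for zero cells
if len(outputs_class) == 0:
    classes = []  # the new short-circuit path
else:
    classes = torch.argmax(outputs_class, dim=1).tolist()
print(classes)  # []
```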
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py

@@ -308,8 +308,12 @@ class TableModel04_rs(BaseModel, nn.Module):
 
         if len(outputs_coord1) > 0:
             outputs_coord1 = torch.stack(outputs_coord1)
+        else:
+            outputs_coord1 = torch.empty(0)
         if len(outputs_class1) > 0:
             outputs_class1 = torch.stack(outputs_class1)
+        else:
+            outputs_class1 = torch.empty(0)
 
         outputs_class = outputs_class1
         outputs_coord = outputs_coord1
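
This pairs with the predictor-side guards above: `torch.stack` raises `RuntimeError` on an empty list, so the model now returns an explicit empty tensor when no cells were decoded. A minimal sketch:

```
# Minimal sketch of the fallback: torch.stack([]) raises RuntimeError
# ("stack expects a non-empty TensorList"), hence the explicit empty tensor.
import torch

outputs_coord1 = []  # no cell coordinates decoded
if len(outputs_coord1) > 0:
    outputs_coord1 = torch.stack(outputs_coord1)
else:
    outputs_coord1 = torch.empty(0)
print(outputs_coord1.shape)  # torch.Size([0])
```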
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-ibm-models"
3
- version = "1.2.0" # DO NOT EDIT, updated automatically
3
+ version = "1.3.0" # DO NOT EDIT, updated automatically
4
4
  description = "This package contains the AI models used by the Docling PDF conversion package"
5
5
  authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -38,6 +38,7 @@ Pillow = "^10.0.0"
38
38
  tqdm = "^4.64.0"
39
39
  mean_average_precision = "^2021.4.26.0"
40
40
  opencv-python-headless = { version = "^4.9.0.80" }
41
+ huggingface_hub = ">=0.23,<1"
41
42
 
42
43
  [tool.poetry.dev-dependencies]
43
44
  black = {extras = ["jupyter"], version = "^24.4.2"}