docling-ibm-models 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/PKG-INFO +5 -7
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/README.md +3 -6
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +9 -6
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +92 -59
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +4 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/pyproject.toml +2 -1
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/LICENSE +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/common.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/otsl.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/settings.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
- {docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 1.2.0
|
3
|
+
Version: 1.3.0
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
5
|
License: MIT
|
6
6
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
20
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
21
21
|
Requires-Dist: Pillow (>=10.0.0,<11.0.0)
|
22
|
+
Requires-Dist: huggingface_hub (>=0.23,<1)
|
22
23
|
Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
|
23
24
|
Requires-Dist: lxml (>=4.9.1,<5.0.0)
|
24
25
|
Requires-Dist: mean_average_precision (>=2021.4.26.0,<2022.0.0.0)
|
@@ -110,7 +111,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
|
|
110
111
|
|
111
112
|
## Configuration file
|
112
113
|
|
113
|
-
Example configuration can be
|
114
|
+
Example configuration can be found inside test `tests/test_tf_predictor.py`
|
114
115
|
These are the main sections of the configuration file:
|
115
116
|
|
116
117
|
- `dataset`: The directory for prepared data and the parameters used during the data loading.
|
@@ -128,16 +129,13 @@ You can download the model weights and config files from the links:
|
|
128
129
|
- [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
|
129
130
|
- [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
|
130
131
|
|
131
|
-
Place the downloaded files into `tests/test_data/model_artifacts/` directory.
|
132
|
-
|
133
132
|
|
134
133
|
## Inference Tests
|
135
134
|
|
136
|
-
|
135
|
+
You can run the inference tests for the models with:
|
137
136
|
|
138
|
-
First download the model weights (see above), then run:
|
139
137
|
```
|
140
|
-
|
138
|
+
python -m pytest tests/
|
141
139
|
```
|
142
140
|
|
143
141
|
This will also generate prediction and matching visualizations that can be found here:
|
@@ -76,7 +76,7 @@ Below we list datasets used with their description, source, and ***"TableFormer
|
|
76
76
|
|
77
77
|
## Configuration file
|
78
78
|
|
79
|
-
Example configuration can be
|
79
|
+
Example configuration can be found inside test `tests/test_tf_predictor.py`
|
80
80
|
These are the main sections of the configuration file:
|
81
81
|
|
82
82
|
- `dataset`: The directory for prepared data and the parameters used during the data loading.
|
@@ -94,16 +94,13 @@ You can download the model weights and config files from the links:
|
|
94
94
|
- [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
|
95
95
|
- [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
|
96
96
|
|
97
|
-
Place the downloaded files into `tests/test_data/model_artifacts/` directory.
|
98
|
-
|
99
97
|
|
100
98
|
## Inference Tests
|
101
99
|
|
102
|
-
|
100
|
+
You can run the inference tests for the models with:
|
103
101
|
|
104
|
-
First download the model weights (see above), then run:
|
105
102
|
```
|
106
|
-
|
103
|
+
python -m pytest tests/
|
107
104
|
```
|
108
105
|
|
109
106
|
This will also generate prediction and matching visualizations that can be found here:
|
@@ -129,12 +129,15 @@ class CellMatcher:
|
|
129
129
|
pdf_cells = copy.deepcopy(iocr_page["tokens"])
|
130
130
|
if len(pdf_cells) > 0:
|
131
131
|
for word in pdf_cells:
|
132
|
-
word["bbox"]
|
133
|
-
|
134
|
-
|
135
|
-
word["bbox"][
|
136
|
-
|
137
|
-
|
132
|
+
if isinstance(word["bbox"], list):
|
133
|
+
continue
|
134
|
+
elif isinstance(word["bbox"], dict):
|
135
|
+
word["bbox"] = [
|
136
|
+
word["bbox"]["l"],
|
137
|
+
word["bbox"]["t"],
|
138
|
+
word["bbox"]["r"],
|
139
|
+
word["bbox"]["b"],
|
140
|
+
]
|
138
141
|
table_bboxes = prediction["bboxes"]
|
139
142
|
table_classes = prediction["classes"]
|
140
143
|
# BBOXES transformed...
|
@@ -524,7 +524,12 @@ class TFPredictor:
|
|
524
524
|
return resized, sf
|
525
525
|
|
526
526
|
def multi_table_predict(
|
527
|
-
self,
|
527
|
+
self,
|
528
|
+
iocr_page,
|
529
|
+
table_bboxes,
|
530
|
+
do_matching=True,
|
531
|
+
correct_overlapping_cells=False,
|
532
|
+
sort_row_col_indexes=True,
|
528
533
|
):
|
529
534
|
multi_tf_output = []
|
530
535
|
page_image = iocr_page["image"]
|
@@ -563,56 +568,70 @@ class TFPredictor:
|
|
563
568
|
# PROCESS PREDICTED RESULTS, TO TURN PREDICTED COL/ROW IDs into Indexes
|
564
569
|
# Indexes should be in increasing order, without gaps
|
565
570
|
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
571
|
+
if sort_row_col_indexes:
|
572
|
+
# Fix col/row indexes
|
573
|
+
# Arranges all col/row indexes sequentially without gaps using input IDs
|
574
|
+
|
575
|
+
indexing_start_cols = (
|
576
|
+
[]
|
577
|
+
) # Index of original start col IDs (not indexes)
|
578
|
+
indexing_end_cols = [] # Index of original end col IDs (not indexes)
|
579
|
+
indexing_start_rows = (
|
580
|
+
[]
|
581
|
+
) # Index of original start row IDs (not indexes)
|
582
|
+
indexing_end_rows = [] # Index of original end row IDs (not indexes)
|
583
|
+
|
584
|
+
# First, collect all possible predicted IDs, to be used as indexes
|
585
|
+
# ID's returned by Tableformer are sequential, but might contain gaps
|
586
|
+
for tf_response_cell in tf_responses:
|
587
|
+
start_col_offset_idx = tf_response_cell["start_col_offset_idx"]
|
588
|
+
end_col_offset_idx = tf_response_cell["end_col_offset_idx"]
|
589
|
+
start_row_offset_idx = tf_response_cell["start_row_offset_idx"]
|
590
|
+
end_row_offset_idx = tf_response_cell["end_row_offset_idx"]
|
591
|
+
|
592
|
+
# Collect all possible col/row IDs:
|
593
|
+
if start_col_offset_idx not in indexing_start_cols:
|
594
|
+
indexing_start_cols.append(start_col_offset_idx)
|
595
|
+
if end_col_offset_idx not in indexing_end_cols:
|
596
|
+
indexing_end_cols.append(end_col_offset_idx)
|
597
|
+
if start_row_offset_idx not in indexing_start_rows:
|
598
|
+
indexing_start_rows.append(start_row_offset_idx)
|
599
|
+
if end_row_offset_idx not in indexing_end_rows:
|
600
|
+
indexing_end_rows.append(end_row_offset_idx)
|
601
|
+
|
602
|
+
indexing_start_cols.sort()
|
603
|
+
indexing_end_cols.sort()
|
604
|
+
indexing_start_rows.sort()
|
605
|
+
indexing_end_rows.sort()
|
606
|
+
|
607
|
+
# After this - put actual indexes of IDs back into predicted structure...
|
608
|
+
for tf_response_cell in tf_responses:
|
609
|
+
tf_response_cell["start_col_offset_idx"] = (
|
610
|
+
indexing_start_cols.index(
|
611
|
+
tf_response_cell["start_col_offset_idx"]
|
612
|
+
)
|
613
|
+
)
|
614
|
+
tf_response_cell["end_col_offset_idx"] = (
|
615
|
+
tf_response_cell["start_col_offset_idx"]
|
616
|
+
+ tf_response_cell["col_span"]
|
617
|
+
)
|
618
|
+
tf_response_cell["start_row_offset_idx"] = (
|
619
|
+
indexing_start_rows.index(
|
620
|
+
tf_response_cell["start_row_offset_idx"]
|
621
|
+
)
|
622
|
+
)
|
623
|
+
tf_response_cell["end_row_offset_idx"] = (
|
624
|
+
tf_response_cell["start_row_offset_idx"]
|
625
|
+
+ tf_response_cell["row_span"]
|
626
|
+
)
|
627
|
+
# Counting matched cols/rows from actual indexes (and not ids)
|
628
|
+
predict_details["num_cols"] = len(indexing_end_cols)
|
629
|
+
predict_details["num_rows"] = len(indexing_end_rows)
|
630
|
+
else:
|
631
|
+
otsl_seq = predict_details["prediction"]["rs_seq"]
|
632
|
+
predict_details["num_cols"] = otsl_seq.index("nl")
|
633
|
+
predict_details["num_rows"] = otsl_seq.count("nl")
|
634
|
+
|
616
635
|
# Put results into multi_tf_output
|
617
636
|
multi_tf_output.append(
|
618
637
|
{"tf_responses": tf_responses, "predict_details": predict_details}
|
@@ -667,13 +686,20 @@ class TFPredictor:
|
|
667
686
|
)
|
668
687
|
|
669
688
|
if outputs_coord is not None:
|
670
|
-
|
671
|
-
|
689
|
+
if len(outputs_coord) == 0:
|
690
|
+
prediction["bboxes"] = []
|
691
|
+
else:
|
692
|
+
bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
|
693
|
+
prediction["bboxes"] = bbox_pred.tolist()
|
672
694
|
else:
|
673
695
|
prediction["bboxes"] = []
|
696
|
+
|
674
697
|
if outputs_class is not None:
|
675
|
-
|
676
|
-
|
698
|
+
if len(outputs_class) == 0:
|
699
|
+
prediction["classes"] = []
|
700
|
+
else:
|
701
|
+
result_class = torch.argmax(outputs_class, dim=1)
|
702
|
+
prediction["classes"] = result_class.tolist()
|
677
703
|
else:
|
678
704
|
prediction["classes"] = []
|
679
705
|
if self._remove_padding:
|
@@ -788,13 +814,20 @@ class TFPredictor:
|
|
788
814
|
)
|
789
815
|
|
790
816
|
if outputs_coord is not None:
|
791
|
-
|
792
|
-
|
817
|
+
if len(outputs_coord) == 0:
|
818
|
+
prediction["bboxes"] = []
|
819
|
+
else:
|
820
|
+
bbox_pred = u.box_cxcywh_to_xyxy(outputs_coord)
|
821
|
+
prediction["bboxes"] = bbox_pred.tolist()
|
793
822
|
else:
|
794
823
|
prediction["bboxes"] = []
|
824
|
+
|
795
825
|
if outputs_class is not None:
|
796
|
-
|
797
|
-
|
826
|
+
if len(outputs_class) == 0:
|
827
|
+
prediction["classes"] = []
|
828
|
+
else:
|
829
|
+
result_class = torch.argmax(outputs_class, dim=1)
|
830
|
+
prediction["classes"] = result_class.tolist()
|
798
831
|
else:
|
799
832
|
prediction["classes"] = []
|
800
833
|
if self._remove_padding:
|
@@ -308,8 +308,12 @@ class TableModel04_rs(BaseModel, nn.Module):
|
|
308
308
|
|
309
309
|
if len(outputs_coord1) > 0:
|
310
310
|
outputs_coord1 = torch.stack(outputs_coord1)
|
311
|
+
else:
|
312
|
+
outputs_coord1 = torch.empty(0)
|
311
313
|
if len(outputs_class1) > 0:
|
312
314
|
outputs_class1 = torch.stack(outputs_class1)
|
315
|
+
else:
|
316
|
+
outputs_class1 = torch.empty(0)
|
313
317
|
|
314
318
|
outputs_class = outputs_class1
|
315
319
|
outputs_coord = outputs_coord1
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling-ibm-models"
|
3
|
-
version = "1.2.0"
|
3
|
+
version = "1.3.0" # DO NOT EDIT, updated automatically
|
4
4
|
description = "This package contains the AI models used by the Docling PDF conversion package"
|
5
5
|
authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
@@ -38,6 +38,7 @@ Pillow = "^10.0.0"
|
|
38
38
|
tqdm = "^4.64.0"
|
39
39
|
mean_average_precision = "^2021.4.26.0"
|
40
40
|
opencv-python-headless = { version = "^4.9.0.80" }
|
41
|
+
huggingface_hub = ">=0.23,<1"
|
41
42
|
|
42
43
|
[tool.poetry.dev-dependencies]
|
43
44
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
File without changes
|
File without changes
|
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/__init__.py
RENAMED
File without changes
|
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/common.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/otsl.py
RENAMED
File without changes
|
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/settings.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{docling_ibm_models-1.2.0 → docling_ibm_models-1.3.0}/docling_ibm_models/tableformer/utils/utils.py
RENAMED
File without changes
|