docling-ibm-models 3.4.3__tar.gz → 3.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/PKG-INFO +27 -79
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/README.md +1 -48
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt.py +2 -2
- docling_ibm_models-3.4.4/docling_ibm_models.egg-info/PKG-INFO +129 -0
- docling_ibm_models-3.4.4/docling_ibm_models.egg-info/SOURCES.txt +50 -0
- docling_ibm_models-3.4.4/docling_ibm_models.egg-info/dependency_links.txt +1 -0
- docling_ibm_models-3.4.4/docling_ibm_models.egg-info/requires.txt +12 -0
- docling_ibm_models-3.4.4/docling_ibm_models.egg-info/top_level.txt +1 -0
- docling_ibm_models-3.4.4/pyproject.toml +105 -0
- docling_ibm_models-3.4.4/setup.cfg +4 -0
- docling_ibm_models-3.4.4/tests/test_code_formula_predictor.py +141 -0
- docling_ibm_models-3.4.4/tests/test_common.py +89 -0
- docling_ibm_models-3.4.4/tests/test_document_figure_classifier.py +98 -0
- docling_ibm_models-3.4.4/tests/test_layout_predictor.py +110 -0
- docling_ibm_models-3.4.4/tests/test_reading_order.py +272 -0
- docling_ibm_models-3.4.4/tests/test_tf_predictor.py +580 -0
- docling_ibm_models-3.4.3/pyproject.toml +0 -123
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/LICENSE +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/code_formula_predictor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/py.typed +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/reading_order_rb.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/common.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_predictor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/otsl.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/settings.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
- {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,45 +1,41 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: docling-ibm-models
|
3
|
-
Version: 3.4.3
|
3
|
+
Version: 3.4.4
|
4
4
|
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
|
-
|
5
|
+
Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
|
8
|
+
Project-URL: repository, https://github.com/docling-project/docling-ibm-models
|
9
|
+
Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
|
10
|
+
Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
|
6
11
|
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
7
|
-
|
8
|
-
|
9
|
-
Requires-Python: >=3.9,<4.0
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
10
14
|
Classifier: Development Status :: 5 - Production/Stable
|
11
15
|
Classifier: Intended Audience :: Developers
|
12
16
|
Classifier: Intended Audience :: Science/Research
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
14
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
15
|
-
Classifier: Operating System :: POSIX :: Linux
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
21
|
-
Classifier: Programming Language :: Python :: 3.13
|
22
17
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
23
|
-
|
24
|
-
Requires-
|
25
|
-
Requires-Dist: huggingface_hub (>=0.23,<1)
|
26
|
-
Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
|
27
|
-
Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
|
28
|
-
Requires-Dist: numpy (>=1.24.4,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
29
|
-
Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
|
30
|
-
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
31
|
-
Requires-Dist: safetensors[torch] (>=0.4.3,<1)
|
32
|
-
Requires-Dist: torch (>=2.2.2,<3.0.0)
|
33
|
-
Requires-Dist: torchvision (>=0,<1)
|
34
|
-
Requires-Dist: tqdm (>=4.64.0,<5.0.0)
|
35
|
-
Requires-Dist: transformers (>=4.42.0,<4.43.0) ; python_version < "3.13" and sys_platform == "darwin" and platform_machine == "x86_64"
|
36
|
-
Requires-Dist: transformers (>=4.42.0,<5.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
|
37
|
-
Requires-Dist: transformers (>=4.47.0,<5.0.0) ; python_version >= "3.13" and (sys_platform != "darwin" or platform_machine != "x86_64")
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
19
|
+
Requires-Python: <4.0,>=3.9
|
38
20
|
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: torch<3.0.0,>=2.2.2
|
23
|
+
Requires-Dist: torchvision<1,>=0
|
24
|
+
Requires-Dist: jsonlines<4.0.0,>=3.1.0
|
25
|
+
Requires-Dist: Pillow<12.0.0,>=10.0.0
|
26
|
+
Requires-Dist: tqdm<5.0.0,>=4.64.0
|
27
|
+
Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
|
28
|
+
Requires-Dist: huggingface_hub<1,>=0.23
|
29
|
+
Requires-Dist: safetensors[torch]<1,>=0.4.3
|
30
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
31
|
+
Requires-Dist: docling-core<3.0.0,>=2.19.0
|
32
|
+
Requires-Dist: transformers<5.0.0,>=4.42.0
|
33
|
+
Requires-Dist: numpy<3.0.0,>=1.24.4
|
34
|
+
Dynamic: license-file
|
39
35
|
|
40
36
|
[](https://pypi.org/project/docling-ibm-models/)
|
41
37
|
[](https://pypi.org/project/docling-ibm-models/)
|
42
|
-
[](https://github.com/astral-sh/uv)
|
43
39
|
[](https://github.com/psf/black)
|
44
40
|
[](https://pycqa.github.io/isort/)
|
45
41
|
[](https://github.com/pre-commit/pre-commit)
|
@@ -54,53 +50,6 @@ AI modules to support the Docling PDF document conversion project.
|
|
54
50
|
- The Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
|
55
51
|
|
56
52
|
|
57
|
-
## Installation Instructions
|
58
|
-
|
59
|
-
### MacOS / Linux
|
60
|
-
|
61
|
-
To install `poetry` locally, use either `pip` or `homebrew`.
|
62
|
-
|
63
|
-
To install `poetry` on a docker container, do the following:
|
64
|
-
```
|
65
|
-
ENV POETRY_NO_INTERACTION=1 \
|
66
|
-
POETRY_VIRTUALENVS_CREATE=false
|
67
|
-
|
68
|
-
# Install poetry
|
69
|
-
RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
|
70
|
-
&& python install-poetry.py \
|
71
|
-
&& poetry --version \
|
72
|
-
&& rm install-poetry.py
|
73
|
-
```
|
74
|
-
|
75
|
-
To install and run the package, simply set up a poetry environment
|
76
|
-
|
77
|
-
```
|
78
|
-
poetry env use $(which python3.10)
|
79
|
-
poetry shell
|
80
|
-
```
|
81
|
-
|
82
|
-
and install all the dependencies,
|
83
|
-
|
84
|
-
```
|
85
|
-
poetry install # this will only install the deps from the poetry.lock
|
86
|
-
|
87
|
-
poetry install --no-dev # this will skip installing dev dependencies
|
88
|
-
```
|
89
|
-
|
90
|
-
To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
|
91
|
-
```
|
92
|
-
poetry update
|
93
|
-
```
|
94
|
-
|
95
|
-
#### MacOS Intel
|
96
|
-
|
97
|
-
When in development mode on MacOS with Intel chips, one can use compatible dependencies with
|
98
|
-
|
99
|
-
```console
|
100
|
-
poetry update --with mac_intel
|
101
|
-
```
|
102
|
-
|
103
|
-
|
104
53
|
## Pipeline Overview
|
105
54
|

|
106
55
|
|
@@ -178,4 +127,3 @@ e.g.
|
|
178
127
|
```
|
179
128
|
python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
|
180
129
|
```
|
181
|
-
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[](https://pypi.org/project/docling-ibm-models/)
|
2
2
|
[](https://pypi.org/project/docling-ibm-models/)
|
3
|
-
[](https://github.com/astral-sh/uv)
|
4
4
|
[](https://github.com/psf/black)
|
5
5
|
[](https://pycqa.github.io/isort/)
|
6
6
|
[](https://github.com/pre-commit/pre-commit)
|
@@ -15,53 +15,6 @@ AI modules to support the Docling PDF document conversion project.
|
|
15
15
|
- The Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
|
16
16
|
|
17
17
|
|
18
|
-
## Installation Instructions
|
19
|
-
|
20
|
-
### MacOS / Linux
|
21
|
-
|
22
|
-
To install `poetry` locally, use either `pip` or `homebrew`.
|
23
|
-
|
24
|
-
To install `poetry` on a docker container, do the following:
|
25
|
-
```
|
26
|
-
ENV POETRY_NO_INTERACTION=1 \
|
27
|
-
POETRY_VIRTUALENVS_CREATE=false
|
28
|
-
|
29
|
-
# Install poetry
|
30
|
-
RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
|
31
|
-
&& python install-poetry.py \
|
32
|
-
&& poetry --version \
|
33
|
-
&& rm install-poetry.py
|
34
|
-
```
|
35
|
-
|
36
|
-
To install and run the package, simply set up a poetry environment
|
37
|
-
|
38
|
-
```
|
39
|
-
poetry env use $(which python3.10)
|
40
|
-
poetry shell
|
41
|
-
```
|
42
|
-
|
43
|
-
and install all the dependencies,
|
44
|
-
|
45
|
-
```
|
46
|
-
poetry install # this will only install the deps from the poetry.lock
|
47
|
-
|
48
|
-
poetry install --no-dev # this will skip installing dev dependencies
|
49
|
-
```
|
50
|
-
|
51
|
-
To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
|
52
|
-
```
|
53
|
-
poetry update
|
54
|
-
```
|
55
|
-
|
56
|
-
#### MacOS Intel
|
57
|
-
|
58
|
-
When in development mode on MacOS with Intel chips, one can use compatible dependencies with
|
59
|
-
|
60
|
-
```console
|
61
|
-
poetry update --with mac_intel
|
62
|
-
```
|
63
|
-
|
64
|
-
|
65
18
|
## Pipeline Overview
|
66
19
|

|
67
20
|
|
@@ -52,7 +52,7 @@ class SamOptConfig(OPTConfig):
|
|
52
52
|
|
53
53
|
|
54
54
|
class SamOPTModel(OPTModel):
|
55
|
-
config_class = SamOptConfig
|
55
|
+
config_class = SamOptConfig # type: ignore
|
56
56
|
|
57
57
|
def __init__(self, config: OPTConfig):
|
58
58
|
super(SamOPTModel, self).__init__(config)
|
@@ -131,7 +131,7 @@ class SamOPTModel(OPTModel):
|
|
131
131
|
|
132
132
|
|
133
133
|
class SamOPTForCausalLM(OPTForCausalLM):
|
134
|
-
config_class = SamOptConfig
|
134
|
+
config_class = SamOptConfig # type: ignore
|
135
135
|
|
136
136
|
def __init__(self, config):
|
137
137
|
super(OPTForCausalLM, self).__init__(config)
|
@@ -0,0 +1,129 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: docling-ibm-models
|
3
|
+
Version: 3.4.4
|
4
|
+
Summary: This package contains the AI models used by the Docling PDF conversion package
|
5
|
+
Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
|
8
|
+
Project-URL: repository, https://github.com/docling-project/docling-ibm-models
|
9
|
+
Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
|
10
|
+
Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
|
11
|
+
Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
|
12
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
14
|
+
Classifier: Development Status :: 5 - Production/Stable
|
15
|
+
Classifier: Intended Audience :: Developers
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
19
|
+
Requires-Python: <4.0,>=3.9
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: torch<3.0.0,>=2.2.2
|
23
|
+
Requires-Dist: torchvision<1,>=0
|
24
|
+
Requires-Dist: jsonlines<4.0.0,>=3.1.0
|
25
|
+
Requires-Dist: Pillow<12.0.0,>=10.0.0
|
26
|
+
Requires-Dist: tqdm<5.0.0,>=4.64.0
|
27
|
+
Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
|
28
|
+
Requires-Dist: huggingface_hub<1,>=0.23
|
29
|
+
Requires-Dist: safetensors[torch]<1,>=0.4.3
|
30
|
+
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
31
|
+
Requires-Dist: docling-core<3.0.0,>=2.19.0
|
32
|
+
Requires-Dist: transformers<5.0.0,>=4.42.0
|
33
|
+
Requires-Dist: numpy<3.0.0,>=1.24.4
|
34
|
+
Dynamic: license-file
|
35
|
+
|
36
|
+
[](https://pypi.org/project/docling-ibm-models/)
|
37
|
+
[](https://pypi.org/project/docling-ibm-models/)
|
38
|
+
[](https://github.com/astral-sh/uv)
|
39
|
+
[](https://github.com/psf/black)
|
40
|
+
[](https://pycqa.github.io/isort/)
|
41
|
+
[](https://github.com/pre-commit/pre-commit)
|
42
|
+
[](https://huggingface.co/ds4sd/docling-models/)
|
43
|
+
[](https://opensource.org/licenses/MIT)
|
44
|
+
|
45
|
+
# Docling IBM models
|
46
|
+
|
47
|
+
AI modules to support the Docling PDF document conversion project.
|
48
|
+
|
49
|
+
- TableFormer is an AI module that recognizes the structure of a table and the bounding boxes of the table content.
|
50
|
+
- The Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
|
51
|
+
|
52
|
+
|
53
|
+
## Pipeline Overview
|
54
|
+

|
55
|
+
|
56
|
+
## Datasets
|
57
|
+
Below we list the datasets used, with their description, source, and ***"TableFormer Format"***. The TableFormer Format is our processed version of the original format, made to work with the dataloader out of the box and to augment the dataset when necessary by adding missing ground truth (bounding boxes for empty cells).
|
58
|
+
|
59
|
+
|
60
|
+
| Name | Description | URL |
|
61
|
+
| ------------- |:-------------:|----|
|
62
|
+
| PubTabNet | PubTabNet contains heterogeneous tables in both image and HTML format, 516k+ tables in the PubMed Central Open Access Subset | [PubTabNet](https://developer.ibm.com/exchanges/data/all/pubtabnet/) |
|
63
|
+
| FinTabNet| A dataset for Financial Report Tables with corresponding ground truth location and structure. 112k+ tables included.| [FinTabNet](https://developer.ibm.com/exchanges/data/all/fintabnet/) |
|
64
|
+
| TableBank| TableBank is a new image-based table detection and recognition dataset built with novel weak supervision from Word and Latex documents on the internet, contains 417K high-quality labeled tables. | [TableBank](https://github.com/doc-analysis/TableBank) |
|
65
|
+
|
66
|
+
## Models
|
67
|
+
|
68
|
+
### TableModel04:
|
69
|
+

|
70
|
+
**TableModel04rs (OTSL)** is our SOTA method that uses transformers to predict the table structure and bounding boxes.
|
71
|
+
|
72
|
+
|
73
|
+
## Configuration file
|
74
|
+
|
75
|
+
An example configuration can be found inside the test `tests/test_tf_predictor.py`.
|
76
|
+
These are the main sections of the configuration file:
|
77
|
+
|
78
|
+
- `dataset`: The directory for prepared data and the parameters used during the data loading.
|
79
|
+
- `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
|
80
|
+
trained checkpoint files.
|
81
|
+
- `train`: Parameters for the training of the model.
|
82
|
+
- `predict`: Parameters for the evaluation of the model.
|
83
|
+
- `dataset_wordmap`: Very important part that contains token maps.
|
84
|
+
|
85
|
+
|
86
|
+
## Model weights
|
87
|
+
|
88
|
+
You can download the model weights and config files from the links:
|
89
|
+
|
90
|
+
- [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
|
91
|
+
- [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
|
92
|
+
|
93
|
+
|
94
|
+
## Inference Tests
|
95
|
+
|
96
|
+
You can run the inference tests for the models with:
|
97
|
+
|
98
|
+
```
|
99
|
+
python -m pytest tests/
|
100
|
+
```
|
101
|
+
|
102
|
+
This will also generate prediction and matching visualizations that can be found here:
|
103
|
+
`tests/test_data/viz/`
|
104
|
+
|
105
|
+
Visualization outlines:
|
106
|
+
- `Light Pink`: border of recognized table
|
107
|
+
- `Grey`: OCR cells
|
108
|
+
- `Green`: prediction bboxes
|
109
|
+
- `Red`: OCR cells matched with prediction
|
110
|
+
- `Blue`: Post processed, match
|
111
|
+
- `Bold Blue`: column header
|
112
|
+
- `Bold Magenta`: row header
|
113
|
+
- `Bold Brown`: section row (if the table has one)
|
114
|
+
|
115
|
+
|
116
|
+
## Demo
|
117
|
+
|
118
|
+
A demo application lets you apply the `LayoutPredictor` to a directory `<input_dir>` that contains
|
119
|
+
`png` images and visualize the predictions inside another directory `<viz_dir>`.
|
120
|
+
|
121
|
+
First download the model weights (see above), then run:
|
122
|
+
```
|
123
|
+
python -m demo.demo_layout_predictor -i <input_dir> -v <viz_dir>
|
124
|
+
```
|
125
|
+
|
126
|
+
e.g.
|
127
|
+
```
|
128
|
+
python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
|
129
|
+
```
|
@@ -0,0 +1,50 @@
|
|
1
|
+
LICENSE
|
2
|
+
README.md
|
3
|
+
pyproject.toml
|
4
|
+
docling_ibm_models/__init__.py
|
5
|
+
docling_ibm_models/py.typed
|
6
|
+
docling_ibm_models.egg-info/PKG-INFO
|
7
|
+
docling_ibm_models.egg-info/SOURCES.txt
|
8
|
+
docling_ibm_models.egg-info/dependency_links.txt
|
9
|
+
docling_ibm_models.egg-info/requires.txt
|
10
|
+
docling_ibm_models.egg-info/top_level.txt
|
11
|
+
docling_ibm_models/code_formula_model/__init__.py
|
12
|
+
docling_ibm_models/code_formula_model/code_formula_predictor.py
|
13
|
+
docling_ibm_models/code_formula_model/models/__init__.py
|
14
|
+
docling_ibm_models/code_formula_model/models/sam.py
|
15
|
+
docling_ibm_models/code_formula_model/models/sam_opt.py
|
16
|
+
docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py
|
17
|
+
docling_ibm_models/document_figure_classifier_model/__init__.py
|
18
|
+
docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py
|
19
|
+
docling_ibm_models/layoutmodel/__init__.py
|
20
|
+
docling_ibm_models/layoutmodel/layout_predictor.py
|
21
|
+
docling_ibm_models/reading_order/__init__.py
|
22
|
+
docling_ibm_models/reading_order/reading_order_rb.py
|
23
|
+
docling_ibm_models/tableformer/__init__.py
|
24
|
+
docling_ibm_models/tableformer/common.py
|
25
|
+
docling_ibm_models/tableformer/otsl.py
|
26
|
+
docling_ibm_models/tableformer/settings.py
|
27
|
+
docling_ibm_models/tableformer/data_management/__init__.py
|
28
|
+
docling_ibm_models/tableformer/data_management/functional.py
|
29
|
+
docling_ibm_models/tableformer/data_management/matching_post_processor.py
|
30
|
+
docling_ibm_models/tableformer/data_management/tf_cell_matcher.py
|
31
|
+
docling_ibm_models/tableformer/data_management/tf_predictor.py
|
32
|
+
docling_ibm_models/tableformer/data_management/transforms.py
|
33
|
+
docling_ibm_models/tableformer/models/__init__.py
|
34
|
+
docling_ibm_models/tableformer/models/common/__init__.py
|
35
|
+
docling_ibm_models/tableformer/models/common/base_model.py
|
36
|
+
docling_ibm_models/tableformer/models/table04_rs/__init__.py
|
37
|
+
docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py
|
38
|
+
docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py
|
39
|
+
docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
|
40
|
+
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py
|
41
|
+
docling_ibm_models/tableformer/utils/__init__.py
|
42
|
+
docling_ibm_models/tableformer/utils/app_profiler.py
|
43
|
+
docling_ibm_models/tableformer/utils/mem_monitor.py
|
44
|
+
docling_ibm_models/tableformer/utils/utils.py
|
45
|
+
tests/test_code_formula_predictor.py
|
46
|
+
tests/test_common.py
|
47
|
+
tests/test_document_figure_classifier.py
|
48
|
+
tests/test_layout_predictor.py
|
49
|
+
tests/test_reading_order.py
|
50
|
+
tests/test_tf_predictor.py
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
torch<3.0.0,>=2.2.2
|
2
|
+
torchvision<1,>=0
|
3
|
+
jsonlines<4.0.0,>=3.1.0
|
4
|
+
Pillow<12.0.0,>=10.0.0
|
5
|
+
tqdm<5.0.0,>=4.64.0
|
6
|
+
opencv-python-headless<5.0.0.0,>=4.6.0.66
|
7
|
+
huggingface_hub<1,>=0.23
|
8
|
+
safetensors[torch]<1,>=0.4.3
|
9
|
+
pydantic<3.0.0,>=2.0.0
|
10
|
+
docling-core<3.0.0,>=2.19.0
|
11
|
+
transformers<5.0.0,>=4.42.0
|
12
|
+
numpy<3.0.0,>=1.24.4
|
@@ -0,0 +1 @@
|
|
1
|
+
docling_ibm_models
|
@@ -0,0 +1,105 @@
|
|
1
|
+
[project]
|
2
|
+
name = "docling-ibm-models"
|
3
|
+
version = "3.4.4" # DO NOT EDIT, updated automatically
|
4
|
+
description = "This package contains the AI models used by the Docling PDF conversion package"
|
5
|
+
license = "MIT"
|
6
|
+
keywords = ["docling", "convert", "document", "pdf", "layout model", "segmentation", "table structure", "table former"]
|
7
|
+
readme = "README.md"
|
8
|
+
authors = [
|
9
|
+
{ name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
|
10
|
+
{ name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
|
11
|
+
{ name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
|
12
|
+
{ name = "Christoph Auer", email = "cau@zurich.ibm.com" },
|
13
|
+
{ name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
|
14
|
+
{ name = "Peter Staar", email = "taa@zurich.ibm.com" },
|
15
|
+
]
|
16
|
+
classifiers = [
|
17
|
+
"Operating System :: MacOS :: MacOS X",
|
18
|
+
"Operating System :: POSIX :: Linux",
|
19
|
+
"Development Status :: 5 - Production/Stable",
|
20
|
+
"Intended Audience :: Developers",
|
21
|
+
"Intended Audience :: Science/Research",
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
23
|
+
"Programming Language :: Python :: 3",
|
24
|
+
]
|
25
|
+
requires-python = '>=3.9,<4.0'
|
26
|
+
dependencies = [
|
27
|
+
'torch (>=2.2.2,<3.0.0)',
|
28
|
+
'torchvision (>=0,<1)',
|
29
|
+
'jsonlines (>=3.1.0,<4.0.0)',
|
30
|
+
'Pillow (>=10.0.0,<12.0.0)',
|
31
|
+
'tqdm (>=4.64.0,<5.0.0)',
|
32
|
+
'opencv-python-headless (>=4.6.0.66,<5.0.0.0)',
|
33
|
+
'huggingface_hub (>=0.23,<1)',
|
34
|
+
'safetensors[torch] (>=0.4.3,<1)',
|
35
|
+
'pydantic (>=2.0.0,<3.0.0)',
|
36
|
+
'docling-core (>=2.19.0,<3.0.0)',
|
37
|
+
'transformers (>=4.42.0,<5.0.0)',
|
38
|
+
'numpy (>=1.24.4,<3.0.0)',
|
39
|
+
]
|
40
|
+
|
41
|
+
[project.urls]
|
42
|
+
homepage = "https://github.com/docling-project/docling-ibm-models"
|
43
|
+
repository = "https://github.com/docling-project/docling-ibm-models"
|
44
|
+
issues = "https://github.com/docling-project/docling-ibm-models/issues"
|
45
|
+
changelog = "https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md"
|
46
|
+
|
47
|
+
[dependency-groups]
|
48
|
+
dev = [
|
49
|
+
"pre-commit~=3.7",
|
50
|
+
"mypy~=1.10",
|
51
|
+
"black~=24.4",
|
52
|
+
"isort~=5.10",
|
53
|
+
"autoflake~=2.0",
|
54
|
+
"flake8~=7.1",
|
55
|
+
"flake8-docstrings~=1.6",
|
56
|
+
"types-setuptools~=70.3",
|
57
|
+
"pandas-stubs~=2.1",
|
58
|
+
"types-requests~=2.31",
|
59
|
+
"coverage~=7.6",
|
60
|
+
"pytest~=8.3",
|
61
|
+
"pytest-cov>=6.1.1",
|
62
|
+
"pytest-dependency~=0.6",
|
63
|
+
"pytest-xdist~=3.3",
|
64
|
+
"python-semantic-release~=7.32",
|
65
|
+
"datasets~=3.2",
|
66
|
+
]
|
67
|
+
|
68
|
+
[tool.uv]
|
69
|
+
package = true
|
70
|
+
|
71
|
+
[tool.setuptools.packages.find]
|
72
|
+
include = ["docling_ibm_models*"]
|
73
|
+
|
74
|
+
[tool.black]
|
75
|
+
line-length = 88
|
76
|
+
target-version = ["py39"]
|
77
|
+
include = '\.pyi?$'
|
78
|
+
|
79
|
+
[tool.isort]
|
80
|
+
profile = "black"
|
81
|
+
line_length = 88
|
82
|
+
py_version = 39
|
83
|
+
|
84
|
+
[tool.semantic_release]
|
85
|
+
# for default values check:
|
86
|
+
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
87
|
+
|
88
|
+
version_source = "tag_only"
|
89
|
+
branch = "main"
|
90
|
+
|
91
|
+
# configure types which should trigger minor and patch version bumps respectively
|
92
|
+
# (note that they must be a subset of the configured allowed types):
|
93
|
+
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
94
|
+
parser_angular_minor_types = "feat"
|
95
|
+
parser_angular_patch_types = "fix,perf"
|
96
|
+
|
97
|
+
|
98
|
+
[tool.mypy]
|
99
|
+
pretty = true
|
100
|
+
no_implicit_optional = true
|
101
|
+
python_version = "3.10"
|
102
|
+
|
103
|
+
[[tool.mypy.overrides]]
|
104
|
+
module = ["torchvision.*", "transformers.*"]
|
105
|
+
ignore_missing_imports = true
|
@@ -0,0 +1,141 @@
|
|
1
|
+
#
|
2
|
+
# Copyright IBM Corp. 2024 - 2024
|
3
|
+
# SPDX-License-Identifier: MIT
|
4
|
+
#
|
5
|
+
import os
|
6
|
+
import numpy as np
|
7
|
+
import pytest
|
8
|
+
from PIL import Image
|
9
|
+
|
10
|
+
from docling_ibm_models.code_formula_model.code_formula_predictor import CodeFormulaPredictor
|
11
|
+
|
12
|
+
from huggingface_hub import snapshot_download
|
13
|
+
|
14
|
+
@pytest.fixture(scope="module")
def init() -> dict:
    r"""
    Build the shared settings used by the CodeFormula predictor test

    Returns a dict with the thread count, the test samples (label, image path
    and ground-truth path), the `info` settings and the path of the model
    artifacts obtained through `snapshot_download`.
    """
    code_sample = {
        "label": "code",
        "image_path": "tests/test_data/code_formula/images/code.png",
        "gt_path": "tests/test_data/code_formula/gt/code.txt",
    }
    formula_sample = {
        "label": "formula",
        "image_path": "tests/test_data/code_formula/images/formula.png",
        "gt_path": "tests/test_data/code_formula/gt/formula.txt",
    }
    predictor_info = {
        "device": "auto",
        "temperature": 0,
    }

    # Fetch the pinned revision of the model from the Hugging Face Hub
    model_dir = snapshot_download(repo_id="ds4sd/CodeFormula", revision="v1.0.1")

    # "artifact_path" stays the last key, as when it was added after creation
    return {
        "num_threads": 1,
        "test_imgs": [code_sample, formula_sample],
        "info": predictor_info,
        "artifact_path": model_dir,
    }
|
45
|
+
|
46
|
+
|
47
|
+
def test_code_formula_predictor(init: dict):
    r"""
    Unit test for the CodeFormulaPredictor

    Checks, in order:
    - the device and number of threads reported by `info()`,
    - that invalid inputs to `predict()` raise an exception,
    - that single-image predictions (PIL image and numpy array) are equal to
      the ground-truth files listed in `init["test_imgs"]`,
    - that a batched prediction over all test images is equal to the list of
      ground truths.

    Parameters
    ----------
    init : dict
        Settings produced by the `init` fixture. The keys read here are
        "artifact_path", "info" and "test_imgs".
    """
    device = "cpu"
    num_threads = 2

    # Initialize CodeFormulaPredictor
    code_formula_predictor = CodeFormulaPredictor(
        init["artifact_path"], device=device, num_threads=num_threads
    )

    # Check info
    info = code_formula_predictor.info()
    assert info["device"] == device, "Wronly set device"
    assert info["num_threads"] == num_threads, "Wronly set number of threads"

    # Unsupported input image
    with pytest.raises(TypeError):
        # Iterate over the result so that a lazily evaluated output is consumed
        for _ in code_formula_predictor.predict(["wrong"], ["label"]):
            pass

    # Invalid arguments passed after a valid image. The exact exception type is
    # not pinned here, only that the call fails.
    invalid_extra_args = [
        (["label"], "0.1"),  # wrong type for temperature
        (["label"], -0.1),  # wrong value for temperature
        (["label"], None),  # temperature must not be None
        (["label", "label"],),  # mismatched number of images and labels
    ]
    for extra_args in invalid_extra_args:
        # Build the image outside of the raises-block, so that only a failure
        # of predict() itself can satisfy the expectation
        dummy_image = Image.new(mode="RGB", size=(100, 100), color=(255, 255, 255))
        with pytest.raises(Exception):
            for _ in code_formula_predictor.predict([dummy_image], *extra_args):
                pass

    # Predict on test images, not batched
    temperature = init["info"]["temperature"]
    for d in init["test_imgs"]:
        label = d["label"]
        img_path = d["image_path"]
        gt_path = d["gt_path"]

        with Image.open(img_path) as img, open(gt_path, "r") as gt_fp:
            gt = gt_fp.read()

            output = code_formula_predictor.predict([img], [label], temperature)
            output = output[0]

            assert output == gt

            # Load images as numpy arrays
            np_arr = np.asarray(img)
            output = code_formula_predictor.predict([np_arr], [label], temperature)
            output = output[0]

            assert output == gt

    # Predict on test images, batched
    labels = [d["label"] for d in init["test_imgs"]]

    gts = []
    for d in init["test_imgs"]:
        # Close every ground-truth file right after reading it
        with open(d["gt_path"], "r") as gt_fp:
            gts.append(gt_fp.read())

    images = [Image.open(d["image_path"]) for d in init["test_imgs"]]
    try:
        outputs = code_formula_predictor.predict(images, labels, temperature)
    finally:
        # Release the image file handles even if the prediction fails
        for image in images:
            image.close()

    assert outputs == gts
|