docling-ibm-models 3.4.3__tar.gz → 3.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/PKG-INFO +27 -79
  2. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/README.md +1 -48
  3. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt.py +2 -2
  4. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/PKG-INFO +129 -0
  5. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/SOURCES.txt +50 -0
  6. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/dependency_links.txt +1 -0
  7. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/requires.txt +12 -0
  8. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/top_level.txt +1 -0
  9. docling_ibm_models-3.4.4/pyproject.toml +105 -0
  10. docling_ibm_models-3.4.4/setup.cfg +4 -0
  11. docling_ibm_models-3.4.4/tests/test_code_formula_predictor.py +141 -0
  12. docling_ibm_models-3.4.4/tests/test_common.py +89 -0
  13. docling_ibm_models-3.4.4/tests/test_document_figure_classifier.py +98 -0
  14. docling_ibm_models-3.4.4/tests/test_layout_predictor.py +110 -0
  15. docling_ibm_models-3.4.4/tests/test_reading_order.py +272 -0
  16. docling_ibm_models-3.4.4/tests/test_tf_predictor.py +580 -0
  17. docling_ibm_models-3.4.3/pyproject.toml +0 -123
  18. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/LICENSE +0 -0
  19. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/__init__.py +0 -0
  20. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/__init__.py +0 -0
  21. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/code_formula_predictor.py +0 -0
  22. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/__init__.py +0 -0
  23. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam.py +0 -0
  24. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py +0 -0
  25. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/__init__.py +0 -0
  26. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py +0 -0
  27. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/__init__.py +0 -0
  28. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  29. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/py.typed +0 -0
  30. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/__init__.py +0 -0
  31. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/reading_order_rb.py +0 -0
  32. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/__init__.py +0 -0
  33. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/common.py +0 -0
  34. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  35. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  36. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  37. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  38. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_predictor.py +0 -0
  39. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  40. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  41. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  42. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  43. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  44. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  45. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  46. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  47. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  48. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/otsl.py +0 -0
  49. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/settings.py +0 -0
  50. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  51. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  52. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  53. {docling_ibm_models-3.4.3 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,45 +1,41 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: docling-ibm-models
3
- Version: 3.4.3
3
+ Version: 3.4.4
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
- License: MIT
5
+ Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
8
+ Project-URL: repository, https://github.com/docling-project/docling-ibm-models
9
+ Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
6
11
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
7
- Author: Nikos Livathinos
8
- Author-email: nli@zurich.ibm.com
9
- Requires-Python: >=3.9,<4.0
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
10
14
  Classifier: Development Status :: 5 - Production/Stable
11
15
  Classifier: Intended Audience :: Developers
12
16
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Operating System :: MacOS :: MacOS X
15
- Classifier: Operating System :: POSIX :: Linux
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Programming Language :: Python :: 3.13
22
17
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
- Requires-Dist: Pillow (>=10.0.0,<12.0.0)
24
- Requires-Dist: docling-core (>=2.19.0,<3.0.0)
25
- Requires-Dist: huggingface_hub (>=0.23,<1)
26
- Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
27
- Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
28
- Requires-Dist: numpy (>=1.24.4,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
29
- Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
30
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
31
- Requires-Dist: safetensors[torch] (>=0.4.3,<1)
32
- Requires-Dist: torch (>=2.2.2,<3.0.0)
33
- Requires-Dist: torchvision (>=0,<1)
34
- Requires-Dist: tqdm (>=4.64.0,<5.0.0)
35
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; python_version < "3.13" and sys_platform == "darwin" and platform_machine == "x86_64"
36
- Requires-Dist: transformers (>=4.42.0,<5.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
37
- Requires-Dist: transformers (>=4.47.0,<5.0.0) ; python_version >= "3.13" and (sys_platform != "darwin" or platform_machine != "x86_64")
18
+ Classifier: Programming Language :: Python :: 3
19
+ Requires-Python: <4.0,>=3.9
38
20
  Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: torch<3.0.0,>=2.2.2
23
+ Requires-Dist: torchvision<1,>=0
24
+ Requires-Dist: jsonlines<4.0.0,>=3.1.0
25
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
26
+ Requires-Dist: tqdm<5.0.0,>=4.64.0
27
+ Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
28
+ Requires-Dist: huggingface_hub<1,>=0.23
29
+ Requires-Dist: safetensors[torch]<1,>=0.4.3
30
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
31
+ Requires-Dist: docling-core<3.0.0,>=2.19.0
32
+ Requires-Dist: transformers<5.0.0,>=4.42.0
33
+ Requires-Dist: numpy<3.0.0,>=1.24.4
34
+ Dynamic: license-file
39
35
 
40
36
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
41
37
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
42
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
38
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
43
39
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
44
40
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
45
41
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
@@ -54,53 +50,6 @@ AI modules to support the Docling PDF document conversion project.
54
50
  - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains the inference code for the Layout model.
55
51
 
56
52
 
57
- ## Installation Instructions
58
-
59
- ### MacOS / Linux
60
-
61
- To install `poetry` locally, use either `pip` or `homebrew`.
62
-
63
- To install `poetry` on a docker container, do the following:
64
- ```
65
- ENV POETRY_NO_INTERACTION=1 \
66
- POETRY_VIRTUALENVS_CREATE=false
67
-
68
- # Install poetry
69
- RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
70
- && python install-poetry.py \
71
- && poetry --version \
72
- && rm install-poetry.py
73
- ```
74
-
75
- To install and run the package, simply set up a poetry environment
76
-
77
- ```
78
- poetry env use $(which python3.10)
79
- poetry shell
80
- ```
81
-
82
- and install all the dependencies,
83
-
84
- ```
85
- poetry install # this will only install the deps from the poetry.lock
86
-
87
- poetry install --no-dev # this will skip installing dev dependencies
88
- ```
89
-
90
- To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
91
- ```
92
- poetry update
93
- ```
94
-
95
- #### MacOS Intel
96
-
97
- When in development mode on MacOS with Intel chips, one can use compatible dependencies with
98
-
99
- ```console
100
- poetry update --with mac_intel
101
- ```
102
-
103
-
104
53
  ## Pipeline Overview
105
54
  ![Architecture](docs/tablemodel_overview_color.png)
106
55
 
@@ -178,4 +127,3 @@ e.g.
178
127
  ```
179
128
  python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
180
129
  ```
181
-
@@ -1,6 +1,6 @@
1
1
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
2
2
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
3
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
3
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
4
4
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5
5
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
6
6
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
@@ -15,53 +15,6 @@ AI modules to support the Docling PDF document conversion project.
15
15
  - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains the inference code for the Layout model.
16
16
 
17
17
 
18
- ## Installation Instructions
19
-
20
- ### MacOS / Linux
21
-
22
- To install `poetry` locally, use either `pip` or `homebrew`.
23
-
24
- To install `poetry` on a docker container, do the following:
25
- ```
26
- ENV POETRY_NO_INTERACTION=1 \
27
- POETRY_VIRTUALENVS_CREATE=false
28
-
29
- # Install poetry
30
- RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
31
- && python install-poetry.py \
32
- && poetry --version \
33
- && rm install-poetry.py
34
- ```
35
-
36
- To install and run the package, simply set up a poetry environment
37
-
38
- ```
39
- poetry env use $(which python3.10)
40
- poetry shell
41
- ```
42
-
43
- and install all the dependencies,
44
-
45
- ```
46
- poetry install # this will only install the deps from the poetry.lock
47
-
48
- poetry install --no-dev # this will skip installing dev dependencies
49
- ```
50
-
51
- To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
52
- ```
53
- poetry update
54
- ```
55
-
56
- #### MacOS Intel
57
-
58
- When in development mode on MacOS with Intel chips, one can use compatible dependencies with
59
-
60
- ```console
61
- poetry update --with mac_intel
62
- ```
63
-
64
-
65
18
  ## Pipeline Overview
66
19
  ![Architecture](docs/tablemodel_overview_color.png)
67
20
 
@@ -52,7 +52,7 @@ class SamOptConfig(OPTConfig):
52
52
 
53
53
 
54
54
  class SamOPTModel(OPTModel):
55
- config_class = SamOptConfig
55
+ config_class = SamOptConfig # type: ignore
56
56
 
57
57
  def __init__(self, config: OPTConfig):
58
58
  super(SamOPTModel, self).__init__(config)
@@ -131,7 +131,7 @@ class SamOPTModel(OPTModel):
131
131
 
132
132
 
133
133
  class SamOPTForCausalLM(OPTForCausalLM):
134
- config_class = SamOptConfig
134
+ config_class = SamOptConfig # type: ignore
135
135
 
136
136
  def __init__(self, config):
137
137
  super(OPTForCausalLM, self).__init__(config)
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: docling-ibm-models
3
+ Version: 3.4.4
4
+ Summary: This package contains the AI models used by the Docling PDF conversion package
5
+ Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
8
+ Project-URL: repository, https://github.com/docling-project/docling-ibm-models
9
+ Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
11
+ Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Programming Language :: Python :: 3
19
+ Requires-Python: <4.0,>=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: torch<3.0.0,>=2.2.2
23
+ Requires-Dist: torchvision<1,>=0
24
+ Requires-Dist: jsonlines<4.0.0,>=3.1.0
25
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
26
+ Requires-Dist: tqdm<5.0.0,>=4.64.0
27
+ Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
28
+ Requires-Dist: huggingface_hub<1,>=0.23
29
+ Requires-Dist: safetensors[torch]<1,>=0.4.3
30
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
31
+ Requires-Dist: docling-core<3.0.0,>=2.19.0
32
+ Requires-Dist: transformers<5.0.0,>=4.42.0
33
+ Requires-Dist: numpy<3.0.0,>=1.24.4
34
+ Dynamic: license-file
35
+
36
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
37
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
38
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
39
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
40
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
41
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
42
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
43
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
44
+
45
+ # Docling IBM models
46
+
47
+ AI modules to support the Docling PDF document conversion project.
48
+
49
+ - TableFormer is an AI module that recognizes the structure of a table and the bounding boxes of the table content.
50
+ - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains the inference code for the Layout model.
51
+
52
+
53
+ ## Pipeline Overview
54
+ ![Architecture](docs/tablemodel_overview_color.png)
55
+
56
+ ## Datasets
57
+ Below we list the datasets used, with their description, source, and ***"TableFormer Format"***. The TableFormer Format is our processed version of the original format, prepared to work with the dataloader out of the box and, when necessary, augmented with missing ground truth (bounding boxes for empty cells).
58
+
59
+
60
+ | Name | Description | URL |
61
+ | ------------- |:-------------:|----|
62
+ | PubTabNet | PubTabNet contains heterogeneous tables in both image and HTML format, 516k+ tables in the PubMed Central Open Access Subset | [PubTabNet](https://developer.ibm.com/exchanges/data/all/pubtabnet/) |
63
+ | FinTabNet| A dataset for Financial Report Tables with corresponding ground truth location and structure. 112k+ tables included.| [FinTabNet](https://developer.ibm.com/exchanges/data/all/fintabnet/) |
64
+ | TableBank| TableBank is a new image-based table detection and recognition dataset built with novel weak supervision from Word and Latex documents on the internet, contains 417K high-quality labeled tables. | [TableBank](https://github.com/doc-analysis/TableBank) |
65
+
66
+ ## Models
67
+
68
+ ### TableModel04:
69
+ ![TableModel04](docs/tbm04.png)
70
+ **TableModel04rs (OTSL)** is our SOTA method that uses transformers to predict the table structure and bounding boxes.
71
+
72
+
73
+ ## Configuration file
74
+
75
+ An example configuration can be found inside the test `tests/test_tf_predictor.py`.
76
+ These are the main sections of the configuration file:
77
+
78
+ - `dataset`: The directory for prepared data and the parameters used during the data loading.
79
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
80
+ trained checkpoint files.
81
+ - `train`: Parameters for the training of the model.
82
+ - `predict`: Parameters for the evaluation of the model.
83
+ - `dataset_wordmap`: Very important part that contains token maps.
84
+
85
+
86
+ ## Model weights
87
+
88
+ You can download the model weights and config files from the links:
89
+
90
+ - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
91
+ - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
92
+
93
+
94
+ ## Inference Tests
95
+
96
+ You can run the inference tests for the models with:
97
+
98
+ ```
99
+ python -m pytest tests/
100
+ ```
101
+
102
+ This will also generate prediction and matching visualizations that can be found here:
103
+ `tests\test_data\viz\`
104
+
105
+ Visualization outlines:
106
+ - `Light Pink`: border of recognized table
107
+ - `Grey`: OCR cells
108
+ - `Green`: prediction bboxes
109
+ - `Red`: OCR cells matched with prediction
110
+ - `Blue`: Post processed, match
111
+ - `Bold Blue`: column header
112
+ - `Bold Magenta`: row header
113
+ - `Bold Brown`: section row (if the table has one)
114
+
115
+
116
+ ## Demo
117
+
118
+ A demo application lets you apply the `LayoutPredictor` to a directory `<input_dir>` that contains
119
+ `png` images and visualize the predictions inside another directory `<viz_dir>`.
120
+
121
+ First download the model weights (see above), then run:
122
+ ```
123
+ python -m demo.demo_layout_predictor -i <input_dir> -v <viz_dir>
124
+ ```
125
+
126
+ e.g.
127
+ ```
128
+ python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
129
+ ```
@@ -0,0 +1,50 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ docling_ibm_models/__init__.py
5
+ docling_ibm_models/py.typed
6
+ docling_ibm_models.egg-info/PKG-INFO
7
+ docling_ibm_models.egg-info/SOURCES.txt
8
+ docling_ibm_models.egg-info/dependency_links.txt
9
+ docling_ibm_models.egg-info/requires.txt
10
+ docling_ibm_models.egg-info/top_level.txt
11
+ docling_ibm_models/code_formula_model/__init__.py
12
+ docling_ibm_models/code_formula_model/code_formula_predictor.py
13
+ docling_ibm_models/code_formula_model/models/__init__.py
14
+ docling_ibm_models/code_formula_model/models/sam.py
15
+ docling_ibm_models/code_formula_model/models/sam_opt.py
16
+ docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py
17
+ docling_ibm_models/document_figure_classifier_model/__init__.py
18
+ docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py
19
+ docling_ibm_models/layoutmodel/__init__.py
20
+ docling_ibm_models/layoutmodel/layout_predictor.py
21
+ docling_ibm_models/reading_order/__init__.py
22
+ docling_ibm_models/reading_order/reading_order_rb.py
23
+ docling_ibm_models/tableformer/__init__.py
24
+ docling_ibm_models/tableformer/common.py
25
+ docling_ibm_models/tableformer/otsl.py
26
+ docling_ibm_models/tableformer/settings.py
27
+ docling_ibm_models/tableformer/data_management/__init__.py
28
+ docling_ibm_models/tableformer/data_management/functional.py
29
+ docling_ibm_models/tableformer/data_management/matching_post_processor.py
30
+ docling_ibm_models/tableformer/data_management/tf_cell_matcher.py
31
+ docling_ibm_models/tableformer/data_management/tf_predictor.py
32
+ docling_ibm_models/tableformer/data_management/transforms.py
33
+ docling_ibm_models/tableformer/models/__init__.py
34
+ docling_ibm_models/tableformer/models/common/__init__.py
35
+ docling_ibm_models/tableformer/models/common/base_model.py
36
+ docling_ibm_models/tableformer/models/table04_rs/__init__.py
37
+ docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py
38
+ docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py
39
+ docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
40
+ docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py
41
+ docling_ibm_models/tableformer/utils/__init__.py
42
+ docling_ibm_models/tableformer/utils/app_profiler.py
43
+ docling_ibm_models/tableformer/utils/mem_monitor.py
44
+ docling_ibm_models/tableformer/utils/utils.py
45
+ tests/test_code_formula_predictor.py
46
+ tests/test_common.py
47
+ tests/test_document_figure_classifier.py
48
+ tests/test_layout_predictor.py
49
+ tests/test_reading_order.py
50
+ tests/test_tf_predictor.py
@@ -0,0 +1,12 @@
1
+ torch<3.0.0,>=2.2.2
2
+ torchvision<1,>=0
3
+ jsonlines<4.0.0,>=3.1.0
4
+ Pillow<12.0.0,>=10.0.0
5
+ tqdm<5.0.0,>=4.64.0
6
+ opencv-python-headless<5.0.0.0,>=4.6.0.66
7
+ huggingface_hub<1,>=0.23
8
+ safetensors[torch]<1,>=0.4.3
9
+ pydantic<3.0.0,>=2.0.0
10
+ docling-core<3.0.0,>=2.19.0
11
+ transformers<5.0.0,>=4.42.0
12
+ numpy<3.0.0,>=1.24.4
@@ -0,0 +1 @@
1
+ docling_ibm_models
@@ -0,0 +1,105 @@
1
+ [project]
2
+ name = "docling-ibm-models"
3
+ version = "3.4.4" # DO NOT EDIT, updated automatically
4
+ description = "This package contains the AI models used by the Docling PDF conversion package"
5
+ license = "MIT"
6
+ keywords = ["docling", "convert", "document", "pdf", "layout model", "segmentation", "table structure", "table former"]
7
+ readme = "README.md"
8
+ authors = [
9
+ { name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
10
+ { name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
11
+ { name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
12
+ { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
13
+ { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
14
+ { name = "Peter Staar", email = "taa@zurich.ibm.com" },
15
+ ]
16
+ classifiers = [
17
+ "Operating System :: MacOS :: MacOS X",
18
+ "Operating System :: POSIX :: Linux",
19
+ "Development Status :: 5 - Production/Stable",
20
+ "Intended Audience :: Developers",
21
+ "Intended Audience :: Science/Research",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Programming Language :: Python :: 3",
24
+ ]
25
+ requires-python = '>=3.9,<4.0'
26
+ dependencies = [
27
+ 'torch (>=2.2.2,<3.0.0)',
28
+ 'torchvision (>=0,<1)',
29
+ 'jsonlines (>=3.1.0,<4.0.0)',
30
+ 'Pillow (>=10.0.0,<12.0.0)',
31
+ 'tqdm (>=4.64.0,<5.0.0)',
32
+ 'opencv-python-headless (>=4.6.0.66,<5.0.0.0)',
33
+ 'huggingface_hub (>=0.23,<1)',
34
+ 'safetensors[torch] (>=0.4.3,<1)',
35
+ 'pydantic (>=2.0.0,<3.0.0)',
36
+ 'docling-core (>=2.19.0,<3.0.0)',
37
+ 'transformers (>=4.42.0,<5.0.0)',
38
+ 'numpy (>=1.24.4,<3.0.0)',
39
+ ]
40
+
41
+ [project.urls]
42
+ homepage = "https://github.com/docling-project/docling-ibm-models"
43
+ repository = "https://github.com/docling-project/docling-ibm-models"
44
+ issues = "https://github.com/docling-project/docling-ibm-models/issues"
45
+ changelog = "https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md"
46
+
47
+ [dependency-groups]
48
+ dev = [
49
+ "pre-commit~=3.7",
50
+ "mypy~=1.10",
51
+ "black~=24.4",
52
+ "isort~=5.10",
53
+ "autoflake~=2.0",
54
+ "flake8~=7.1",
55
+ "flake8-docstrings~=1.6",
56
+ "types-setuptools~=70.3",
57
+ "pandas-stubs~=2.1",
58
+ "types-requests~=2.31",
59
+ "coverage~=7.6",
60
+ "pytest~=8.3",
61
+ "pytest-cov>=6.1.1",
62
+ "pytest-dependency~=0.6",
63
+ "pytest-xdist~=3.3",
64
+ "python-semantic-release~=7.32",
65
+ "datasets~=3.2",
66
+ ]
67
+
68
+ [tool.uv]
69
+ package = true
70
+
71
+ [tool.setuptools.packages.find]
72
+ include = ["docling_ibm_models*"]
73
+
74
+ [tool.black]
75
+ line-length = 88
76
+ target-version = ["py39"]
77
+ include = '\.pyi?$'
78
+
79
+ [tool.isort]
80
+ profile = "black"
81
+ line_length = 88
82
+ py_version = 39
83
+
84
+ [tool.semantic_release]
85
+ # for default values check:
86
+ # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
87
+
88
+ version_source = "tag_only"
89
+ branch = "main"
90
+
91
+ # configure types which should trigger minor and patch version bumps respectively
92
+ # (note that they must be a subset of the configured allowed types):
93
+ parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
94
+ parser_angular_minor_types = "feat"
95
+ parser_angular_patch_types = "fix,perf"
96
+
97
+
98
+ [tool.mypy]
99
+ pretty = true
100
+ no_implicit_optional = true
101
+ python_version = "3.10"
102
+
103
+ [[tool.mypy.overrides]]
104
+ module = ["torchvision.*", "transformers.*"]
105
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,141 @@
1
+ #
2
+ # Copyright IBM Corp. 2024 - 2024
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ import os
6
+ import numpy as np
7
+ import pytest
8
+ from PIL import Image
9
+
10
+ from docling_ibm_models.code_formula_model.code_formula_predictor import CodeFormulaPredictor
11
+
12
+ from huggingface_hub import snapshot_download
13
+
14
+ @pytest.fixture(scope="module")
15
+ def init() -> dict:
16
+ r"""
17
+ Initialize the testing environment
18
+ """
19
+ init = {
20
+ "num_threads": 1,
21
+ "test_imgs": [
22
+ {
23
+ "label": "code",
24
+ "image_path": "tests/test_data/code_formula/images/code.png",
25
+ "gt_path": "tests/test_data/code_formula/gt/code.txt",
26
+ },
27
+ {
28
+ "label": "formula",
29
+ "image_path": "tests/test_data/code_formula/images/formula.png",
30
+ "gt_path": "tests/test_data/code_formula/gt/formula.txt",
31
+ },
32
+ ],
33
+ "info": {
34
+ "device": "auto",
35
+ "temperature": 0,
36
+ },
37
+ }
38
+
39
+ # Download models from HF
40
+ artifact_path = snapshot_download(repo_id="ds4sd/CodeFormula", revision="v1.0.1")
41
+
42
+ init["artifact_path"] = artifact_path
43
+
44
+ return init
45
+
46
+
47
+ def test_code_formula_predictor(init: dict):
48
+ r"""
49
+ Unit test for the CodeFormulaPredictor
50
+ """
51
+ device = "cpu"
52
+ num_threads = 2
53
+
54
+ # Initialize CodeFormulaPredictor
55
+ code_formula_predictor = CodeFormulaPredictor(
56
+ init["artifact_path"], device=device, num_threads=num_threads
57
+ )
58
+
59
+ # Check info
60
+ info = code_formula_predictor.info()
61
+ assert info["device"] == device, "Wronly set device"
62
+ assert info["num_threads"] == num_threads, "Wronly set number of threads"
63
+
64
+ # Unsupported input image
65
+ is_exception = False
66
+ try:
67
+ for _ in code_formula_predictor.predict(["wrong"], ['label']):
68
+ pass
69
+ except TypeError:
70
+ is_exception = True
71
+ assert is_exception
72
+
73
+ # wrong type for temperature
74
+ is_exception = False
75
+ try:
76
+ dummy_image = Image.new(mode="RGB", size=(100, 100), color=(255, 255, 255))
77
+ for _ in code_formula_predictor.predict([dummy_image], ['label'], "0.1"):
78
+ pass
79
+ except Exception:
80
+ is_exception = True
81
+ assert is_exception
82
+
83
+ # wrong value for temperature
84
+ is_exception = False
85
+ try:
86
+ dummy_image = Image.new(mode="RGB", size=(100, 100), color=(255, 255, 255))
87
+ for _ in code_formula_predictor.predict([dummy_image], ['label'], -0.1):
88
+ pass
89
+ except Exception:
90
+ is_exception = True
91
+ assert is_exception
92
+
93
+ # None passed as temperature
94
+ is_exception = False
95
+ try:
96
+ dummy_image = Image.new(mode="RGB", size=(100, 100), color=(255, 255, 255))
97
+ for _ in code_formula_predictor.predict([dummy_image], ["label"], None):
98
+ pass
99
+ except Exception:
100
+ is_exception = True
101
+ assert is_exception
102
+
103
+ # mismatched number of images and labels
104
+ is_exception = False
105
+ try:
106
+ dummy_image = Image.new(mode="RGB", size=(100, 100), color=(255, 255, 255))
107
+ for _ in code_formula_predictor.predict([dummy_image], ['label', 'label']):
108
+ pass
109
+ except Exception:
110
+ is_exception = True
111
+ assert is_exception
112
+
113
+ # Predict on test images, not batched
114
+ temperature = init['info']['temperature']
115
+ for d in init["test_imgs"]:
116
+ label = d['label']
117
+ img_path = d['image_path']
118
+ gt_path = d['gt_path']
119
+
120
+ with Image.open(img_path) as img, open(gt_path, 'r') as gt_fp:
121
+ gt = gt_fp.read()
122
+
123
+ output = code_formula_predictor.predict([img], [label], temperature)
124
+ output = output[0]
125
+
126
+ assert output == gt
127
+
128
+ # Load images as numpy arrays
129
+ np_arr = np.asarray(img)
130
+ output = code_formula_predictor.predict([np_arr], [label], temperature)
131
+ output = output[0]
132
+
133
+ assert output == gt
134
+
135
+ # Predict on test images, batched
136
+ labels = [d['label'] for d in init["test_imgs"]]
137
+ images = [Image.open(d['image_path']) for d in init["test_imgs"]]
138
+ gts = [open(d['gt_path'], 'r').read() for d in init["test_imgs"]]
139
+
140
+ outputs = code_formula_predictor.predict(images, labels, temperature)
141
+ assert outputs == gts