docling-ibm-models 3.4.2__tar.gz → 3.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/PKG-INFO +27 -78
  2. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/README.md +1 -48
  3. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt.py +8 -8
  4. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/PKG-INFO +129 -0
  5. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/SOURCES.txt +50 -0
  6. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/dependency_links.txt +1 -0
  7. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/requires.txt +12 -0
  8. docling_ibm_models-3.4.4/docling_ibm_models.egg-info/top_level.txt +1 -0
  9. docling_ibm_models-3.4.4/pyproject.toml +105 -0
  10. docling_ibm_models-3.4.4/setup.cfg +4 -0
  11. docling_ibm_models-3.4.4/tests/test_code_formula_predictor.py +141 -0
  12. docling_ibm_models-3.4.4/tests/test_common.py +89 -0
  13. docling_ibm_models-3.4.4/tests/test_document_figure_classifier.py +98 -0
  14. docling_ibm_models-3.4.4/tests/test_layout_predictor.py +110 -0
  15. docling_ibm_models-3.4.4/tests/test_reading_order.py +272 -0
  16. docling_ibm_models-3.4.4/tests/test_tf_predictor.py +580 -0
  17. docling_ibm_models-3.4.2/pyproject.toml +0 -122
  18. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/LICENSE +0 -0
  19. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/__init__.py +0 -0
  20. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/__init__.py +0 -0
  21. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/code_formula_predictor.py +0 -0
  22. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/__init__.py +0 -0
  23. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam.py +0 -0
  24. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py +0 -0
  25. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/__init__.py +0 -0
  26. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py +0 -0
  27. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/__init__.py +0 -0
  28. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  29. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/py.typed +0 -0
  30. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/__init__.py +0 -0
  31. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/reading_order/reading_order_rb.py +0 -0
  32. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/__init__.py +0 -0
  33. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/common.py +0 -0
  34. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  35. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  36. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  37. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  38. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/tf_predictor.py +0 -0
  39. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  40. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  41. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  42. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  43. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  44. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  45. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  46. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  47. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  48. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/otsl.py +0 -0
  49. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/settings.py +0 -0
  50. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  51. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  52. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/mem_monitor.py +0 -0
  53. {docling_ibm_models-3.4.2 → docling_ibm_models-3.4.4}/docling_ibm_models/tableformer/utils/utils.py +0 -0
@@ -1,44 +1,41 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: docling-ibm-models
3
- Version: 3.4.2
3
+ Version: 3.4.4
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
- License: MIT
5
+ Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
8
+ Project-URL: repository, https://github.com/docling-project/docling-ibm-models
9
+ Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
6
11
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
7
- Author: Nikos Livathinos
8
- Author-email: nli@zurich.ibm.com
9
- Requires-Python: >=3.9,<4.0
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
10
14
  Classifier: Development Status :: 5 - Production/Stable
11
15
  Classifier: Intended Audience :: Developers
12
16
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Operating System :: MacOS :: MacOS X
15
- Classifier: Operating System :: POSIX :: Linux
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Programming Language :: Python :: 3.13
22
17
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
- Requires-Dist: Pillow (>=10.0.0,<12.0.0)
24
- Requires-Dist: docling-core (>=2.19.0,<3.0.0)
25
- Requires-Dist: huggingface_hub (>=0.23,<1)
26
- Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
27
- Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
28
- Requires-Dist: numpy (>=1.24.4,<3.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
29
- Requires-Dist: opencv-python-headless (>=4.6.0.66,<5.0.0.0)
30
- Requires-Dist: pydantic (>=2.0.0,<3.0.0)
31
- Requires-Dist: safetensors[torch] (>=0.4.3,<1)
32
- Requires-Dist: torch (>=2.2.2,<3.0.0)
33
- Requires-Dist: torchvision (>=0,<1)
34
- Requires-Dist: tqdm (>=4.64.0,<5.0.0)
35
- Requires-Dist: transformers (>=4.42.0,<4.43.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
36
- Requires-Dist: transformers (>=4.42.0,<5.0.0) ; sys_platform != "darwin" or platform_machine != "x86_64"
18
+ Classifier: Programming Language :: Python :: 3
19
+ Requires-Python: <4.0,>=3.9
37
20
  Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: torch<3.0.0,>=2.2.2
23
+ Requires-Dist: torchvision<1,>=0
24
+ Requires-Dist: jsonlines<4.0.0,>=3.1.0
25
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
26
+ Requires-Dist: tqdm<5.0.0,>=4.64.0
27
+ Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
28
+ Requires-Dist: huggingface_hub<1,>=0.23
29
+ Requires-Dist: safetensors[torch]<1,>=0.4.3
30
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
31
+ Requires-Dist: docling-core<3.0.0,>=2.19.0
32
+ Requires-Dist: transformers<5.0.0,>=4.42.0
33
+ Requires-Dist: numpy<3.0.0,>=1.24.4
34
+ Dynamic: license-file
38
35
 
39
36
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
40
37
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
41
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
38
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
42
39
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
43
40
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
44
41
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
@@ -53,53 +50,6 @@ AI modules to support the Docling PDF document conversion project.
53
50
  - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
54
51
 
55
52
 
56
- ## Installation Instructions
57
-
58
- ### MacOS / Linux
59
-
60
- To install `poetry` locally, use either `pip` or `homebrew`.
61
-
62
- To install `poetry` on a docker container, do the following:
63
- ```
64
- ENV POETRY_NO_INTERACTION=1 \
65
- POETRY_VIRTUALENVS_CREATE=false
66
-
67
- # Install poetry
68
- RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
69
- && python install-poetry.py \
70
- && poetry --version \
71
- && rm install-poetry.py
72
- ```
73
-
74
- To install and run the package, simply set up a poetry environment
75
-
76
- ```
77
- poetry env use $(which python3.10)
78
- poetry shell
79
- ```
80
-
81
- and install all the dependencies,
82
-
83
- ```
84
- poetry install # this will only install the deps from the poetry.lock
85
-
86
- poetry install --no-dev # this will skip installing dev dependencies
87
- ```
88
-
89
- To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
90
- ```
91
- poetry update
92
- ```
93
-
94
- #### MacOS Intel
95
-
96
- When in development mode on MacOS with Intel chips, one can use compatible dependencies with
97
-
98
- ```console
99
- poetry update --with mac_intel
100
- ```
101
-
102
-
103
53
  ## Pipeline Overview
104
54
  ![Architecture](docs/tablemodel_overview_color.png)
105
55
 
@@ -177,4 +127,3 @@ e.g.
177
127
  ```
178
128
  python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
179
129
  ```
180
-
@@ -1,6 +1,6 @@
1
1
  [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
2
2
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
3
- [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
3
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
4
4
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5
5
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
6
6
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
@@ -15,53 +15,6 @@ AI modules to support the Docling PDF document conversion project.
15
15
  - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
16
16
 
17
17
 
18
- ## Installation Instructions
19
-
20
- ### MacOS / Linux
21
-
22
- To install `poetry` locally, use either `pip` or `homebrew`.
23
-
24
- To install `poetry` on a docker container, do the following:
25
- ```
26
- ENV POETRY_NO_INTERACTION=1 \
27
- POETRY_VIRTUALENVS_CREATE=false
28
-
29
- # Install poetry
30
- RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
31
- && python install-poetry.py \
32
- && poetry --version \
33
- && rm install-poetry.py
34
- ```
35
-
36
- To install and run the package, simply set up a poetry environment
37
-
38
- ```
39
- poetry env use $(which python3.10)
40
- poetry shell
41
- ```
42
-
43
- and install all the dependencies,
44
-
45
- ```
46
- poetry install # this will only install the deps from the poetry.lock
47
-
48
- poetry install --no-dev # this will skip installing dev dependencies
49
- ```
50
-
51
- To update or add new dependencies from `pyproject.toml`, rebuild `poetry.lock`
52
- ```
53
- poetry update
54
- ```
55
-
56
- #### MacOS Intel
57
-
58
- When in development mode on MacOS with Intel chips, one can use compatible dependencies with
59
-
60
- ```console
61
- poetry update --with mac_intel
62
- ```
63
-
64
-
65
18
  ## Pipeline Overview
66
19
  ![Architecture](docs/tablemodel_overview_color.png)
67
20
 
@@ -52,7 +52,7 @@ class SamOptConfig(OPTConfig):
52
52
 
53
53
 
54
54
  class SamOPTModel(OPTModel):
55
- config_class = SamOptConfig
55
+ config_class = SamOptConfig # type: ignore
56
56
 
57
57
  def __init__(self, config: OPTConfig):
58
58
  super(SamOPTModel, self).__init__(config)
@@ -82,10 +82,10 @@ class SamOPTModel(OPTModel):
82
82
  inputs_embeds = self.embed_tokens(input_ids)
83
83
 
84
84
  vision_tower = getattr(self, "vision_tower", None)
85
- im_start_token = getattr(self.config, "im_start_token", -1)
85
+ im_start_token = getattr(self.config, "im_start_token", -1) # type: ignore
86
86
 
87
- if input_ids.shape[1] != 1 or self.training:
88
- with torch.set_grad_enabled(self.training):
87
+ if input_ids.shape[1] != 1 or self.training: # type: ignore
88
+ with torch.set_grad_enabled(self.training): # type: ignore
89
89
  assert vision_tower is not None
90
90
  image_features = vision_tower(images)
91
91
  image_features = image_features.flatten(2).permute(0, 2, 1)
@@ -118,7 +118,7 @@ class SamOPTModel(OPTModel):
118
118
 
119
119
  inputs_embeds = torch.stack(new_input_embeds, dim=0) # type: ignore
120
120
 
121
- return super(SamOPTModel, self).forward(
121
+ return super(SamOPTModel, self).forward( # type: ignore
122
122
  input_ids=None,
123
123
  attention_mask=attention_mask,
124
124
  past_key_values=past_key_values,
@@ -131,7 +131,7 @@ class SamOPTModel(OPTModel):
131
131
 
132
132
 
133
133
  class SamOPTForCausalLM(OPTForCausalLM):
134
- config_class = SamOptConfig
134
+ config_class = SamOptConfig # type: ignore
135
135
 
136
136
  def __init__(self, config):
137
137
  super(OPTForCausalLM, self).__init__(config)
@@ -165,12 +165,12 @@ class SamOPTForCausalLM(OPTForCausalLM):
165
165
  output_attentions = (
166
166
  output_attentions
167
167
  if output_attentions is not None
168
- else self.config.output_attentions
168
+ else self.config.output_attentions # type: ignore
169
169
  )
170
170
  output_hidden_states = (
171
171
  output_hidden_states
172
172
  if output_hidden_states is not None
173
- else self.config.output_hidden_states
173
+ else self.config.output_hidden_states # type: ignore
174
174
  )
175
175
 
176
176
  outputs = self.model(
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: docling-ibm-models
3
+ Version: 3.4.4
4
+ Summary: This package contains the AI models used by the Docling PDF conversion package
5
+ Author-email: Nikos Livathinos <nli@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/docling-project/docling-ibm-models
8
+ Project-URL: repository, https://github.com/docling-project/docling-ibm-models
9
+ Project-URL: issues, https://github.com/docling-project/docling-ibm-models/issues
10
+ Project-URL: changelog, https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md
11
+ Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
12
+ Classifier: Operating System :: MacOS :: MacOS X
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Programming Language :: Python :: 3
19
+ Requires-Python: <4.0,>=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: torch<3.0.0,>=2.2.2
23
+ Requires-Dist: torchvision<1,>=0
24
+ Requires-Dist: jsonlines<4.0.0,>=3.1.0
25
+ Requires-Dist: Pillow<12.0.0,>=10.0.0
26
+ Requires-Dist: tqdm<5.0.0,>=4.64.0
27
+ Requires-Dist: opencv-python-headless<5.0.0.0,>=4.6.0.66
28
+ Requires-Dist: huggingface_hub<1,>=0.23
29
+ Requires-Dist: safetensors[torch]<1,>=0.4.3
30
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
31
+ Requires-Dist: docling-core<3.0.0,>=2.19.0
32
+ Requires-Dist: transformers<5.0.0,>=4.42.0
33
+ Requires-Dist: numpy<3.0.0,>=1.24.4
34
+ Dynamic: license-file
35
+
36
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
37
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
38
+ [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
39
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
40
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
41
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
42
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
43
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
44
+
45
+ # Docling IBM models
46
+
47
+ AI modules to support the Docling PDF document conversion project.
48
+
49
+ - TableFormer is an AI module that recognizes the structure of a table and the bounding boxes of the table content.
50
+ - Layout model is an AI model that provides, among other things, the ability to detect tables on the page. This package contains inference code for the Layout model.
51
+
52
+
53
+ ## Pipeline Overview
54
+ ![Architecture](docs/tablemodel_overview_color.png)
55
+
56
+ ## Datasets
57
+ Below we list the datasets used, with their description, source, and ***"TableFormer Format"***. The TableFormer Format is our processed version of the original format, made to work with the dataloader out of the box and to augment the dataset when necessary by adding missing ground truth (bounding boxes for empty cells).
58
+
59
+
60
+ | Name | Description | URL |
61
+ | ------------- |:-------------:|----|
62
+ | PubTabNet | PubTabNet contains heterogeneous tables in both image and HTML format, 516k+ tables in the PubMed Central Open Access Subset | [PubTabNet](https://developer.ibm.com/exchanges/data/all/pubtabnet/) |
63
+ | FinTabNet| A dataset for Financial Report Tables with corresponding ground truth location and structure. 112k+ tables included.| [FinTabNet](https://developer.ibm.com/exchanges/data/all/fintabnet/) |
64
+ | TableBank| TableBank is a new image-based table detection and recognition dataset built with novel weak supervision from Word and Latex documents on the internet, contains 417K high-quality labeled tables. | [TableBank](https://github.com/doc-analysis/TableBank) |
65
+
66
+ ## Models
67
+
68
+ ### TableModel04:
69
+ ![TableModel04](docs/tbm04.png)
70
+ **TableModel04rs (OTSL)** is our SOTA method that uses transformers to predict table structure and bounding boxes.
71
+
72
+
73
+ ## Configuration file
74
+
75
+ An example configuration can be found inside the test `tests/test_tf_predictor.py`.
76
+ These are the main sections of the configuration file:
77
+
78
+ - `dataset`: The directory for prepared data and the parameters used during the data loading.
79
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
80
+ trained checkpoint files.
81
+ - `train`: Parameters for the training of the model.
82
+ - `predict`: Parameters for the evaluation of the model.
83
+ - `dataset_wordmap`: Very important part that contains token maps.
84
+
85
+
86
+ ## Model weights
87
+
88
+ You can download the model weights and config files from the links:
89
+
90
+ - [TableFormer Checkpoint](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/tableformer)
91
+ - [beehive_v0.0.5](https://huggingface.co/ds4sd/docling-models/tree/main/model_artifacts/layout/beehive_v0.0.5)
92
+
93
+
94
+ ## Inference Tests
95
+
96
+ You can run the inference tests for the models with:
97
+
98
+ ```
99
+ python -m pytest tests/
100
+ ```
101
+
102
+ This will also generate prediction and matching visualizations that can be found here:
103
+ `tests\test_data\viz\`
104
+
105
+ Visualization outlines:
106
+ - `Light Pink`: border of recognized table
107
+ - `Grey`: OCR cells
108
+ - `Green`: prediction bboxes
109
+ - `Red`: OCR cells matched with prediction
110
+ - `Blue`: Post processed, match
111
+ - `Bold Blue`: column header
112
+ - `Bold Magenta`: row header
113
+ - `Bold Brown`: section row (if the table has one)
114
+
115
+
116
+ ## Demo
117
+
118
+ A demo application lets you apply the `LayoutPredictor` to a directory `<input_dir>` that contains
119
+ `png` images and visualize the predictions inside another directory `<viz_dir>`.
120
+
121
+ First download the model weights (see above), then run:
122
+ ```
123
+ python -m demo.demo_layout_predictor -i <input_dir> -v <viz_dir>
124
+ ```
125
+
126
+ e.g.
127
+ ```
128
+ python -m demo.demo_layout_predictor -i tests/test_data/samples -v viz/
129
+ ```
@@ -0,0 +1,50 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ docling_ibm_models/__init__.py
5
+ docling_ibm_models/py.typed
6
+ docling_ibm_models.egg-info/PKG-INFO
7
+ docling_ibm_models.egg-info/SOURCES.txt
8
+ docling_ibm_models.egg-info/dependency_links.txt
9
+ docling_ibm_models.egg-info/requires.txt
10
+ docling_ibm_models.egg-info/top_level.txt
11
+ docling_ibm_models/code_formula_model/__init__.py
12
+ docling_ibm_models/code_formula_model/code_formula_predictor.py
13
+ docling_ibm_models/code_formula_model/models/__init__.py
14
+ docling_ibm_models/code_formula_model/models/sam.py
15
+ docling_ibm_models/code_formula_model/models/sam_opt.py
16
+ docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py
17
+ docling_ibm_models/document_figure_classifier_model/__init__.py
18
+ docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py
19
+ docling_ibm_models/layoutmodel/__init__.py
20
+ docling_ibm_models/layoutmodel/layout_predictor.py
21
+ docling_ibm_models/reading_order/__init__.py
22
+ docling_ibm_models/reading_order/reading_order_rb.py
23
+ docling_ibm_models/tableformer/__init__.py
24
+ docling_ibm_models/tableformer/common.py
25
+ docling_ibm_models/tableformer/otsl.py
26
+ docling_ibm_models/tableformer/settings.py
27
+ docling_ibm_models/tableformer/data_management/__init__.py
28
+ docling_ibm_models/tableformer/data_management/functional.py
29
+ docling_ibm_models/tableformer/data_management/matching_post_processor.py
30
+ docling_ibm_models/tableformer/data_management/tf_cell_matcher.py
31
+ docling_ibm_models/tableformer/data_management/tf_predictor.py
32
+ docling_ibm_models/tableformer/data_management/transforms.py
33
+ docling_ibm_models/tableformer/models/__init__.py
34
+ docling_ibm_models/tableformer/models/common/__init__.py
35
+ docling_ibm_models/tableformer/models/common/base_model.py
36
+ docling_ibm_models/tableformer/models/table04_rs/__init__.py
37
+ docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py
38
+ docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py
39
+ docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py
40
+ docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py
41
+ docling_ibm_models/tableformer/utils/__init__.py
42
+ docling_ibm_models/tableformer/utils/app_profiler.py
43
+ docling_ibm_models/tableformer/utils/mem_monitor.py
44
+ docling_ibm_models/tableformer/utils/utils.py
45
+ tests/test_code_formula_predictor.py
46
+ tests/test_common.py
47
+ tests/test_document_figure_classifier.py
48
+ tests/test_layout_predictor.py
49
+ tests/test_reading_order.py
50
+ tests/test_tf_predictor.py
@@ -0,0 +1,12 @@
1
+ torch<3.0.0,>=2.2.2
2
+ torchvision<1,>=0
3
+ jsonlines<4.0.0,>=3.1.0
4
+ Pillow<12.0.0,>=10.0.0
5
+ tqdm<5.0.0,>=4.64.0
6
+ opencv-python-headless<5.0.0.0,>=4.6.0.66
7
+ huggingface_hub<1,>=0.23
8
+ safetensors[torch]<1,>=0.4.3
9
+ pydantic<3.0.0,>=2.0.0
10
+ docling-core<3.0.0,>=2.19.0
11
+ transformers<5.0.0,>=4.42.0
12
+ numpy<3.0.0,>=1.24.4
@@ -0,0 +1 @@
1
+ docling_ibm_models
@@ -0,0 +1,105 @@
1
+ [project]
2
+ name = "docling-ibm-models"
3
+ version = "3.4.4" # DO NOT EDIT, updated automatically
4
+ description = "This package contains the AI models used by the Docling PDF conversion package"
5
+ license = "MIT"
6
+ keywords = ["docling", "convert", "document", "pdf", "layout model", "segmentation", "table structure", "table former"]
7
+ readme = "README.md"
8
+ authors = [
9
+ { name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
10
+ { name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
11
+ { name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
12
+ { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
13
+ { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
14
+ { name = "Peter Staar", email = "taa@zurich.ibm.com" },
15
+ ]
16
+ classifiers = [
17
+ "Operating System :: MacOS :: MacOS X",
18
+ "Operating System :: POSIX :: Linux",
19
+ "Development Status :: 5 - Production/Stable",
20
+ "Intended Audience :: Developers",
21
+ "Intended Audience :: Science/Research",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Programming Language :: Python :: 3",
24
+ ]
25
+ requires-python = '>=3.9,<4.0'
26
+ dependencies = [
27
+ 'torch (>=2.2.2,<3.0.0)',
28
+ 'torchvision (>=0,<1)',
29
+ 'jsonlines (>=3.1.0,<4.0.0)',
30
+ 'Pillow (>=10.0.0,<12.0.0)',
31
+ 'tqdm (>=4.64.0,<5.0.0)',
32
+ 'opencv-python-headless (>=4.6.0.66,<5.0.0.0)',
33
+ 'huggingface_hub (>=0.23,<1)',
34
+ 'safetensors[torch] (>=0.4.3,<1)',
35
+ 'pydantic (>=2.0.0,<3.0.0)',
36
+ 'docling-core (>=2.19.0,<3.0.0)',
37
+ 'transformers (>=4.42.0,<5.0.0)',
38
+ 'numpy (>=1.24.4,<3.0.0)',
39
+ ]
40
+
41
+ [project.urls]
42
+ homepage = "https://github.com/docling-project/docling-ibm-models"
43
+ repository = "https://github.com/docling-project/docling-ibm-models"
44
+ issues = "https://github.com/docling-project/docling-ibm-models/issues"
45
+ changelog = "https://github.com/docling-project/docling-ibm-models/blob/main/CHANGELOG.md"
46
+
47
+ [dependency-groups]
48
+ dev = [
49
+ "pre-commit~=3.7",
50
+ "mypy~=1.10",
51
+ "black~=24.4",
52
+ "isort~=5.10",
53
+ "autoflake~=2.0",
54
+ "flake8~=7.1",
55
+ "flake8-docstrings~=1.6",
56
+ "types-setuptools~=70.3",
57
+ "pandas-stubs~=2.1",
58
+ "types-requests~=2.31",
59
+ "coverage~=7.6",
60
+ "pytest~=8.3",
61
+ "pytest-cov>=6.1.1",
62
+ "pytest-dependency~=0.6",
63
+ "pytest-xdist~=3.3",
64
+ "python-semantic-release~=7.32",
65
+ "datasets~=3.2",
66
+ ]
67
+
68
+ [tool.uv]
69
+ package = true
70
+
71
+ [tool.setuptools.packages.find]
72
+ include = ["docling_ibm_models*"]
73
+
74
+ [tool.black]
75
+ line-length = 88
76
+ target-version = ["py39"]
77
+ include = '\.pyi?$'
78
+
79
+ [tool.isort]
80
+ profile = "black"
81
+ line_length = 88
82
+ py_version = 39
83
+
84
+ [tool.semantic_release]
85
+ # for default values check:
86
+ # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
87
+
88
+ version_source = "tag_only"
89
+ branch = "main"
90
+
91
+ # configure types which should trigger minor and patch version bumps respectively
92
+ # (note that they must be a subset of the configured allowed types):
93
+ parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
94
+ parser_angular_minor_types = "feat"
95
+ parser_angular_patch_types = "fix,perf"
96
+
97
+
98
+ [tool.mypy]
99
+ pretty = true
100
+ no_implicit_optional = true
101
+ python_version = "3.10"
102
+
103
+ [[tool.mypy.overrides]]
104
+ module = ["torchvision.*", "transformers.*"]
105
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+