docling-ibm-models 0.2.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/PKG-INFO +20 -10
  2. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/README.md +17 -8
  3. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/layoutmodel/layout_predictor.py +1 -1
  4. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -5
  5. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/pyproject.toml +17 -4
  6. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/LICENSE +0 -0
  7. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/__init__.py +0 -0
  8. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/common.py +0 -0
  9. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  10. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
  11. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  12. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  13. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  14. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
  15. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  16. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  17. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  18. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  19. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  20. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  21. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  22. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  23. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  24. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/otsl.py +0 -0
  25. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/settings.py +0 -0
  26. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
  27. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
  28. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  29. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  30. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  31. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
  32. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/variance.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 0.2.0
3
+ Version: 1.1.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
7
7
  Author: Nikos Livathinos
8
8
  Author-email: nli@zurich.ibm.com
9
- Requires-Python: >=3.11,<4.0
9
+ Requires-Python: >=3.10,<4.0
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -14,6 +14,7 @@ Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: POSIX :: Linux
16
16
  Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
17
18
  Classifier: Programming Language :: Python :: 3.11
18
19
  Classifier: Programming Language :: Python :: 3.12
19
20
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -32,7 +33,16 @@ Requires-Dist: torchvision (==0.17.2)
32
33
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
33
34
  Description-Content-Type: text/markdown
34
35
 
35
- # Docling-models
36
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
37
+ ![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)
38
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
39
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
40
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
41
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
42
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
43
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
44
+
45
+ # Docling IBM models
36
46
 
37
47
  AI modules to support the Docling PDF document conversion project.
38
48
 
@@ -41,7 +51,7 @@ AI modules to support the Dockling PDF document conversion project.
41
51
 
42
52
 
43
53
  ## Installation Instructions
44
-
54
+
45
55
  ### MacOS / Linux
46
56
 
47
57
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -49,8 +59,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
49
59
  To install `poetry` on a docker container, do the following:
50
60
  ```
51
61
  ENV POETRY_NO_INTERACTION=1 \
52
- POETRY_VIRTUALENVS_CREATE=false
53
-
62
+ POETRY_VIRTUALENVS_CREATE=false
63
+
54
64
  # Install poetry
55
65
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
56
66
  && python install-poetry.py \
@@ -58,10 +68,10 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
58
68
  && rm install-poetry.py
59
69
  ```
60
70
 
61
- To install and run the package, simply set up a poetry environment
71
+ To install and run the package, simply set up a poetry environment
62
72
 
63
73
  ```
64
- poetry env use $(which python3.11)
74
+ poetry env use $(which python3.10)
65
75
  poetry shell
66
76
  ```
67
77
 
@@ -105,7 +115,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
105
115
  These are the main sections of the configuration file:
106
116
 
107
117
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
108
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
118
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
109
119
  trained checkpoint files.
110
120
  - `train`: Parameters for the training of the model.
111
121
  - `predict`: Parameters for the evaluation of the model.
@@ -131,7 +141,7 @@ First download the model weights (see above), then run:
131
141
  ./devtools/check_code.sh
132
142
  ```
133
143
 
134
- This will also generate prediction and matching visualizations that can be found here:
144
+ This will also generate prediction and matching visualizations that can be found here:
135
145
  `tests/test_data/viz/`
136
146
 
137
147
  Visualization outlines:
@@ -1,4 +1,13 @@
1
- # Docling-models
1
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
2
+ ![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)
3
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
4
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
6
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
7
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
8
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
9
+
10
+ # Docling IBM models
2
11
 
3
12
  AI modules to support the Docling PDF document conversion project.
4
13
 
@@ -7,7 +16,7 @@ AI modules to support the Dockling PDF document conversion project.
7
16
 
8
17
 
9
18
  ## Installation Instructions
10
-
19
+
11
20
  ### MacOS / Linux
12
21
 
13
22
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -15,8 +24,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
15
24
  To install `poetry` on a docker container, do the following:
16
25
  ```
17
26
  ENV POETRY_NO_INTERACTION=1 \
18
- POETRY_VIRTUALENVS_CREATE=false
19
-
27
+ POETRY_VIRTUALENVS_CREATE=false
28
+
20
29
  # Install poetry
21
30
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
22
31
  && python install-poetry.py \
@@ -24,10 +33,10 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
24
33
  && rm install-poetry.py
25
34
  ```
26
35
 
27
- To install and run the package, simply set up a poetry environment
36
+ To install and run the package, simply set up a poetry environment
28
37
 
29
38
  ```
30
- poetry env use $(which python3.11)
39
+ poetry env use $(which python3.10)
31
40
  poetry shell
32
41
  ```
33
42
 
@@ -71,7 +80,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
71
80
  These are the main sections of the configuration file:
72
81
 
73
82
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
74
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
83
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
75
84
  trained checkpoint files.
76
85
  - `train`: Parameters for the training of the model.
77
86
  - `predict`: Parameters for the evaluation of the model.
@@ -97,7 +106,7 @@ First download the model weights (see above), then run:
97
106
  ./devtools/check_code.sh
98
107
  ```
99
108
 
100
- This will also generate prediction and matching visualizations that can be found here:
109
+ This will also generate prediction and matching visualizations that can be found here:
101
110
  `tests/test_data/viz/`
102
111
 
103
112
  Visualization outlines:
@@ -118,7 +118,7 @@ class LayoutPredictor:
118
118
  }
119
119
  return info
120
120
 
121
- def predict(self, orig_img: Union[Image, np.array]) -> Iterable[dict]:
121
+ def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]:
122
122
  r"""
123
123
  Predict bounding boxes for a given image.
124
124
  The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as:
@@ -177,7 +177,7 @@ class TFPredictor:
177
177
  self._model_type = self._config["model"]["type"]
178
178
  # Added import here to avoid loading turbotransformer library unnecessarily
179
179
  if self._model_type == "TableModel04_rs":
180
- from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa: F401
180
+ from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa
181
181
  TableModel04_rs,
182
182
  )
183
183
  for candidate in BaseModel.__subclasses__():
@@ -437,10 +437,10 @@ class TFPredictor:
437
437
  for pdf_cell in pdf_cells:
438
438
  if pdf_cell["id"] == docling_item["cell_id"]:
439
439
  text_cell_bbox = {
440
- "b": pdf_cell["bbox"][1],
440
+ "b": pdf_cell["bbox"][3],
441
441
  "l": pdf_cell["bbox"][0],
442
442
  "r": pdf_cell["bbox"][2],
443
- "t": pdf_cell["bbox"][3],
443
+ "t": pdf_cell["bbox"][1],
444
444
  "token": pdf_cell["text"],
445
445
  }
446
446
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -468,10 +468,10 @@ class TFPredictor:
468
468
  for pdf_cell in pdf_cells:
469
469
  if pdf_cell["id"] == docling_item["cell_id"]:
470
470
  text_cell_bbox = {
471
- "b": pdf_cell["bbox"][1],
471
+ "b": pdf_cell["bbox"][3],
472
472
  "l": pdf_cell["bbox"][0],
473
473
  "r": pdf_cell["bbox"][2],
474
- "t": pdf_cell["bbox"][3],
474
+ "t": pdf_cell["bbox"][1],
475
475
  "token": pdf_cell["text"],
476
476
  }
477
477
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-ibm-models"
3
- version = "0.2.0"
3
+ version = "1.1.0" # DO NOT EDIT, updated automatically
4
4
  description = "This package contains the AI models used by the Docling PDF conversion package"
5
5
  authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -21,7 +21,7 @@ packages = [
21
21
  ]
22
22
 
23
23
  [tool.poetry.dependencies]
24
- python = "^3.11"
24
+ python = "^3.10"
25
25
  torch = "2.2.2"
26
26
  torchvision = "0.17.2"
27
27
  onnxruntime = "^1.16.2"
@@ -57,10 +57,23 @@ build-backend = "poetry.core.masonry.api"
57
57
 
58
58
  [tool.black]
59
59
  line-length = 88
60
- target-version = ["py311"]
60
+ target-version = ["py310"]
61
61
  include = '\.pyi?$'
62
62
 
63
63
  [tool.isort]
64
64
  profile = "black"
65
65
  line_length = 88
66
- py_version=311
66
+ py_version=310
67
+
68
+ [tool.semantic_release]
69
+ # for default values check:
70
+ # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
71
+
72
+ version_source = "tag_only"
73
+ branch = "main"
74
+
75
+ # configure types which should trigger minor and patch version bumps respectively
76
+ # (note that they must be a subset of the configured allowed types):
77
+ parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
78
+ parser_angular_minor_types = "feat"
79
+ parser_angular_patch_types = "fix,perf"