docling-ibm-models 0.2.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/PKG-INFO +17 -8
  2. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/README.md +16 -7
  3. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -5
  4. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/pyproject.toml +14 -1
  5. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/LICENSE +0 -0
  6. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/layoutmodel/layout_predictor.py +0 -0
  7. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/__init__.py +0 -0
  8. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/common.py +0 -0
  9. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  10. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
  11. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  12. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  13. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  14. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
  15. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  16. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  17. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  18. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  19. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  20. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  21. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  22. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  23. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  24. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/otsl.py +0 -0
  25. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/settings.py +0 -0
  26. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
  27. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
  28. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  29. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  30. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  31. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
  32. {docling_ibm_models-0.2.0 → docling_ibm_models-1.0.0}/docling_ibm_models/tableformer/utils/variance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 0.2.0
3
+ Version: 1.0.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -32,7 +32,16 @@ Requires-Dist: torchvision (==0.17.2)
32
32
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
33
33
  Description-Content-Type: text/markdown
34
34
 
35
- # Docling-models
35
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
36
+ ![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue)
37
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
38
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
39
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
40
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
41
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
42
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
43
+
44
+ # Docling IBM models
36
45
 
37
46
  AI modules to support the Docling PDF document conversion project.
38
47
 
@@ -41,7 +50,7 @@ AI modules to support the Docling PDF document conversion project.
41
50
 
42
51
 
43
52
  ## Installation Instructions
44
-
53
+
45
54
  ### MacOS / Linux
46
55
 
47
56
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -49,8 +58,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
49
58
  To install `poetry` on a docker container, do the following:
50
59
  ```
51
60
  ENV POETRY_NO_INTERACTION=1 \
52
- POETRY_VIRTUALENVS_CREATE=false
53
-
61
+ POETRY_VIRTUALENVS_CREATE=false
62
+
54
63
  # Install poetry
55
64
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
56
65
  && python install-poetry.py \
@@ -58,7 +67,7 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
58
67
  && rm install-poetry.py
59
68
  ```
60
69
 
61
- To install and run the package, simply set up a poetry environment
70
+ To install and run the package, simply set up a poetry environment
62
71
 
63
72
  ```
64
73
  poetry env use $(which python3.11)
@@ -105,7 +114,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
105
114
  These are the main sections of the configuration file:
106
115
 
107
116
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
108
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
117
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
109
118
  trained checkpoint files.
110
119
  - `train`: Parameters for the training of the model.
111
120
  - `predict`: Parameters for the evaluation of the model.
@@ -131,7 +140,7 @@ First download the model weights (see above), then run:
131
140
  ./devtools/check_code.sh
132
141
  ```
133
142
 
134
- This will also generate prediction and matching visualizations that can be found here:
143
+ This will also generate prediction and matching visualizations that can be found here:
135
144
  `tests\test_data\viz\`
136
145
 
137
146
  Visualization outlines:
@@ -1,4 +1,13 @@
1
- # Docling-models
1
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
2
+ ![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue)
3
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
4
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
6
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
7
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
8
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
9
+
10
+ # Docling IBM models
2
11
 
3
12
  AI modules to support the Docling PDF document conversion project.
4
13
 
@@ -7,7 +16,7 @@ AI modules to support the Docling PDF document conversion project.
7
16
 
8
17
 
9
18
  ## Installation Instructions
10
-
19
+
11
20
  ### MacOS / Linux
12
21
 
13
22
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -15,8 +24,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
15
24
  To install `poetry` on a docker container, do the following:
16
25
  ```
17
26
  ENV POETRY_NO_INTERACTION=1 \
18
- POETRY_VIRTUALENVS_CREATE=false
19
-
27
+ POETRY_VIRTUALENVS_CREATE=false
28
+
20
29
  # Install poetry
21
30
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
22
31
  && python install-poetry.py \
@@ -24,7 +33,7 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
24
33
  && rm install-poetry.py
25
34
  ```
26
35
 
27
- To install and run the package, simply set up a poetry environment
36
+ To install and run the package, simply set up a poetry environment
28
37
 
29
38
  ```
30
39
  poetry env use $(which python3.11)
@@ -71,7 +80,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
71
80
  These are the main sections of the configuration file:
72
81
 
73
82
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
74
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
83
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
75
84
  trained checkpoint files.
76
85
  - `train`: Parameters for the training of the model.
77
86
  - `predict`: Parameters for the evaluation of the model.
@@ -97,7 +106,7 @@ First download the model weights (see above), then run:
97
106
  ./devtools/check_code.sh
98
107
  ```
99
108
 
100
- This will also generate prediction and matching visualizations that can be found here:
109
+ This will also generate prediction and matching visualizations that can be found here:
101
110
  `tests\test_data\viz\`
102
111
 
103
112
  Visualization outlines:
@@ -177,7 +177,7 @@ class TFPredictor:
177
177
  self._model_type = self._config["model"]["type"]
178
178
  # Added import here to avoid loading turbotransformer library unnecessarily
179
179
  if self._model_type == "TableModel04_rs":
180
- from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa: F401
180
+ from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa
181
181
  TableModel04_rs,
182
182
  )
183
183
  for candidate in BaseModel.__subclasses__():
@@ -437,10 +437,10 @@ class TFPredictor:
437
437
  for pdf_cell in pdf_cells:
438
438
  if pdf_cell["id"] == docling_item["cell_id"]:
439
439
  text_cell_bbox = {
440
- "b": pdf_cell["bbox"][1],
440
+ "b": pdf_cell["bbox"][3],
441
441
  "l": pdf_cell["bbox"][0],
442
442
  "r": pdf_cell["bbox"][2],
443
- "t": pdf_cell["bbox"][3],
443
+ "t": pdf_cell["bbox"][1],
444
444
  "token": pdf_cell["text"],
445
445
  }
446
446
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -468,10 +468,10 @@ class TFPredictor:
468
468
  for pdf_cell in pdf_cells:
469
469
  if pdf_cell["id"] == docling_item["cell_id"]:
470
470
  text_cell_bbox = {
471
- "b": pdf_cell["bbox"][1],
471
+ "b": pdf_cell["bbox"][3],
472
472
  "l": pdf_cell["bbox"][0],
473
473
  "r": pdf_cell["bbox"][2],
474
- "t": pdf_cell["bbox"][3],
474
+ "t": pdf_cell["bbox"][1],
475
475
  "token": pdf_cell["text"],
476
476
  }
477
477
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-ibm-models"
3
- version = "0.2.0"
3
+ version = "1.0.0" # DO NOT EDIT, updated automatically
4
4
  description = "This package contains the AI models used by the Docling PDF conversion package"
5
5
  authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -64,3 +64,16 @@ include = '\.pyi?$'
64
64
  profile = "black"
65
65
  line_length = 88
66
66
  py_version=311
67
+
68
+ [tool.semantic_release]
69
+ # for default values check:
70
+ # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
71
+
72
+ version_source = "tag_only"
73
+ branch = "main"
74
+
75
+ # configure types which should trigger minor and patch version bumps respectively
76
+ # (note that they must be a subset of the configured allowed types):
77
+ parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
78
+ parser_angular_minor_types = "feat"
79
+ parser_angular_patch_types = "fix,perf"