docling-ibm-models 0.2.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/PKG-INFO +20 -10
  2. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/README.md +17 -8
  3. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/layoutmodel/layout_predictor.py +1 -1
  4. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_predictor.py +5 -5
  5. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/pyproject.toml +17 -4
  6. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/LICENSE +0 -0
  7. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/__init__.py +0 -0
  8. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/common.py +0 -0
  9. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/__init__.py +0 -0
  10. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/data_transformer.py +0 -0
  11. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/functional.py +0 -0
  12. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/matching_post_processor.py +0 -0
  13. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_cell_matcher.py +0 -0
  14. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/tf_dataset.py +0 -0
  15. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/data_management/transforms.py +0 -0
  16. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/__init__.py +0 -0
  17. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/common/__init__.py +0 -0
  18. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/common/base_model.py +0 -0
  19. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/__init__.py +0 -0
  20. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py +0 -0
  21. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py +0 -0
  22. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py +0 -0
  23. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +0 -0
  24. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/otsl.py +0 -0
  25. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/settings.py +0 -0
  26. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/test_dataset_cache.py +0 -0
  27. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/test_prepare_image.py +0 -0
  28. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/__init__.py +0 -0
  29. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/app_profiler.py +0 -0
  30. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/torch_utils.py +0 -0
  31. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/utils.py +0 -0
  32. {docling_ibm_models-0.2.0 → docling_ibm_models-1.1.0}/docling_ibm_models/tableformer/utils/variance.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 0.2.0
3
+ Version: 1.1.0
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
7
7
  Author: Nikos Livathinos
8
8
  Author-email: nli@zurich.ibm.com
9
- Requires-Python: >=3.11,<4.0
9
+ Requires-Python: >=3.10,<4.0
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -14,6 +14,7 @@ Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: POSIX :: Linux
16
16
  Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
17
18
  Classifier: Programming Language :: Python :: 3.11
18
19
  Classifier: Programming Language :: Python :: 3.12
19
20
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -32,7 +33,16 @@ Requires-Dist: torchvision (==0.17.2)
32
33
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
33
34
  Description-Content-Type: text/markdown
34
35
 
35
- # Docling-models
36
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
37
+ ![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)
38
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
39
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
40
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
41
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
42
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
43
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
44
+
45
+ # Docling IBM models
36
46
 
37
47
  AI modules to support the Docling PDF document conversion project.
38
48
 
@@ -41,7 +51,7 @@ AI modules to support the Dockling PDF document conversion project.
41
51
 
42
52
 
43
53
  ## Installation Instructions
44
-
54
+
45
55
  ### MacOS / Linux
46
56
 
47
57
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -49,8 +59,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
49
59
  To install `poetry` on a docker container, do the following:
50
60
  ```
51
61
  ENV POETRY_NO_INTERACTION=1 \
52
- POETRY_VIRTUALENVS_CREATE=false
53
-
62
+ POETRY_VIRTUALENVS_CREATE=false
63
+
54
64
  # Install poetry
55
65
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
56
66
  && python install-poetry.py \
@@ -58,10 +68,10 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
58
68
  && rm install-poetry.py
59
69
  ```
60
70
 
61
- To install and run the package, simply set up a poetry environment
71
+ To install and run the package, simply set up a poetry environment
62
72
 
63
73
  ```
64
- poetry env use $(which python3.11)
74
+ poetry env use $(which python3.10)
65
75
  poetry shell
66
76
  ```
67
77
 
@@ -105,7 +115,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
105
115
  These are the main sections of the configuration file:
106
116
 
107
117
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
108
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
118
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
109
119
  trained checkpoint files.
110
120
  - `train`: Parameters for the training of the model.
111
121
  - `predict`: Parameters for the evaluation of the model.
@@ -131,7 +141,7 @@ First download the model weights (see above), then run:
131
141
  ./devtools/check_code.sh
132
142
  ```
133
143
 
134
- This will also generate prediction and matching visualizations that can be found here:
144
+ This will also generate prediction and matching visualizations that can be found here:
135
145
  `tests/test_data/viz/`
136
146
 
137
147
  Visualization outlines:
@@ -1,4 +1,13 @@
1
- # Docling-models
1
+ [![PyPI version](https://img.shields.io/pypi/v/docling-ibm-models)](https://pypi.org/project/docling-ibm-models/)
2
+ ![Python](https://img.shields.io/badge/python-3.10%20|%203.11%20|%203.12-blue)
3
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
4
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
5
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
6
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
7
+ [![Models on Hugging Face](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/ds4sd/docling-models/)
8
+ [![License MIT](https://img.shields.io/github/license/ds4sd/deepsearch-toolkit)](https://opensource.org/licenses/MIT)
9
+
10
+ # Docling IBM models
2
11
 
3
12
  AI modules to support the Docling PDF document conversion project.
4
13
 
@@ -7,7 +16,7 @@ AI modules to support the Dockling PDF document conversion project.
7
16
 
8
17
 
9
18
  ## Installation Instructions
10
-
19
+
11
20
  ### MacOS / Linux
12
21
 
13
22
  To install `poetry` locally, use either `pip` or `homebrew`.
@@ -15,8 +24,8 @@ To install `poetry` locally, use either `pip` or `homebrew`.
15
24
  To install `poetry` on a docker container, do the following:
16
25
  ```
17
26
  ENV POETRY_NO_INTERACTION=1 \
18
- POETRY_VIRTUALENVS_CREATE=false
19
-
27
+ POETRY_VIRTUALENVS_CREATE=false
28
+
20
29
  # Install poetry
21
30
  RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
22
31
  && python install-poetry.py \
@@ -24,10 +33,10 @@ RUN curl -sSL 'https://install.python-poetry.org' > install-poetry.py \
24
33
  && rm install-poetry.py
25
34
  ```
26
35
 
27
- To install and run the package, simply set up a poetry environment
36
+ To install and run the package, simply set up a poetry environment
28
37
 
29
38
  ```
30
- poetry env use $(which python3.11)
39
+ poetry env use $(which python3.10)
31
40
  poetry shell
32
41
  ```
33
42
 
@@ -71,7 +80,7 @@ Example configuration can be seen inside test `tests/test_tf_predictor.py`
71
80
  These are the main sections of the configuration file:
72
81
 
73
82
  - `dataset`: The directory for prepared data and the parameters used during the data loading.
74
- - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
83
+ - `model`: The type, name and hyperparameters of the model. Also the directory to save/load the
75
84
  trained checkpoint files.
76
85
  - `train`: Parameters for the training of the model.
77
86
  - `predict`: Parameters for the evaluation of the model.
@@ -97,7 +106,7 @@ First download the model weights (see above), then run:
97
106
  ./devtools/check_code.sh
98
107
  ```
99
108
 
100
- This will also generate prediction and matching visualizations that can be found here:
109
+ This will also generate prediction and matching visualizations that can be found here:
101
110
  `tests/test_data/viz/`
102
111
 
103
112
  Visualization outlines:
@@ -118,7 +118,7 @@ class LayoutPredictor:
118
118
  }
119
119
  return info
120
120
 
121
- def predict(self, orig_img: Union[Image, np.array]) -> Iterable[dict]:
121
+ def predict(self, orig_img: Union[Image.Image, np.ndarray]) -> Iterable[dict]:
122
122
  r"""
123
123
  Predict bounding boxes for a given image.
124
124
  The origin (0, 0) is the top-left corner and the predicted bbox coords are provided as:
@@ -177,7 +177,7 @@ class TFPredictor:
177
177
  self._model_type = self._config["model"]["type"]
178
178
  # Added import here to avoid loading turbotransformer library unnecessarily
179
179
  if self._model_type == "TableModel04_rs":
180
- from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa: F401
180
+ from docling_ibm_models.tableformer.models.table04_rs.tablemodel04_rs import ( # noqa
181
181
  TableModel04_rs,
182
182
  )
183
183
  for candidate in BaseModel.__subclasses__():
@@ -437,10 +437,10 @@ class TFPredictor:
437
437
  for pdf_cell in pdf_cells:
438
438
  if pdf_cell["id"] == docling_item["cell_id"]:
439
439
  text_cell_bbox = {
440
- "b": pdf_cell["bbox"][1],
440
+ "b": pdf_cell["bbox"][3],
441
441
  "l": pdf_cell["bbox"][0],
442
442
  "r": pdf_cell["bbox"][2],
443
- "t": pdf_cell["bbox"][3],
443
+ "t": pdf_cell["bbox"][1],
444
444
  "token": pdf_cell["text"],
445
445
  }
446
446
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -468,10 +468,10 @@ class TFPredictor:
468
468
  for pdf_cell in pdf_cells:
469
469
  if pdf_cell["id"] == docling_item["cell_id"]:
470
470
  text_cell_bbox = {
471
- "b": pdf_cell["bbox"][1],
471
+ "b": pdf_cell["bbox"][3],
472
472
  "l": pdf_cell["bbox"][0],
473
473
  "r": pdf_cell["bbox"][2],
474
- "t": pdf_cell["bbox"][3],
474
+ "t": pdf_cell["bbox"][1],
475
475
  "token": pdf_cell["text"],
476
476
  }
477
477
  tf_cells_map[cell_key]["text_cell_bboxes"].append(
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-ibm-models"
3
- version = "0.2.0"
3
+ version = "1.1.0" # DO NOT EDIT, updated automatically
4
4
  description = "This package contains the AI models used by the Docling PDF conversion package"
5
5
  authors = ["Nikos Livathinos <nli@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
6
6
  license = "MIT"
@@ -21,7 +21,7 @@ packages = [
21
21
  ]
22
22
 
23
23
  [tool.poetry.dependencies]
24
- python = "^3.11"
24
+ python = "^3.10"
25
25
  torch = "2.2.2"
26
26
  torchvision = "0.17.2"
27
27
  onnxruntime = "^1.16.2"
@@ -57,10 +57,23 @@ build-backend = "poetry.core.masonry.api"
57
57
 
58
58
  [tool.black]
59
59
  line-length = 88
60
- target-version = ["py311"]
60
+ target-version = ["py310"]
61
61
  include = '\.pyi?$'
62
62
 
63
63
  [tool.isort]
64
64
  profile = "black"
65
65
  line_length = 88
66
- py_version=311
66
+ py_version=310
67
+
68
+ [tool.semantic_release]
69
+ # for default values check:
70
+ # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
71
+
72
+ version_source = "tag_only"
73
+ branch = "main"
74
+
75
+ # configure types which should trigger minor and patch version bumps respectively
76
+ # (note that they must be a subset of the configured allowed types):
77
+ parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
78
+ parser_angular_minor_types = "feat"
79
+ parser_angular_patch_types = "fix,perf"