PyPI - docling - Versions diffs - 1.19.0__tar.gz → 1.19.1__tar.gz - Mend

docling 1.19.0 (tar.gz) → 1.19.1 (tar.gz)

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (33) hide show

{docling-1.19.0 → docling-1.19.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 1.19.0
+Version: 1.19.1
 Summary: Docling PDF conversion package
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -22,12 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Provides-Extra: tesserocr
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=0.22.0,<0.23.0)
-Requires-Dist: docling-core (>=1.6.2,<2.0.0)
+Requires-Dist: docling-core (>=1.7.1,<2.0.0)
 Requires-Dist: docling-ibm-models (>=2.0.0,<3.0.0)
 Requires-Dist: docling-parse (>=1.4.1,<2.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
 Requires-Dist: filetype (>=1.2.0,<2.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
+Requires-Dist: pandas (>=2.1.4,<3.0.0)
 Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
 Requires-Dist: pydantic (>=2.0.0,<3.0.0)
 Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
@@ -335,15 +336,14 @@ from docling_core.transforms.chunker import HierarchicalChunker
 doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
 chunks = list(HierarchicalChunker().chunk(doc))
-# > [
-# >     ChunkWithMetadata(
-# >         path='$.main-text[0]',
-# >         text='DocLayNet: A Large Human-Annotated Dataset [...]',
-# >         page=1,
-# >         bbox=[107.30, 672.38, 505.19, 709.08]
-# >     ),
-# >     [...]
-# > ]
+print(chunks[0])
+# ChunkWithMetadata(
+#     path='#/main-text/1',
+#     text='DocLayNet: A Large Human-Annotated Dataset [...]',
+#     page=1,
+#     bbox=[107.30, 672.38, 505.19, 709.08],
+#     [...]
+# )
 ```

{docling-1.19.0 → docling-1.19.1}/README.md RENAMED Viewed

@@ -289,15 +289,14 @@ from docling_core.transforms.chunker import HierarchicalChunker
 doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
 chunks = list(HierarchicalChunker().chunk(doc))
-# > [
-# >     ChunkWithMetadata(
-# >         path='$.main-text[0]',
-# >         text='DocLayNet: A Large Human-Annotated Dataset [...]',
-# >         page=1,
-# >         bbox=[107.30, 672.38, 505.19, 709.08]
-# >     ),
-# >     [...]
-# > ]
+print(chunks[0])
+# ChunkWithMetadata(
+#     path='#/main-text/1',
+#     text='DocLayNet: A Large Human-Annotated Dataset [...]',
+#     page=1,
+#     bbox=[107.30, 672.38, 505.19, 709.08],
+#     [...]
+# )
 ```

{docling-1.19.0 → docling-1.19.1}/docling/models/tesseract_ocr_cli_model.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import io
 import logging
 import tempfile
-from subprocess import PIPE, Popen
+from subprocess import DEVNULL, PIPE, Popen
 from typing import Iterable, Tuple
 import pandas as pd
@@ -81,7 +81,7 @@ class TesseractOcrCliModel(BaseOcrModel):
         cmd += [ifilename, "stdout", "tsv"]
         _log.info("command: {}".format(" ".join(cmd)))
-        proc = Popen(cmd, stdout=PIPE)
+        proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
         output, _ = proc.communicate()
         # _log.info(output)

{docling-1.19.0 → docling-1.19.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "1.19.0"  # DO NOT EDIT, updated automatically
+version = "1.19.1"  # DO NOT EDIT, updated automatically
 description = "Docling PDF conversion package"
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -37,7 +37,7 @@ torchvision = [
 ######################
 python = "^3.10"
 pydantic = "^2.0.0"
-docling-core = "^1.6.2"
+docling-core = "^1.7.1"
 docling-ibm-models = "^2.0.0"
 deepsearch-glm = "^0.22.0"
 filetype = "^1.2.0"
@@ -53,6 +53,7 @@ rtree = "^1.3.0"
 scipy = "^1.14.1"
 pyarrow = "^16.1.0"
 typer = "^0.12.5"
+pandas = "^2.1.4"
 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -67,7 +68,7 @@ pytest-xdist = "^3.3.1"
 types-requests = "^2.31.0.2"
 flake8-pyproject = "^1.2.3"
 pylint = "^2.17.5"
-pandas-stubs = "^2.2.2.240909"
+pandas-stubs = "^2.1.4.231227"
 ipykernel = "^6.29.5"
 ipywidgets = "^8.1.5"
 nbqa = "^1.9.0"
@@ -75,6 +76,9 @@ nbqa = "^1.9.0"
 [tool.poetry.group.examples.dependencies]
 datasets = "^2.21.0"
 python-dotenv = "^1.0.1"
+llama-index-readers-docling = "^0.1.0"
+llama-index-node-parser-docling = "^0.1.0"
+llama-index-readers-file = "^0.2.2"
 llama-index-embeddings-huggingface = "^0.3.1"
 llama-index-llms-huggingface-api = "^0.2.0"
 llama-index-vector-stores-milvus = "^0.2.1"