docling 1.19.0__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import io
2
2
  import logging
3
3
  import tempfile
4
- from subprocess import PIPE, Popen
4
+ from subprocess import DEVNULL, PIPE, Popen
5
5
  from typing import Iterable, Tuple
6
6
 
7
7
  import pandas as pd
@@ -81,7 +81,7 @@ class TesseractOcrCliModel(BaseOcrModel):
81
81
  cmd += [ifilename, "stdout", "tsv"]
82
82
  _log.info("command: {}".format(" ".join(cmd)))
83
83
 
84
- proc = Popen(cmd, stdout=PIPE)
84
+ proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
85
85
  output, _ = proc.communicate()
86
86
 
87
87
  # _log.info(output)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.19.0
3
+ Version: 1.19.1
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -22,12 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Provides-Extra: tesserocr
23
23
  Requires-Dist: certifi (>=2024.7.4)
24
24
  Requires-Dist: deepsearch-glm (>=0.22.0,<0.23.0)
25
- Requires-Dist: docling-core (>=1.6.2,<2.0.0)
25
+ Requires-Dist: docling-core (>=1.7.1,<2.0.0)
26
26
  Requires-Dist: docling-ibm-models (>=2.0.0,<3.0.0)
27
27
  Requires-Dist: docling-parse (>=1.4.1,<2.0.0)
28
28
  Requires-Dist: easyocr (>=1.7,<2.0)
29
29
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
30
30
  Requires-Dist: huggingface_hub (>=0.23,<1)
31
+ Requires-Dist: pandas (>=2.1.4,<3.0.0)
31
32
  Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
32
33
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
33
34
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
@@ -335,15 +336,14 @@ from docling_core.transforms.chunker import HierarchicalChunker
335
336
 
336
337
  doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
337
338
  chunks = list(HierarchicalChunker().chunk(doc))
338
- # > [
339
- # > ChunkWithMetadata(
340
- # > path='$.main-text[0]',
341
- # > text='DocLayNet: A Large Human-Annotated Dataset [...]',
342
- # > page=1,
343
- # > bbox=[107.30, 672.38, 505.19, 709.08]
344
- # > ),
345
- # > [...]
346
- # > ]
339
+ print(chunks[0])
340
+ # ChunkWithMetadata(
341
+ # path='#/main-text/1',
342
+ # text='DocLayNet: A Large Human-Annotated Dataset [...]',
343
+ # page=1,
344
+ # bbox=[107.30, 672.38, 505.19, 709.08],
345
+ # [...]
346
+ # )
347
347
  ```
348
348
 
349
349
 
@@ -18,7 +18,7 @@ docling/models/easyocr_model.py,sha256=fmfTvOfqo97n_xYQDPRMKlvMOs9QqgLgSTqwBDrjK
18
18
  docling/models/layout_model.py,sha256=5wCohyzGK1p5F5cTRY5QWbW2AI4eevXobJDJdSLhX7k,11272
19
19
  docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
20
20
  docling/models/table_structure_model.py,sha256=iHJjWdKCpTcH3l_ElMWnC5pt6tkUpIuByed304Fdq9w,6009
21
- docling/models/tesseract_ocr_cli_model.py,sha256=s2uzcjTPqbfqPCgtz8nvfK8QVtuvQBR8OpTR3H_XdR8,5504
21
+ docling/models/tesseract_ocr_cli_model.py,sha256=B7zPkpKgpfTTUWXm-_zvw7x0yvzVd85WguawFVDwdqI,5529
22
22
  docling/models/tesseract_ocr_model.py,sha256=PqQv1Hv3GC1FByjegWvjNFw15Jcw-mT25_MvFr3hAHQ,4575
23
23
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  docling/pipeline/base_model_pipeline.py,sha256=rrMog3EuiR5Gx9OWtfMj24rQvHCrWkxZ3g9OIr7LPSQ,607
@@ -27,8 +27,8 @@ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  docling/utils/export.py,sha256=bKLdbeUcR-rQsGPV1IqJkCHKMCv7X2QOHyxmjNuH3HE,4655
28
28
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
29
29
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
30
- docling-1.19.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
31
- docling-1.19.0.dist-info/METADATA,sha256=ZJ0aAmgxVDYaEKkBnm8yexI4zIcG2iqGC-NddLdabmM,16795
32
- docling-1.19.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
33
- docling-1.19.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
34
- docling-1.19.0.dist-info/RECORD,,
30
+ docling-1.19.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
31
+ docling-1.19.1.dist-info/METADATA,sha256=hCQeq3JVB16CfTwtjjwnX5u9bWYjD0CsSbn9h1tZZTM,16800
32
+ docling-1.19.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
33
+ docling-1.19.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
34
+ docling-1.19.1.dist-info/RECORD,,