docling 2.8.0__py3-none-any.whl → 2.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docling/cli/main.py CHANGED
@@ -32,6 +32,7 @@ from docling.datamodel.pipeline_options import (
32
32
  TesseractCliOcrOptions,
33
33
  TesseractOcrOptions,
34
34
  )
35
+ from docling.datamodel.settings import settings
35
36
  from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
36
37
 
37
38
  warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
@@ -212,6 +213,24 @@ def convert(
212
213
  help="Set the verbosity level. -v for info logging, -vv for debug logging.",
213
214
  ),
214
215
  ] = 0,
216
+ debug_visualize_cells: Annotated[
217
+ bool,
218
+ typer.Option(..., help="Enable debug output which visualizes the PDF cells"),
219
+ ] = False,
220
+ debug_visualize_ocr: Annotated[
221
+ bool,
222
+ typer.Option(..., help="Enable debug output which visualizes the OCR cells"),
223
+ ] = False,
224
+ debug_visualize_layout: Annotated[
225
+ bool,
226
+ typer.Option(
227
+ ..., help="Enable debug output which visualizes the layout clusters"
228
+ ),
229
+ ] = False,
230
+ debug_visualize_tables: Annotated[
231
+ bool,
232
+ typer.Option(..., help="Enable debug output which visualizes the table cells"),
233
+ ] = False,
215
234
  version: Annotated[
216
235
  Optional[bool],
217
236
  typer.Option(
@@ -229,6 +248,11 @@ def convert(
229
248
  elif verbose == 2:
230
249
  logging.basicConfig(level=logging.DEBUG)
231
250
 
251
+ settings.debug.visualize_cells = debug_visualize_cells
252
+ settings.debug.visualize_layout = debug_visualize_layout
253
+ settings.debug.visualize_tables = debug_visualize_tables
254
+ settings.debug.visualize_ocr = debug_visualize_ocr
255
+
232
256
  if from_formats is None:
233
257
  from_formats = [e for e in InputFormat]
234
258
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.8.0
3
+ Version: 2.8.1
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -39,7 +39,6 @@ Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (ex
39
39
  Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
40
40
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
41
41
  Requires-Dist: pandas (>=2.1.4,<3.0.0)
42
- Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
43
42
  Requires-Dist: pydantic (>=2.0.0,<2.10)
44
43
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
45
44
  Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
@@ -60,7 +59,7 @@ Description-Content-Type: text/markdown
60
59
  </a>
61
60
  </p>
62
61
 
63
- # Docling
62
+ # 🦆 Docling
64
63
 
65
64
  <p align="center">
66
65
  <a href="https://trendshift.io/repositories/12132" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12132" alt="DS4SD%2Fdocling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
@@ -85,7 +84,7 @@ Docling parses documents and exports them to the desired format with ease and sp
85
84
  * 🗂️ Reads popular document formats (PDF, DOCX, PPTX, XLSX, Images, HTML, AsciiDoc & Markdown) and exports to Markdown and JSON
86
85
  * 📑 Advanced PDF document understanding including page layout, reading order & table structures
87
86
  * 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
88
- * 🤖 Easy integration with LlamaIndex 🦙 & LangChain 🦜🔗 for powerful RAG / QA applications
87
+ * 🤖 Easy integration with 🦙 LlamaIndex & 🦜🔗 LangChain for powerful RAG / QA applications
89
88
  * 🔍 OCR support for scanned PDFs
90
89
  * 💻 Simple and convenient CLI
91
90
 
@@ -121,8 +120,24 @@ result = converter.convert(source)
121
120
  print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
122
121
  ```
123
122
 
124
- Check out [Getting started](https://ds4sd.github.io/docling/).
125
- You will find lots of tuning options to leverage all the advanced capabilities.
123
+ More [advanced usage options](https://ds4sd.github.io/docling/usage/) are available in
124
+ the docs.
125
+
126
+ ## Documentation
127
+
128
+ Check out Docling's [documentation](https://ds4sd.github.io/docling/) for details on
129
+ installation, usage, concepts, recipes, extensions, and more.
130
+
131
+ ## Examples
132
+
133
+ Go hands-on with our [examples](https://ds4sd.github.io/docling/examples/),
134
+ demonstrating how to address different application use cases with Docling.
135
+
136
+ ## Integrations
137
+
138
+ To further accelerate your AI application development, check out Docling's native
139
+ [integrations](https://ds4sd.github.io/docling/integrations/) with popular frameworks
140
+ and tools.
126
141
 
127
142
  ## Get help and support
128
143
 
@@ -12,7 +12,7 @@ docling/backend/msword_backend.py,sha256=VFHPr-gCak7w3NJToc5Cs-JaTb4Vm3a1JnnRIfJ
12
12
  docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
13
13
  docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
14
14
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- docling/cli/main.py,sha256=KxukTq155IFVkfc_aUpSL6laGG1KjnXE4oAau7B5xBA,10881
15
+ docling/cli/main.py,sha256=AgPD32NfM0_bmHeKjx5-fqk57ahX5tN3AeoDOerhTuE,11808
16
16
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  docling/datamodel/base_models.py,sha256=6qlwPamDZ3XUsE2kTAyGKG6O2IJClVjCqaE7DZ74KHU,5533
18
18
  docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
@@ -41,8 +41,8 @@ docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
41
41
  docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
42
42
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
43
43
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
44
- docling-2.8.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
45
- docling-2.8.0.dist-info/METADATA,sha256=4XSleijcmMxpwEFyjiNIh71ScIZUTApiKIfKDdM660A,7236
46
- docling-2.8.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
47
- docling-2.8.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
48
- docling-2.8.0.dist-info/RECORD,,
44
+ docling-2.8.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
45
+ docling-2.8.1.dist-info/METADATA,sha256=auj5PtDj-UBB72sW8jk1CSVSwQpd9q0nYzoAYIItl8o,7682
46
+ docling-2.8.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
47
+ docling-2.8.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
48
+ docling-2.8.1.dist-info/RECORD,,