docling 2.8.0__py3-none-any.whl → 2.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/cli/main.py +24 -0
- {docling-2.8.0.dist-info → docling-2.8.1.dist-info}/METADATA +21 -6
- {docling-2.8.0.dist-info → docling-2.8.1.dist-info}/RECORD +6 -6
- {docling-2.8.0.dist-info → docling-2.8.1.dist-info}/LICENSE +0 -0
- {docling-2.8.0.dist-info → docling-2.8.1.dist-info}/WHEEL +0 -0
- {docling-2.8.0.dist-info → docling-2.8.1.dist-info}/entry_points.txt +0 -0
docling/cli/main.py
CHANGED
@@ -32,6 +32,7 @@ from docling.datamodel.pipeline_options import (
|
|
32
32
|
TesseractCliOcrOptions,
|
33
33
|
TesseractOcrOptions,
|
34
34
|
)
|
35
|
+
from docling.datamodel.settings import settings
|
35
36
|
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
36
37
|
|
37
38
|
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
@@ -212,6 +213,24 @@ def convert(
|
|
212
213
|
help="Set the verbosity level. -v for info logging, -vv for debug logging.",
|
213
214
|
),
|
214
215
|
] = 0,
|
216
|
+
debug_visualize_cells: Annotated[
|
217
|
+
bool,
|
218
|
+
typer.Option(..., help="Enable debug output which visualizes the PDF cells"),
|
219
|
+
] = False,
|
220
|
+
debug_visualize_ocr: Annotated[
|
221
|
+
bool,
|
222
|
+
typer.Option(..., help="Enable debug output which visualizes the OCR cells"),
|
223
|
+
] = False,
|
224
|
+
debug_visualize_layout: Annotated[
|
225
|
+
bool,
|
226
|
+
typer.Option(
|
227
|
+
..., help="Enable debug output which visualizes the layour clusters"
|
228
|
+
),
|
229
|
+
] = False,
|
230
|
+
debug_visualize_tables: Annotated[
|
231
|
+
bool,
|
232
|
+
typer.Option(..., help="Enable debug output which visualizes the table cells"),
|
233
|
+
] = False,
|
215
234
|
version: Annotated[
|
216
235
|
Optional[bool],
|
217
236
|
typer.Option(
|
@@ -229,6 +248,11 @@ def convert(
|
|
229
248
|
elif verbose == 2:
|
230
249
|
logging.basicConfig(level=logging.DEBUG)
|
231
250
|
|
251
|
+
settings.debug.visualize_cells = debug_visualize_cells
|
252
|
+
settings.debug.visualize_layout = debug_visualize_layout
|
253
|
+
settings.debug.visualize_tables = debug_visualize_tables
|
254
|
+
settings.debug.visualize_ocr = debug_visualize_ocr
|
255
|
+
|
232
256
|
if from_formats is None:
|
233
257
|
from_formats = [e for e in InputFormat]
|
234
258
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.8.0
|
3
|
+
Version: 2.8.1
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -39,7 +39,6 @@ Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (ex
|
|
39
39
|
Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
|
40
40
|
Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
|
41
41
|
Requires-Dist: pandas (>=2.1.4,<3.0.0)
|
42
|
-
Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
|
43
42
|
Requires-Dist: pydantic (>=2.0.0,<2.10)
|
44
43
|
Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
|
45
44
|
Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
|
@@ -60,7 +59,7 @@ Description-Content-Type: text/markdown
|
|
60
59
|
</a>
|
61
60
|
</p>
|
62
61
|
|
63
|
-
# Docling
|
62
|
+
# 🦆 Docling
|
64
63
|
|
65
64
|
<p align="center">
|
66
65
|
<a href="https://trendshift.io/repositories/12132" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12132" alt="DS4SD%2Fdocling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
@@ -85,7 +84,7 @@ Docling parses documents and exports them to the desired format with ease and sp
|
|
85
84
|
* 🗂️ Reads popular document formats (PDF, DOCX, PPTX, XLSX, Images, HTML, AsciiDoc & Markdown) and exports to Markdown and JSON
|
86
85
|
* 📑 Advanced PDF document understanding including page layout, reading order & table structures
|
87
86
|
* 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
|
88
|
-
* 🤖 Easy integration with LlamaIndex
|
87
|
+
* 🤖 Easy integration with 🦙 LlamaIndex & 🦜🔗 LangChain for powerful RAG / QA applications
|
89
88
|
* 🔍 OCR support for scanned PDFs
|
90
89
|
* 💻 Simple and convenient CLI
|
91
90
|
|
@@ -121,8 +120,24 @@ result = converter.convert(source)
|
|
121
120
|
print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
|
122
121
|
```
|
123
122
|
|
124
|
-
|
125
|
-
|
123
|
+
More [advanced usage options](https://ds4sd.github.io/docling/usage/) are available in
|
124
|
+
the docs.
|
125
|
+
|
126
|
+
## Documentation
|
127
|
+
|
128
|
+
Check out Docling's [documentation](https://ds4sd.github.io/docling/), for details on
|
129
|
+
installation, usage, concepts, recipes, extensions, and more.
|
130
|
+
|
131
|
+
## Examples
|
132
|
+
|
133
|
+
Go hands-on with our [examples](https://ds4sd.github.io/docling/examples/),
|
134
|
+
demonstrating how to address different application use cases with Docling.
|
135
|
+
|
136
|
+
## Integrations
|
137
|
+
|
138
|
+
To further accelerate your AI application development, check out Docling's native
|
139
|
+
[integrations](https://ds4sd.github.io/docling/integrations/) with popular frameworks
|
140
|
+
and tools.
|
126
141
|
|
127
142
|
## Get help and support
|
128
143
|
|
@@ -12,7 +12,7 @@ docling/backend/msword_backend.py,sha256=VFHPr-gCak7w3NJToc5Cs-JaTb4Vm3a1JnnRIfJ
|
|
12
12
|
docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
|
13
13
|
docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
|
14
14
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
docling/cli/main.py,sha256=
|
15
|
+
docling/cli/main.py,sha256=AgPD32NfM0_bmHeKjx5-fqk57ahX5tN3AeoDOerhTuE,11808
|
16
16
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
docling/datamodel/base_models.py,sha256=6qlwPamDZ3XUsE2kTAyGKG6O2IJClVjCqaE7DZ74KHU,5533
|
18
18
|
docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
|
@@ -41,8 +41,8 @@ docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
|
41
41
|
docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
|
42
42
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
43
43
|
docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
|
44
|
-
docling-2.8.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
45
|
-
docling-2.8.
|
46
|
-
docling-2.8.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
47
|
-
docling-2.8.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
48
|
-
docling-2.8.0.dist-info/RECORD,,
|
44
|
+
docling-2.8.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
45
|
+
docling-2.8.1.dist-info/METADATA,sha256=auj5PtDj-UBB72sW8jk1CSVSwQpd9q0nYzoAYIItl8o,7682
|
46
|
+
docling-2.8.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
47
|
+
docling-2.8.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
|
48
|
+
docling-2.8.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|