docling 1.9.0__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-1.9.0 → docling-1.10.0}/LICENSE +1 -1
- {docling-1.9.0 → docling-1.10.0}/PKG-INFO +13 -9
- {docling-1.9.0 → docling-1.10.0}/README.md +9 -5
- {docling-1.9.0 → docling-1.10.0}/docling/models/ds_glm_model.py +1 -1
- {docling-1.9.0 → docling-1.10.0}/pyproject.toml +4 -4
- {docling-1.9.0 → docling-1.10.0}/docling/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/backend/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/backend/abstract_backend.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/backend/docling_parse_backend.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/datamodel/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/datamodel/base_models.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/datamodel/document.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/datamodel/settings.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/document_converter.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/base_ocr_model.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/easyocr_model.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/layout_model.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/page_assemble_model.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/models/table_structure_model.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/pipeline/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/pipeline/base_model_pipeline.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/pipeline/standard_model_pipeline.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/utils/__init__.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/utils/export.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/utils/layout_utils.py +0 -0
- {docling-1.9.0 → docling-1.10.0}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.9.0
|
3
|
+
Version: 1.10.0
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -20,14 +20,14 @@ Classifier: Programming Language :: Python :: 3.11
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Requires-Dist: certifi (>=2024.7.4)
|
23
|
-
Requires-Dist: deepsearch-glm (>=0.
|
23
|
+
Requires-Dist: deepsearch-glm (>=0.21.0,<0.22.0)
|
24
24
|
Requires-Dist: docling-core (>=1.1.3,<2.0.0)
|
25
25
|
Requires-Dist: docling-ibm-models (>=1.1.3,<2.0.0)
|
26
|
-
Requires-Dist: docling-parse (>=1.
|
26
|
+
Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
|
27
27
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
28
28
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
29
29
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
30
|
-
Requires-Dist: pyarrow (>=
|
30
|
+
Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
|
31
31
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
32
32
|
Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
|
33
33
|
Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
|
@@ -63,7 +63,7 @@ Docling bundles PDF document conversion to JSON and Markdown in an easy, self-co
|
|
63
63
|
* 📝 Extracts metadata from the document, such as title, authors, references and language
|
64
64
|
* 🔍 Optionally applies OCR (use with scanned PDFs)
|
65
65
|
|
66
|
-
|
66
|
+
For RAG, check out [Quackling](https://github.com/DS4SD/quackling) to get the most out of your docs, be it using LlamaIndex, LangChain or your pipeline.
|
67
67
|
|
68
68
|
## Installation
|
69
69
|
|
@@ -183,6 +183,10 @@ results = doc_converter.convert(conv_input)
|
|
183
183
|
You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
|
184
184
|
|
185
185
|
|
186
|
+
## Technical report
|
187
|
+
|
188
|
+
For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
|
189
|
+
|
186
190
|
## Contributing
|
187
191
|
|
188
192
|
Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details.
|
@@ -196,10 +200,10 @@ If you use Docling in your projects, please consider citing the following:
|
|
196
200
|
@techreport{Docling,
|
197
201
|
author = {Deep Search Team},
|
198
202
|
month = {8},
|
199
|
-
title = {
|
200
|
-
url={https://arxiv.org/abs/2408.09869},
|
201
|
-
eprint={2408.09869},
|
202
|
-
doi =
|
203
|
+
title = {Docling Technical Report},
|
204
|
+
url = {https://arxiv.org/abs/2408.09869},
|
205
|
+
eprint = {2408.09869},
|
206
|
+
doi = {10.48550/arXiv.2408.09869},
|
203
207
|
version = {1.0.0},
|
204
208
|
year = {2024}
|
205
209
|
}
|
@@ -24,7 +24,7 @@ Docling bundles PDF document conversion to JSON and Markdown in an easy, self-co
|
|
24
24
|
* 📝 Extracts metadata from the document, such as title, authors, references and language
|
25
25
|
* 🔍 Optionally applies OCR (use with scanned PDFs)
|
26
26
|
|
27
|
-
|
27
|
+
For RAG, check out [Quackling](https://github.com/DS4SD/quackling) to get the most out of your docs, be it using LlamaIndex, LangChain or your pipeline.
|
28
28
|
|
29
29
|
## Installation
|
30
30
|
|
@@ -144,6 +144,10 @@ results = doc_converter.convert(conv_input)
|
|
144
144
|
You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting is using 4 CPU threads.
|
145
145
|
|
146
146
|
|
147
|
+
## Technical report
|
148
|
+
|
149
|
+
For more details on Docling's inner workings, check out the [Docling Technical Report](https://arxiv.org/abs/2408.09869).
|
150
|
+
|
147
151
|
## Contributing
|
148
152
|
|
149
153
|
Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details.
|
@@ -157,10 +161,10 @@ If you use Docling in your projects, please consider citing the following:
|
|
157
161
|
@techreport{Docling,
|
158
162
|
author = {Deep Search Team},
|
159
163
|
month = {8},
|
160
|
-
title = {
|
161
|
-
url={https://arxiv.org/abs/2408.09869},
|
162
|
-
eprint={2408.09869},
|
163
|
-
doi =
|
164
|
+
title = {Docling Technical Report},
|
165
|
+
url = {https://arxiv.org/abs/2408.09869},
|
166
|
+
eprint = {2408.09869},
|
167
|
+
doi = {10.48550/arXiv.2408.09869},
|
164
168
|
version = {1.0.0},
|
165
169
|
year = {2024}
|
166
170
|
}
|
@@ -2,7 +2,7 @@ import copy
|
|
2
2
|
import random
|
3
3
|
|
4
4
|
from deepsearch_glm.nlp_utils import init_nlp_model
|
5
|
-
from deepsearch_glm.utils.
|
5
|
+
from deepsearch_glm.utils.doc_utils import to_legacy_document_format
|
6
6
|
from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
|
7
7
|
from docling_core.types import BaseText
|
8
8
|
from docling_core.types import Document as DsDocument
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "1.
|
3
|
+
version = "1.10.0" # DO NOT EDIT, updated automatically
|
4
4
|
description = "Docling PDF conversion package"
|
5
5
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
@@ -25,18 +25,18 @@ python = "^3.10"
|
|
25
25
|
pydantic = "^2.0.0"
|
26
26
|
docling-core = "^1.1.3"
|
27
27
|
docling-ibm-models = "^1.1.3"
|
28
|
-
deepsearch-glm = "^0.
|
28
|
+
deepsearch-glm = "^0.21.0"
|
29
29
|
filetype = "^1.2.0"
|
30
30
|
pypdfium2 = "^4.30.0"
|
31
31
|
pydantic-settings = "^2.3.0"
|
32
32
|
huggingface_hub = ">=0.23,<1"
|
33
33
|
requests = "^2.32.3"
|
34
34
|
easyocr = "^1.7"
|
35
|
-
docling-parse = "^1.
|
35
|
+
docling-parse = "^1.2.0"
|
36
36
|
certifi = ">=2024.7.4"
|
37
37
|
rtree = "^1.3.0"
|
38
38
|
scipy = "^1.14.1"
|
39
|
-
pyarrow = "^
|
39
|
+
pyarrow = "^16.1.0"
|
40
40
|
|
41
41
|
[tool.poetry.group.dev.dependencies]
|
42
42
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|