docling 1.2.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-1.2.0 → docling-1.2.1}/PKG-INFO +8 -3
- {docling-1.2.0 → docling-1.2.1}/README.md +6 -2
- {docling-1.2.0 → docling-1.2.1}/docling/backend/abstract_backend.py +1 -1
- {docling-1.2.0 → docling-1.2.1}/docling/backend/docling_parse_backend.py +3 -2
- {docling-1.2.0 → docling-1.2.1}/docling/backend/pypdfium2_backend.py +1 -1
- {docling-1.2.0 → docling-1.2.1}/pyproject.toml +2 -1
- {docling-1.2.0 → docling-1.2.1}/LICENSE +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/backend/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/datamodel/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/datamodel/base_models.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/datamodel/document.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/datamodel/settings.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/document_converter.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/ds_glm_model.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/easyocr_model.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/layout_model.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/page_assemble_model.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/models/table_structure_model.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/pipeline/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/pipeline/base_model_pipeline.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/pipeline/standard_model_pipeline.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/utils/__init__.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/utils/layout_utils.py +0 -0
- {docling-1.2.0 → docling-1.2.1}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.2.0
|
3
|
+
Version: 1.2.1
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
22
22
|
Provides-Extra: easyocr
|
23
23
|
Provides-Extra: ocr
|
24
|
+
Requires-Dist: certifi (>=2024.7.4)
|
24
25
|
Requires-Dist: deepsearch-glm (>=0.19.0,<1)
|
25
26
|
Requires-Dist: docling-core (>=1.1.2,<2.0.0)
|
26
27
|
Requires-Dist: docling-ibm-models (>=1.1.0,<2.0.0)
|
@@ -93,17 +94,21 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate
|
|
93
94
|
|
94
95
|
### Convert a batch of documents
|
95
96
|
|
96
|
-
For an example of batch-converting documents, see [
|
97
|
+
For an example of batch-converting documents, see [batch_convert.py](https://github.com/DS4SD/docling/blob/main/examples/batch_convert.py).
|
97
98
|
|
98
99
|
From a local repo clone, you can run it with:
|
99
100
|
|
100
101
|
```
|
101
|
-
python examples/
|
102
|
+
python examples/batch_convert.py
|
102
103
|
```
|
103
104
|
The output of the above command will be written to `./scratch`.
|
104
105
|
|
105
106
|
### Adjust pipeline features
|
106
107
|
|
108
|
+
The example file [custom_convert.py](https://github.com/DS4SD/docling/blob/main/examples/custom_convert.py) contains multiple ways
|
109
|
+
one can adjust the conversion pipeline and features.
|
110
|
+
|
111
|
+
|
107
112
|
#### Control pipeline options
|
108
113
|
|
109
114
|
You can control if table structure recognition or OCR should be performed by arguments passed to `DocumentConverter`:
|
@@ -56,17 +56,21 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate
|
|
56
56
|
|
57
57
|
### Convert a batch of documents
|
58
58
|
|
59
|
-
For an example of batch-converting documents, see [
|
59
|
+
For an example of batch-converting documents, see [batch_convert.py](https://github.com/DS4SD/docling/blob/main/examples/batch_convert.py).
|
60
60
|
|
61
61
|
From a local repo clone, you can run it with:
|
62
62
|
|
63
63
|
```
|
64
|
-
python examples/
|
64
|
+
python examples/batch_convert.py
|
65
65
|
```
|
66
66
|
The output of the above command will be written to `./scratch`.
|
67
67
|
|
68
68
|
### Adjust pipeline features
|
69
69
|
|
70
|
+
The example file [custom_convert.py](https://github.com/DS4SD/docling/blob/main/examples/custom_convert.py) contains multiple ways
|
71
|
+
one can adjust the conversion pipeline and features.
|
72
|
+
|
73
|
+
|
70
74
|
#### Control pipeline options
|
71
75
|
|
72
76
|
You can control if table structure recognition or OCR should be performed by arguments passed to `DocumentConverter`:
|
@@ -146,11 +146,12 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|
146
146
|
|
147
147
|
|
148
148
|
class DoclingParseDocumentBackend(PdfDocumentBackend):
|
149
|
-
def __init__(self, path_or_stream:
|
149
|
+
def __init__(self, path_or_stream: Union[BytesIO, Path]):
|
150
150
|
super().__init__(path_or_stream)
|
151
151
|
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
152
152
|
# Parsing cells with docling_parser call
|
153
|
-
|
153
|
+
if isinstance(path_or_stream, BytesIO):
|
154
|
+
raise NotImplemented("This backend does not support byte streams yet.")
|
154
155
|
parser = pdf_parser()
|
155
156
|
self._parser_doc = parser.find_cells(str(path_or_stream))
|
156
157
|
|
@@ -199,7 +199,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
|
|
199
199
|
|
200
200
|
|
201
201
|
class PyPdfiumDocumentBackend(PdfDocumentBackend):
|
202
|
-
def __init__(self, path_or_stream:
|
202
|
+
def __init__(self, path_or_stream: Union[BytesIO, Path]):
|
203
203
|
super().__init__(path_or_stream)
|
204
204
|
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
205
205
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "1.2.0"
|
3
|
+
version = "1.2.1" # DO NOT EDIT, updated automatically
|
4
4
|
description = "Docling PDF conversion package"
|
5
5
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
@@ -33,6 +33,7 @@ huggingface_hub = ">=0.23,<1"
|
|
33
33
|
requests = "^2.32.3"
|
34
34
|
easyocr = { version = "^1.7", optional = true }
|
35
35
|
docling-parse = "^0.0.1"
|
36
|
+
certifi = ">=2024.7.4"
|
36
37
|
|
37
38
|
[tool.poetry.group.dev.dependencies]
|
38
39
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|