docling 1.13.0__tar.gz → 1.13.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-1.13.0 → docling-1.13.1}/PKG-INFO +4 -2
- {docling-1.13.0 → docling-1.13.1}/README.md +2 -0
- {docling-1.13.0 → docling-1.13.1}/docling/datamodel/document.py +18 -8
- {docling-1.13.0 → docling-1.13.1}/docling/utils/export.py +1 -1
- {docling-1.13.0 → docling-1.13.1}/pyproject.toml +2 -2
- {docling-1.13.0 → docling-1.13.1}/LICENSE +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/backend/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/backend/abstract_backend.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/backend/docling_parse_backend.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/cli/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/cli/main.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/datamodel/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/datamodel/base_models.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/datamodel/settings.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/document_converter.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/base_ocr_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/ds_glm_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/easyocr_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/layout_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/page_assemble_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/models/table_structure_model.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/pipeline/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/pipeline/base_model_pipeline.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/pipeline/standard_model_pipeline.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/utils/__init__.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/utils/layout_utils.py +0 -0
- {docling-1.13.0 → docling-1.13.1}/docling/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 1.13.0
|
3
|
+
Version: 1.13.1
|
4
4
|
Summary: Docling PDF conversion package
|
5
5
|
Home-page: https://github.com/DS4SD/docling
|
6
6
|
License: MIT
|
@@ -22,7 +22,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
22
|
Provides-Extra: examples
|
23
23
|
Requires-Dist: certifi (>=2024.7.4)
|
24
24
|
Requires-Dist: deepsearch-glm (>=0.21.1,<0.22.0)
|
25
|
-
Requires-Dist: docling-core (>=1.
|
25
|
+
Requires-Dist: docling-core (>=1.5.0,<2.0.0)
|
26
26
|
Requires-Dist: docling-ibm-models (>=1.2.0,<2.0.0)
|
27
27
|
Requires-Dist: docling-parse (>=1.2.0,<2.0.0)
|
28
28
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
@@ -122,7 +122,9 @@ from docling.document_converter import DocumentConverter
|
|
122
122
|
source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
|
123
123
|
converter = DocumentConverter()
|
124
124
|
result = converter.convert_single(source)
|
125
|
+
|
125
126
|
print(result.render_as_markdown()) # output: "## Docling Technical Report[...]"
|
127
|
+
print(result.render_as_doctags()) # output: "<document><title><page_1><loc_20>..."
|
126
128
|
```
|
127
129
|
|
128
130
|
### Convert a batch of documents
|
@@ -70,7 +70,9 @@ from docling.document_converter import DocumentConverter
|
|
70
70
|
source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
|
71
71
|
converter = DocumentConverter()
|
72
72
|
result = converter.convert_single(source)
|
73
|
+
|
73
74
|
print(result.render_as_markdown()) # output: "## Docling Technical Report[...]"
|
75
|
+
print(result.render_as_doctags()) # output: "<document><title><page_1><loc_20>..."
|
74
76
|
```
|
75
77
|
|
76
78
|
### Convert a batch of documents
|
@@ -368,20 +368,30 @@ class ConvertedDocument(BaseModel):
|
|
368
368
|
"table",
|
369
369
|
"figure",
|
370
370
|
],
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
371
|
+
xsize: int = 100,
|
372
|
+
ysize: int = 100,
|
373
|
+
add_location: bool = True,
|
374
|
+
add_content: bool = True,
|
375
|
+
add_page_index: bool = True,
|
376
|
+
# table specific flags
|
377
|
+
add_table_cell_location: bool = False,
|
378
|
+
add_table_cell_label: bool = True,
|
379
|
+
add_table_cell_text: bool = True,
|
375
380
|
) -> str:
|
376
381
|
return self.output.export_to_document_tokens(
|
377
382
|
delim=delim,
|
378
383
|
main_text_start=main_text_start,
|
379
384
|
main_text_stop=main_text_stop,
|
380
385
|
main_text_labels=main_text_labels,
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
386
|
+
xsize=xsize,
|
387
|
+
ysize=ysize,
|
388
|
+
add_location=add_location,
|
389
|
+
add_content=add_content,
|
390
|
+
add_page_index=add_page_index,
|
391
|
+
# table specific flags
|
392
|
+
add_table_cell_location=add_table_cell_location,
|
393
|
+
add_table_cell_label=add_table_cell_label,
|
394
|
+
add_table_cell_text=add_table_cell_text,
|
385
395
|
)
|
386
396
|
|
387
397
|
def render_element_images(
|
@@ -111,7 +111,7 @@ def generate_multimodal_pages(
|
|
111
111
|
)
|
112
112
|
# No page-tagging since we only do 1 page at the time
|
113
113
|
content_dt = doc.export_to_document_tokens(
|
114
|
-
main_text_start=start_ix, main_text_stop=end_ix,
|
114
|
+
main_text_start=start_ix, main_text_stop=end_ix, add_page_index=False
|
115
115
|
)
|
116
116
|
|
117
117
|
return content_text, content_md, content_dt, page_cells, page_segments, page
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "docling"
|
3
|
-
version = "1.13.0" # DO NOT EDIT, updated automatically
|
3
|
+
version = "1.13.1" # DO NOT EDIT, updated automatically
|
4
4
|
description = "Docling PDF conversion package"
|
5
5
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
6
6
|
license = "MIT"
|
@@ -23,7 +23,7 @@ packages = [{include = "docling"}]
|
|
23
23
|
[tool.poetry.dependencies]
|
24
24
|
python = "^3.10"
|
25
25
|
pydantic = "^2.0.0"
|
26
|
-
docling-core = "^1.
|
26
|
+
docling-core = "^1.5.0"
|
27
27
|
docling-ibm-models = "^1.2.0"
|
28
28
|
deepsearch-glm = "^0.21.1"
|
29
29
|
filetype = "^1.2.0"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|