docling 2.47.0__tar.gz → 2.47.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-2.47.0 → docling-2.47.1}/PKG-INFO +2 -2
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/base_pipeline.py +3 -2
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/PKG-INFO +2 -2
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/requires.txt +1 -1
- {docling-2.47.0 → docling-2.47.1}/pyproject.toml +2 -2
- {docling-2.47.0 → docling-2.47.1}/LICENSE +0 -0
- {docling-2.47.0 → docling-2.47.1}/README.md +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/abstract_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/asciidoc_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/csv_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_v2_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docling_parse_v4_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/latex_dict.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/docx/latex/omml.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/html_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/json/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/json/docling_json_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/md_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/mets_gbs_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/msexcel_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/mspowerpoint_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/msword_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/noop_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/pdf_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/jats_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/backend/xml/uspto_backend.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/chunking/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/cli/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/cli/main.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/cli/models.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/cli/tools.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/accelerator_options.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/asr_model_specs.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/base_models.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/document.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/layout_model_specs.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options_asr_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/pipeline_options_vlm_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/settings.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/datamodel/vlm_model_specs.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/document_converter.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/exceptions.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/api_vlm_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/base_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/base_ocr_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/code_formula_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/document_picture_classifier.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/easyocr_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/factories/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/factories/base_factory.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/factories/ocr_factory.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/factories/picture_description_factory.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/layout_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/ocr_mac_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/page_assemble_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/page_preprocessing_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_api_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_base_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/picture_description_vlm_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/plugins/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/plugins/defaults.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/rapid_ocr_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/readingorder_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/table_structure_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/tesseract_ocr_cli_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/tesseract_ocr_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/utils/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/utils/hf_model_download.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/hf_transformers_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/mlx_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/models/vlm_models_inline/vllm_model.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/asr_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/simple_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/standard_pdf_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/threaded_standard_pdf_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/pipeline/vlm_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/py.typed +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/__init__.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/accelerator_utils.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/api_image_request.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/export.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/glm_utils.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/layout_postprocessor.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/locks.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/model_downloader.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/ocr_utils.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/orientation.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/profiling.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/utils.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling/utils/visualization.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/SOURCES.txt +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/dependency_links.txt +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/entry_points.txt +0 -0
- {docling-2.47.0 → docling-2.47.1}/docling.egg-info/top_level.txt +0 -0
- {docling-2.47.0 → docling-2.47.1}/setup.cfg +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_asr_pipeline.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_asciidoc.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_csv.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_json.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse_v2.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_docling_parse_v4.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_html.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_jats.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_markdown.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_mets_gbs.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_msexcel.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_msword.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_patent_uspto.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_pdfium.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_pptx.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_backend_webp.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_cli.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_code_formula.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_data_gen_flag.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_document_picture_classifier.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_e2e_conversion.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_e2e_ocr_conversion.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_input_doc.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_interfaces.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_invalid_input.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_legacy_format_transform.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_ocr_utils.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_options.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_settings_load.py +0 -0
- {docling-2.47.0 → docling-2.47.1}/tests/test_threaded_pipeline.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.47.0
|
3
|
+
Version: 2.47.1
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -59,7 +59,7 @@ Provides-Extra: vlm
|
|
59
59
|
Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
|
60
60
|
Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
|
61
61
|
Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
62
|
-
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux") and extra == "vlm"
|
62
|
+
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
|
63
63
|
Provides-Extra: rapidocr
|
64
64
|
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
|
65
65
|
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
@@ -146,6 +146,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|
146
146
|
conv_res.pages.append(Page(page_no=i))
|
147
147
|
|
148
148
|
try:
|
149
|
+
total_pages_processed = 0
|
149
150
|
# Iterate batches of pages (page_batch_size) in the doc
|
150
151
|
for page_batch in chunkify(
|
151
152
|
conv_res.pages, settings.perf.page_batch_size
|
@@ -186,9 +187,9 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|
186
187
|
)
|
187
188
|
conv_res.status = ConversionStatus.PARTIAL_SUCCESS
|
188
189
|
break
|
189
|
-
|
190
|
+
total_pages_processed += len(page_batch)
|
190
191
|
_log.debug(
|
191
|
-
f"Finished converting page batch time={end_batch_time:.3f}"
|
192
|
+
f"Finished converting pages {total_pages_processed}/{len(conv_res.pages)} time={end_batch_time:.3f}"
|
192
193
|
)
|
193
194
|
|
194
195
|
except Exception as e:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.47.0
|
3
|
+
Version: 2.47.1
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -59,7 +59,7 @@ Provides-Extra: vlm
|
|
59
59
|
Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
|
60
60
|
Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
|
61
61
|
Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
62
|
-
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux") and extra == "vlm"
|
62
|
+
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
|
63
63
|
Provides-Extra: rapidocr
|
64
64
|
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
|
65
65
|
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
@@ -49,5 +49,5 @@ accelerate<2.0.0,>=1.2.1
|
|
49
49
|
[vlm:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
|
50
50
|
mlx-vlm<1.0.0,>=0.3.0
|
51
51
|
|
52
|
-
[vlm:python_version >= "3.10" and sys_platform == "linux"]
|
52
|
+
[vlm:python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"]
|
53
53
|
vllm<1.0.0,>=0.10.0
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "docling"
|
3
|
-
version = "2.47.0" # DO NOT EDIT, updated automatically
|
3
|
+
version = "2.47.1" # DO NOT EDIT, updated automatically
|
4
4
|
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
5
5
|
license = "MIT"
|
6
6
|
keywords = [
|
@@ -93,7 +93,7 @@ vlm = [
|
|
93
93
|
'transformers (>=4.46.0,<5.0.0)',
|
94
94
|
'accelerate (>=1.2.1,<2.0.0)',
|
95
95
|
'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
96
|
-
'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux"',
|
96
|
+
'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
|
97
97
|
]
|
98
98
|
rapidocr = [
|
99
99
|
'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|