docling 2.46.0__tar.gz → 2.47.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling-2.46.0 → docling-2.47.0}/PKG-INFO +2 -1
- {docling-2.46.0 → docling-2.47.0}/docling/backend/html_backend.py +111 -13
- {docling-2.46.0 → docling-2.47.0}/docling/backend/msword_backend.py +126 -16
- {docling-2.46.0 → docling-2.47.0}/docling/cli/main.py +14 -0
- {docling-2.46.0 → docling-2.47.0}/docling/cli/models.py +56 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/base_models.py +1 -1
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/pipeline_options.py +3 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/pipeline_options_vlm_model.py +5 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/vlm_model_specs.py +114 -1
- docling-2.47.0/docling/models/base_model.py +186 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/page_preprocessing_model.py +5 -1
- {docling-2.46.0 → docling-2.47.0}/docling/models/picture_description_vlm_model.py +4 -2
- docling-2.47.0/docling/models/vlm_models_inline/hf_transformers_model.py +314 -0
- docling-2.47.0/docling/models/vlm_models_inline/mlx_model.py +260 -0
- docling-2.47.0/docling/models/vlm_models_inline/vllm_model.py +235 -0
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/threaded_standard_pdf_pipeline.py +1 -1
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/vlm_pipeline.py +14 -1
- docling-2.47.0/docling/py.typed +1 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/layout_postprocessor.py +51 -43
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/PKG-INFO +2 -1
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/SOURCES.txt +1 -0
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/requires.txt +3 -0
- {docling-2.46.0 → docling-2.47.0}/pyproject.toml +3 -1
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_html.py +1 -1
- docling-2.46.0/docling/models/base_model.py +0 -93
- docling-2.46.0/docling/models/vlm_models_inline/hf_transformers_model.py +0 -214
- docling-2.46.0/docling/models/vlm_models_inline/mlx_model.py +0 -149
- docling-2.46.0/docling/utils/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/LICENSE +0 -0
- {docling-2.46.0 → docling-2.47.0}/README.md +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/abstract_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/asciidoc_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/csv_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docling_parse_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docling_parse_v2_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docling_parse_v4_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docx/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docx/latex/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docx/latex/latex_dict.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/docx/latex/omml.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/json/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/json/docling_json_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/md_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/mets_gbs_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/msexcel_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/mspowerpoint_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/noop_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/pdf_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/pypdfium2_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/xml/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/xml/jats_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/backend/xml/uspto_backend.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/chunking/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/cli/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/cli/tools.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/accelerator_options.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/asr_model_specs.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/document.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/layout_model_specs.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/pipeline_options_asr_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/datamodel/settings.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/document_converter.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/exceptions.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/api_vlm_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/base_ocr_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/code_formula_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/document_picture_classifier.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/easyocr_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/factories/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/factories/base_factory.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/factories/ocr_factory.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/factories/picture_description_factory.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/layout_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/ocr_mac_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/page_assemble_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/picture_description_api_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/picture_description_base_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/plugins/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/plugins/defaults.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/rapid_ocr_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/readingorder_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/table_structure_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/tesseract_ocr_model.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/utils/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/models/utils/hf_model_download.py +0 -0
- /docling-2.46.0/docling/py.typed → /docling-2.47.0/docling/models/vlm_models_inline/__init__.py +0 -0
- {docling-2.46.0/docling/models/vlm_models_inline → docling-2.47.0/docling/pipeline}/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/asr_pipeline.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/base_pipeline.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/simple_pipeline.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/pipeline/standard_pdf_pipeline.py +0 -0
- {docling-2.46.0/docling/pipeline → docling-2.47.0/docling/utils}/__init__.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/accelerator_utils.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/api_image_request.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/export.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/glm_utils.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/locks.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/model_downloader.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/ocr_utils.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/orientation.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/profiling.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/utils.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling/utils/visualization.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/dependency_links.txt +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/entry_points.txt +0 -0
- {docling-2.46.0 → docling-2.47.0}/docling.egg-info/top_level.txt +0 -0
- {docling-2.46.0 → docling-2.47.0}/setup.cfg +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_asr_pipeline.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_asciidoc.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_csv.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_docling_json.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_docling_parse.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_docling_parse_v2.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_docling_parse_v4.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_jats.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_markdown.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_mets_gbs.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_msexcel.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_msword.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_patent_uspto.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_pdfium.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_pptx.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_backend_webp.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_cli.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_code_formula.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_data_gen_flag.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_document_picture_classifier.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_e2e_conversion.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_e2e_ocr_conversion.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_input_doc.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_interfaces.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_invalid_input.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_legacy_format_transform.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_ocr_utils.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_options.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_settings_load.py +0 -0
- {docling-2.46.0 → docling-2.47.0}/tests/test_threaded_pipeline.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.46.0
|
3
|
+
Version: 2.47.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -59,6 +59,7 @@ Provides-Extra: vlm
|
|
59
59
|
Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
|
60
60
|
Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
|
61
61
|
Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
62
|
+
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux") and extra == "vlm"
|
62
63
|
Provides-Extra: rapidocr
|
63
64
|
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
|
64
65
|
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
@@ -20,7 +20,7 @@ from docling_core.types.doc import (
|
|
20
20
|
TableData,
|
21
21
|
TextItem,
|
22
22
|
)
|
23
|
-
from docling_core.types.doc.document import ContentLayer
|
23
|
+
from docling_core.types.doc.document import ContentLayer, Formatting, Script
|
24
24
|
from pydantic import AnyUrl, BaseModel, ValidationError
|
25
25
|
from typing_extensions import override
|
26
26
|
|
@@ -54,6 +54,21 @@ _BLOCK_TAGS: Final = {
|
|
54
54
|
"table",
|
55
55
|
}
|
56
56
|
|
57
|
+
_FORMAT_TAG_MAP: Final = {
|
58
|
+
"b": {"bold": True},
|
59
|
+
"strong": {"bold": True},
|
60
|
+
"i": {"italic": True},
|
61
|
+
"em": {"italic": True},
|
62
|
+
# "mark",
|
63
|
+
# "small",
|
64
|
+
"s": {"strikethrough": True},
|
65
|
+
"del": {"strikethrough": True},
|
66
|
+
"u": {"underline": True},
|
67
|
+
"ins": {"underline": True},
|
68
|
+
"sub": {"script": Script.SUB},
|
69
|
+
"sup": {"script": Script.SUPER},
|
70
|
+
}
|
71
|
+
|
57
72
|
|
58
73
|
class _Context(BaseModel):
|
59
74
|
list_ordered_flag_by_ref: dict[str, bool] = {}
|
@@ -63,23 +78,34 @@ class _Context(BaseModel):
|
|
63
78
|
class AnnotatedText(BaseModel):
|
64
79
|
text: str
|
65
80
|
hyperlink: Union[AnyUrl, Path, None] = None
|
81
|
+
formatting: Union[Formatting, None] = None
|
66
82
|
|
67
83
|
|
68
84
|
class AnnotatedTextList(list):
|
69
85
|
def to_single_text_element(self) -> AnnotatedText:
|
70
86
|
current_h = None
|
71
87
|
current_text = ""
|
88
|
+
current_f = None
|
72
89
|
for at in self:
|
73
90
|
t = at.text
|
74
91
|
h = at.hyperlink
|
92
|
+
f = at.formatting
|
75
93
|
current_text += t.strip() + " "
|
94
|
+
if f is not None and current_f is None:
|
95
|
+
current_f = f
|
96
|
+
elif f is not None and current_f is not None and f != current_f:
|
97
|
+
_log.warning(
|
98
|
+
f"Clashing formatting: '{f}' and '{current_f}'! Chose '{current_f}'"
|
99
|
+
)
|
76
100
|
if h is not None and current_h is None:
|
77
101
|
current_h = h
|
78
102
|
elif h is not None and current_h is not None and h != current_h:
|
79
103
|
_log.warning(
|
80
104
|
f"Clashing hyperlinks: '{h}' and '{current_h}'! Chose '{current_h}'"
|
81
105
|
)
|
82
|
-
return AnnotatedText(text=current_text.strip(), hyperlink=current_h)
|
106
|
+
return AnnotatedText(
|
107
|
+
text=current_text.strip(), hyperlink=current_h, formatting=current_f
|
108
|
+
)
|
83
109
|
|
84
110
|
def simplify_text_elements(self) -> "AnnotatedTextList":
|
85
111
|
simplified = AnnotatedTextList()
|
@@ -87,21 +113,27 @@ class AnnotatedTextList(list):
|
|
87
113
|
return self
|
88
114
|
text = self[0].text
|
89
115
|
hyperlink = self[0].hyperlink
|
116
|
+
formatting = self[0].formatting
|
90
117
|
last_elm = text
|
91
118
|
for i in range(1, len(self)):
|
92
|
-
if hyperlink == self[i].hyperlink:
|
119
|
+
if hyperlink == self[i].hyperlink and formatting == self[i].formatting:
|
93
120
|
sep = " "
|
94
121
|
if not self[i].text.strip() or not last_elm.strip():
|
95
122
|
sep = ""
|
96
123
|
text += sep + self[i].text
|
97
124
|
last_elm = self[i].text
|
98
125
|
else:
|
99
|
-
simplified.append(AnnotatedText(text=text, hyperlink=hyperlink))
|
126
|
+
simplified.append(
|
127
|
+
AnnotatedText(text=text, hyperlink=hyperlink, formatting=formatting)
|
128
|
+
)
|
100
129
|
text = self[i].text
|
101
130
|
last_elm = text
|
102
131
|
hyperlink = self[i].hyperlink
|
132
|
+
formatting = self[i].formatting
|
103
133
|
if text:
|
104
|
-
simplified.append(AnnotatedText(text=text, hyperlink=hyperlink))
|
134
|
+
simplified.append(
|
135
|
+
AnnotatedText(text=text, hyperlink=hyperlink, formatting=formatting)
|
136
|
+
)
|
105
137
|
return simplified
|
106
138
|
|
107
139
|
def split_by_newline(self):
|
@@ -144,6 +176,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
144
176
|
self.parents[i] = None
|
145
177
|
self.hyperlink = None
|
146
178
|
self.original_url = original_url
|
179
|
+
self.format_tags: list[str] = []
|
147
180
|
|
148
181
|
try:
|
149
182
|
raw = (
|
@@ -254,6 +287,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
254
287
|
label=DocItemLabel.TEXT,
|
255
288
|
text=seg_clean,
|
256
289
|
content_layer=self.content_layer,
|
290
|
+
formatting=annotated_text.formatting,
|
257
291
|
hyperlink=annotated_text.hyperlink,
|
258
292
|
)
|
259
293
|
|
@@ -263,6 +297,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
263
297
|
if name == "img":
|
264
298
|
flush_buffer()
|
265
299
|
self._emit_image(node, doc)
|
300
|
+
elif name in _FORMAT_TAG_MAP:
|
301
|
+
with self.use_format([name]):
|
302
|
+
self._walk(node, doc)
|
266
303
|
elif name == "a":
|
267
304
|
with self.use_hyperlink(node):
|
268
305
|
self._walk(node, doc)
|
@@ -292,6 +329,27 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
292
329
|
|
293
330
|
flush_buffer()
|
294
331
|
|
332
|
+
@staticmethod
|
333
|
+
def _collect_parent_format_tags(item: PageElement) -> list[str]:
|
334
|
+
tags = []
|
335
|
+
for format_tag in _FORMAT_TAG_MAP:
|
336
|
+
this_parent = item.parent
|
337
|
+
while this_parent is not None:
|
338
|
+
if this_parent.name == format_tag:
|
339
|
+
tags.append(format_tag)
|
340
|
+
break
|
341
|
+
this_parent = this_parent.parent
|
342
|
+
return tags
|
343
|
+
|
344
|
+
@property
|
345
|
+
def _formatting(self):
|
346
|
+
kwargs = {}
|
347
|
+
for t in self.format_tags:
|
348
|
+
kwargs.update(_FORMAT_TAG_MAP[t])
|
349
|
+
if not kwargs:
|
350
|
+
return None
|
351
|
+
return Formatting(**kwargs)
|
352
|
+
|
295
353
|
def _extract_text_and_hyperlink_recursively(
|
296
354
|
self,
|
297
355
|
item: PageElement,
|
@@ -302,15 +360,18 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
302
360
|
result: AnnotatedTextList = AnnotatedTextList()
|
303
361
|
|
304
362
|
# If find_parent_annotation, make sure that we keep track of
|
305
|
-
# any a-tag that has been present in the
|
363
|
+
# any a- or formatting-tag that has been present in the
|
364
|
+
# DOM-parents already.
|
306
365
|
if find_parent_annotation:
|
366
|
+
format_tags = self._collect_parent_format_tags(item)
|
307
367
|
this_parent = item.parent
|
308
368
|
while this_parent is not None:
|
309
369
|
if this_parent.name == "a" and this_parent.get("href"):
|
310
|
-
with self.use_hyperlink(this_parent):
|
311
|
-
|
312
|
-
|
313
|
-
|
370
|
+
with self.use_format(format_tags):
|
371
|
+
with self.use_hyperlink(this_parent):
|
372
|
+
return self._extract_text_and_hyperlink_recursively(
|
373
|
+
item, ignore_list
|
374
|
+
)
|
314
375
|
this_parent = this_parent.parent
|
315
376
|
|
316
377
|
if isinstance(item, PreformattedString):
|
@@ -320,18 +381,37 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
320
381
|
text = item.strip()
|
321
382
|
if text:
|
322
383
|
return AnnotatedTextList(
|
323
|
-
[AnnotatedText(text=text, hyperlink=self.hyperlink)]
|
384
|
+
[
|
385
|
+
AnnotatedText(
|
386
|
+
text=text,
|
387
|
+
hyperlink=self.hyperlink,
|
388
|
+
formatting=self._formatting,
|
389
|
+
)
|
390
|
+
]
|
324
391
|
)
|
325
392
|
if keep_newlines and item.strip("\n\r") == "":
|
326
393
|
return AnnotatedTextList(
|
327
|
-
[AnnotatedText(text="\n", hyperlink=self.hyperlink)]
|
394
|
+
[
|
395
|
+
AnnotatedText(
|
396
|
+
text="\n",
|
397
|
+
hyperlink=self.hyperlink,
|
398
|
+
formatting=self._formatting,
|
399
|
+
)
|
400
|
+
]
|
328
401
|
)
|
329
402
|
return AnnotatedTextList()
|
330
403
|
|
331
404
|
tag = cast(Tag, item)
|
332
405
|
if not ignore_list or (tag.name not in ["ul", "ol"]):
|
333
406
|
for child in tag:
|
334
|
-
if isinstance(child, Tag) and child.name == "a":
|
407
|
+
if isinstance(child, Tag) and child.name in _FORMAT_TAG_MAP:
|
408
|
+
with self.use_format([child.name]):
|
409
|
+
result.extend(
|
410
|
+
self._extract_text_and_hyperlink_recursively(
|
411
|
+
child, ignore_list, keep_newlines=keep_newlines
|
412
|
+
)
|
413
|
+
)
|
414
|
+
elif isinstance(child, Tag) and child.name == "a":
|
335
415
|
with self.use_hyperlink(child):
|
336
416
|
result.extend(
|
337
417
|
self._extract_text_and_hyperlink_recursively(
|
@@ -369,6 +449,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
369
449
|
if this_href:
|
370
450
|
self.hyperlink = old_hyperlink
|
371
451
|
|
452
|
+
@contextmanager
|
453
|
+
def use_format(self, tags: list[str]):
|
454
|
+
if not tags:
|
455
|
+
yield None
|
456
|
+
else:
|
457
|
+
self.format_tags.extend(tags)
|
458
|
+
try:
|
459
|
+
yield None
|
460
|
+
finally:
|
461
|
+
self.format_tags = self.format_tags[: -len(tags)]
|
462
|
+
|
372
463
|
@contextmanager
|
373
464
|
def use_inline_group(
|
374
465
|
self, annotated_text_list: AnnotatedTextList, doc: DoclingDocument
|
@@ -420,6 +511,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
420
511
|
self.parents[self.level + 1] = doc.add_title(
|
421
512
|
text_clean,
|
422
513
|
content_layer=self.content_layer,
|
514
|
+
formatting=annotated_text.formatting,
|
423
515
|
hyperlink=annotated_text.hyperlink,
|
424
516
|
)
|
425
517
|
# the other levels need to be lowered by 1 if a title was set
|
@@ -449,6 +541,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
449
541
|
orig=annotated_text.text,
|
450
542
|
level=self.level,
|
451
543
|
content_layer=self.content_layer,
|
544
|
+
formatting=annotated_text.formatting,
|
452
545
|
hyperlink=annotated_text.hyperlink,
|
453
546
|
)
|
454
547
|
self.level += 1
|
@@ -529,6 +622,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
529
622
|
label=DocItemLabel.TEXT,
|
530
623
|
text=li_clean,
|
531
624
|
content_layer=self.content_layer,
|
625
|
+
formatting=annotated_text.formatting,
|
532
626
|
hyperlink=annotated_text.hyperlink,
|
533
627
|
)
|
534
628
|
|
@@ -551,6 +645,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
551
645
|
orig=li_text,
|
552
646
|
parent=list_group,
|
553
647
|
content_layer=self.content_layer,
|
648
|
+
formatting=annotated_text.formatting,
|
554
649
|
hyperlink=annotated_text.hyperlink,
|
555
650
|
)
|
556
651
|
|
@@ -603,6 +698,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
603
698
|
label=DocItemLabel.TEXT,
|
604
699
|
text=seg_clean,
|
605
700
|
content_layer=self.content_layer,
|
701
|
+
formatting=annotated_text.formatting,
|
606
702
|
hyperlink=annotated_text.hyperlink,
|
607
703
|
)
|
608
704
|
|
@@ -637,6 +733,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
637
733
|
parent=self.parents[self.level],
|
638
734
|
text=text_clean,
|
639
735
|
content_layer=self.content_layer,
|
736
|
+
formatting=annotated_text.formatting,
|
640
737
|
hyperlink=annotated_text.hyperlink,
|
641
738
|
)
|
642
739
|
|
@@ -696,6 +793,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
696
793
|
text=text_clean,
|
697
794
|
orig=caption_anno_text.text,
|
698
795
|
content_layer=self.content_layer,
|
796
|
+
formatting=caption_anno_text.formatting,
|
699
797
|
hyperlink=caption_anno_text.hyperlink,
|
700
798
|
)
|
701
799
|
|
@@ -67,6 +67,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
67
67
|
|
68
68
|
self.level = 0
|
69
69
|
self.listIter = 0
|
70
|
+
# Track list counters per numId and ilvl
|
71
|
+
self.list_counters: dict[tuple[int, int], int] = {}
|
70
72
|
|
71
73
|
self.history: dict[str, Any] = {
|
72
74
|
"names": [None],
|
@@ -315,6 +317,108 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
315
317
|
|
316
318
|
return None, None # If the paragraph is not part of a list
|
317
319
|
|
320
|
+
def _get_list_counter(self, numid: int, ilvl: int) -> int:
|
321
|
+
"""Get and increment the counter for a specific numId and ilvl combination."""
|
322
|
+
key = (numid, ilvl)
|
323
|
+
if key not in self.list_counters:
|
324
|
+
self.list_counters[key] = 0
|
325
|
+
self.list_counters[key] += 1
|
326
|
+
return self.list_counters[key]
|
327
|
+
|
328
|
+
def _reset_list_counters_for_new_sequence(self, numid: int):
|
329
|
+
"""Reset counters when starting a new numbering sequence."""
|
330
|
+
# Reset all counters for this numid
|
331
|
+
keys_to_reset = [key for key in self.list_counters.keys() if key[0] == numid]
|
332
|
+
for key in keys_to_reset:
|
333
|
+
self.list_counters[key] = 0
|
334
|
+
|
335
|
+
def _is_numbered_list(self, docx_obj: DocxDocument, numId: int, ilvl: int) -> bool:
|
336
|
+
"""Check if a list is numbered based on its numFmt value."""
|
337
|
+
try:
|
338
|
+
# Access the numbering part of the document
|
339
|
+
if not hasattr(docx_obj, "part") or not hasattr(docx_obj.part, "package"):
|
340
|
+
return False
|
341
|
+
|
342
|
+
numbering_part = None
|
343
|
+
# Find the numbering part
|
344
|
+
for part in docx_obj.part.package.parts:
|
345
|
+
if "numbering" in part.partname:
|
346
|
+
numbering_part = part
|
347
|
+
break
|
348
|
+
|
349
|
+
if numbering_part is None:
|
350
|
+
return False
|
351
|
+
|
352
|
+
# Parse the numbering XML
|
353
|
+
numbering_root = numbering_part.element
|
354
|
+
namespaces = {
|
355
|
+
"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
356
|
+
}
|
357
|
+
|
358
|
+
# Find the numbering definition with the given numId
|
359
|
+
num_xpath = f".//w:num[@w:numId='{numId}']"
|
360
|
+
num_element = numbering_root.find(num_xpath, namespaces=namespaces)
|
361
|
+
|
362
|
+
if num_element is None:
|
363
|
+
return False
|
364
|
+
|
365
|
+
# Get the abstractNumId from the num element
|
366
|
+
abstract_num_id_elem = num_element.find(
|
367
|
+
".//w:abstractNumId", namespaces=namespaces
|
368
|
+
)
|
369
|
+
if abstract_num_id_elem is None:
|
370
|
+
return False
|
371
|
+
|
372
|
+
abstract_num_id = abstract_num_id_elem.get(
|
373
|
+
"{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
|
374
|
+
)
|
375
|
+
if abstract_num_id is None:
|
376
|
+
return False
|
377
|
+
|
378
|
+
# Find the abstract numbering definition
|
379
|
+
abstract_num_xpath = (
|
380
|
+
f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']"
|
381
|
+
)
|
382
|
+
abstract_num_element = numbering_root.find(
|
383
|
+
abstract_num_xpath, namespaces=namespaces
|
384
|
+
)
|
385
|
+
|
386
|
+
if abstract_num_element is None:
|
387
|
+
return False
|
388
|
+
|
389
|
+
# Find the level definition for the given ilvl
|
390
|
+
lvl_xpath = f".//w:lvl[@w:ilvl='{ilvl}']"
|
391
|
+
lvl_element = abstract_num_element.find(lvl_xpath, namespaces=namespaces)
|
392
|
+
|
393
|
+
if lvl_element is None:
|
394
|
+
return False
|
395
|
+
|
396
|
+
# Get the numFmt element
|
397
|
+
num_fmt_element = lvl_element.find(".//w:numFmt", namespaces=namespaces)
|
398
|
+
if num_fmt_element is None:
|
399
|
+
return False
|
400
|
+
|
401
|
+
num_fmt = num_fmt_element.get(
|
402
|
+
"{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
|
403
|
+
)
|
404
|
+
|
405
|
+
# Numbered formats include: decimal, lowerRoman, upperRoman, lowerLetter, upperLetter
|
406
|
+
# Bullet formats include: bullet
|
407
|
+
numbered_formats = {
|
408
|
+
"decimal",
|
409
|
+
"lowerRoman",
|
410
|
+
"upperRoman",
|
411
|
+
"lowerLetter",
|
412
|
+
"upperLetter",
|
413
|
+
"decimalZero",
|
414
|
+
}
|
415
|
+
|
416
|
+
return num_fmt in numbered_formats
|
417
|
+
|
418
|
+
except Exception as e:
|
419
|
+
_log.debug(f"Error determining if list is numbered: {e}")
|
420
|
+
return False
|
421
|
+
|
318
422
|
def _get_heading_and_level(self, style_label: str) -> tuple[str, Optional[int]]:
|
319
423
|
parts = self._split_text_and_number(style_label)
|
320
424
|
|
@@ -713,8 +817,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
713
817
|
# Common styles for bullet and numbered lists.
|
714
818
|
# "List Bullet", "List Number", "List Paragraph"
|
715
819
|
# Identify whether list is a numbered list or not
|
716
|
-
# is_numbered = "List Bullet" not in paragraph.style.name
|
717
|
-
is_numbered = False
|
718
820
|
p_style_id, p_level = self._get_label_and_level(paragraph)
|
719
821
|
numid, ilevel = self._get_numId_and_ilvl(paragraph)
|
720
822
|
|
@@ -727,6 +829,9 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
727
829
|
and ilevel is not None
|
728
830
|
and p_style_id not in ["Title", "Heading"]
|
729
831
|
):
|
832
|
+
# Check if this is actually a numbered list by examining the numFmt
|
833
|
+
is_numbered = self._is_numbered_list(docx_obj, numid, ilevel)
|
834
|
+
|
730
835
|
self._add_list_item(
|
731
836
|
doc=doc,
|
732
837
|
numid=numid,
|
@@ -983,15 +1088,19 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
983
1088
|
if self._prev_numid() is None: # Open new list
|
984
1089
|
self.level_at_new_list = level
|
985
1090
|
|
1091
|
+
# Reset counters for the new numbering sequence
|
1092
|
+
self._reset_list_counters_for_new_sequence(numid)
|
1093
|
+
|
986
1094
|
self.parents[level] = doc.add_list_group(
|
987
1095
|
name="list", parent=self.parents[level - 1]
|
988
1096
|
)
|
989
1097
|
|
990
1098
|
# Set marker and enumerated arguments if this is an enumeration element.
|
991
|
-
self.listIter += 1
|
992
1099
|
if is_numbered:
|
993
|
-
|
994
|
-
|
1100
|
+
counter = self._get_list_counter(numid, ilevel)
|
1101
|
+
enum_marker = str(counter) + "."
|
1102
|
+
else:
|
1103
|
+
enum_marker = ""
|
995
1104
|
self._add_formatted_list_item(
|
996
1105
|
doc, elements, enum_marker, is_numbered, level
|
997
1106
|
)
|
@@ -1005,16 +1114,16 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
1005
1114
|
self.level_at_new_list + prev_indent + 1,
|
1006
1115
|
self.level_at_new_list + ilevel + 1,
|
1007
1116
|
):
|
1008
|
-
self.listIter = 0
|
1009
1117
|
self.parents[i] = doc.add_list_group(
|
1010
1118
|
name="list", parent=self.parents[i - 1]
|
1011
1119
|
)
|
1012
1120
|
|
1013
1121
|
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
1014
|
-
self.listIter += 1
|
1015
1122
|
if is_numbered:
|
1016
|
-
|
1017
|
-
|
1123
|
+
counter = self._get_list_counter(numid, ilevel)
|
1124
|
+
enum_marker = str(counter) + "."
|
1125
|
+
else:
|
1126
|
+
enum_marker = ""
|
1018
1127
|
self._add_formatted_list_item(
|
1019
1128
|
doc,
|
1020
1129
|
elements,
|
@@ -1033,10 +1142,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
1033
1142
|
self.parents[k] = None
|
1034
1143
|
|
1035
1144
|
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
1036
|
-
self.listIter += 1
|
1037
1145
|
if is_numbered:
|
1038
|
-
|
1039
|
-
|
1146
|
+
counter = self._get_list_counter(numid, ilevel)
|
1147
|
+
enum_marker = str(counter) + "."
|
1148
|
+
else:
|
1149
|
+
enum_marker = ""
|
1040
1150
|
self._add_formatted_list_item(
|
1041
1151
|
doc,
|
1042
1152
|
elements,
|
@@ -1044,14 +1154,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|
1044
1154
|
is_numbered,
|
1045
1155
|
self.level_at_new_list + ilevel,
|
1046
1156
|
)
|
1047
|
-
self.listIter = 0
|
1048
1157
|
|
1049
1158
|
elif self._prev_numid() == numid or prev_indent == ilevel:
|
1050
1159
|
# TODO: Set marker and enumerated arguments if this is an enumeration element.
|
1051
|
-
self.listIter += 1
|
1052
1160
|
if is_numbered:
|
1053
|
-
|
1054
|
-
|
1161
|
+
counter = self._get_list_counter(numid, ilevel)
|
1162
|
+
enum_marker = str(counter) + "."
|
1163
|
+
else:
|
1164
|
+
enum_marker = ""
|
1055
1165
|
self._add_formatted_list_item(
|
1056
1166
|
doc, elements, enum_marker, is_numbered, level - 1
|
1057
1167
|
)
|
@@ -60,10 +60,12 @@ from docling.datamodel.pipeline_options import (
|
|
60
60
|
)
|
61
61
|
from docling.datamodel.settings import settings
|
62
62
|
from docling.datamodel.vlm_model_specs import (
|
63
|
+
GOT2_TRANSFORMERS,
|
63
64
|
GRANITE_VISION_OLLAMA,
|
64
65
|
GRANITE_VISION_TRANSFORMERS,
|
65
66
|
SMOLDOCLING_MLX,
|
66
67
|
SMOLDOCLING_TRANSFORMERS,
|
68
|
+
SMOLDOCLING_VLLM,
|
67
69
|
VlmModelType,
|
68
70
|
)
|
69
71
|
from docling.document_converter import (
|
@@ -477,6 +479,13 @@ def convert( # noqa: C901
|
|
477
479
|
"--logo", callback=logo_callback, is_eager=True, help="Docling logo"
|
478
480
|
),
|
479
481
|
] = None,
|
482
|
+
page_batch_size: Annotated[
|
483
|
+
int,
|
484
|
+
typer.Option(
|
485
|
+
"--page-batch-size",
|
486
|
+
help=f"Number of pages processed in one batch. Default: {settings.perf.page_batch_size}",
|
487
|
+
),
|
488
|
+
] = settings.perf.page_batch_size,
|
480
489
|
):
|
481
490
|
log_format = "%(asctime)s\t%(levelname)s\t%(name)s: %(message)s"
|
482
491
|
|
@@ -491,6 +500,7 @@ def convert( # noqa: C901
|
|
491
500
|
settings.debug.visualize_layout = debug_visualize_layout
|
492
501
|
settings.debug.visualize_tables = debug_visualize_tables
|
493
502
|
settings.debug.visualize_ocr = debug_visualize_ocr
|
503
|
+
settings.perf.page_batch_size = page_batch_size
|
494
504
|
|
495
505
|
if from_formats is None:
|
496
506
|
from_formats = list(InputFormat)
|
@@ -631,6 +641,8 @@ def convert( # noqa: C901
|
|
631
641
|
pipeline_options.vlm_options = GRANITE_VISION_TRANSFORMERS
|
632
642
|
elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
|
633
643
|
pipeline_options.vlm_options = GRANITE_VISION_OLLAMA
|
644
|
+
elif vlm_model == VlmModelType.GOT_OCR_2:
|
645
|
+
pipeline_options.vlm_options = GOT2_TRANSFORMERS
|
634
646
|
elif vlm_model == VlmModelType.SMOLDOCLING:
|
635
647
|
pipeline_options.vlm_options = SMOLDOCLING_TRANSFORMERS
|
636
648
|
if sys.platform == "darwin":
|
@@ -643,6 +655,8 @@ def convert( # noqa: C901
|
|
643
655
|
"To run SmolDocling faster, please install mlx-vlm:\n"
|
644
656
|
"pip install mlx-vlm"
|
645
657
|
)
|
658
|
+
elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
|
659
|
+
pipeline_options.vlm_options = SMOLDOCLING_VLLM
|
646
660
|
|
647
661
|
pdf_format_option = PdfFormatOption(
|
648
662
|
pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
|
@@ -9,6 +9,7 @@ from rich.console import Console
|
|
9
9
|
from rich.logging import RichHandler
|
10
10
|
|
11
11
|
from docling.datamodel.settings import settings
|
12
|
+
from docling.models.utils.hf_model_download import download_hf_model
|
12
13
|
from docling.utils.model_downloader import download_models
|
13
14
|
|
14
15
|
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
@@ -128,6 +129,61 @@ def download(
|
|
128
129
|
)
|
129
130
|
|
130
131
|
|
132
|
+
@app.command("download-hf-repo")
|
133
|
+
def download_hf_repo(
|
134
|
+
models: Annotated[
|
135
|
+
list[str],
|
136
|
+
typer.Argument(
|
137
|
+
help="Specific models to download from HuggingFace identified by their repo id. For example: ds4sd/docling-models .",
|
138
|
+
),
|
139
|
+
],
|
140
|
+
output_dir: Annotated[
|
141
|
+
Path,
|
142
|
+
typer.Option(
|
143
|
+
...,
|
144
|
+
"-o",
|
145
|
+
"--output-dir",
|
146
|
+
help="The directory where to download the models.",
|
147
|
+
),
|
148
|
+
] = (settings.cache_dir / "models"),
|
149
|
+
force: Annotated[
|
150
|
+
bool, typer.Option(..., help="If true, the download will be forced.")
|
151
|
+
] = False,
|
152
|
+
quiet: Annotated[
|
153
|
+
bool,
|
154
|
+
typer.Option(
|
155
|
+
...,
|
156
|
+
"-q",
|
157
|
+
"--quiet",
|
158
|
+
help="No extra output is generated, the CLI prints only the directory with the cached models.",
|
159
|
+
),
|
160
|
+
] = False,
|
161
|
+
):
|
162
|
+
if not quiet:
|
163
|
+
logging.basicConfig(
|
164
|
+
level=logging.INFO,
|
165
|
+
format="[blue]%(message)s[/blue]",
|
166
|
+
datefmt="[%X]",
|
167
|
+
handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
|
168
|
+
)
|
169
|
+
|
170
|
+
for item in models:
|
171
|
+
typer.secho(f"\nDownloading {item} model from HuggingFace...")
|
172
|
+
download_hf_model(
|
173
|
+
repo_id=item,
|
174
|
+
# would be better to reuse "repo_cache_folder" property: https://github.com/docling-project/docling/blob/main/docling/datamodel/pipeline_options_vlm_model.py#L76
|
175
|
+
# but creating options objects seems like overkill
|
176
|
+
local_dir=output_dir / item.replace("/", "--"),
|
177
|
+
force=force,
|
178
|
+
progress=(not quiet),
|
179
|
+
)
|
180
|
+
|
181
|
+
if quiet:
|
182
|
+
typer.echo(output_dir)
|
183
|
+
else:
|
184
|
+
typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
|
185
|
+
|
186
|
+
|
131
187
|
click_app = typer.main.get_command(app)
|
132
188
|
|
133
189
|
if __name__ == "__main__":
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import math
|
2
2
|
from collections import defaultdict
|
3
3
|
from enum import Enum
|
4
|
-
from typing import TYPE_CHECKING,
|
4
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
from docling_core.types.doc import (
|
@@ -282,6 +282,9 @@ class LayoutOptions(BaseModel):
|
|
282
282
|
keep_empty_clusters: bool = (
|
283
283
|
False # Whether to keep clusters that contain no text cells
|
284
284
|
)
|
285
|
+
skip_cell_assignment: bool = (
|
286
|
+
False # Skip cell-to-cluster assignment for VLM-only processing
|
287
|
+
)
|
285
288
|
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
286
289
|
|
287
290
|
|
@@ -26,11 +26,14 @@ class ResponseFormat(str, Enum):
|
|
26
26
|
DOCTAGS = "doctags"
|
27
27
|
MARKDOWN = "markdown"
|
28
28
|
HTML = "html"
|
29
|
+
OTSL = "otsl"
|
30
|
+
PLAINTEXT = "plaintext"
|
29
31
|
|
30
32
|
|
31
33
|
class InferenceFramework(str, Enum):
|
32
34
|
MLX = "mlx"
|
33
35
|
TRANSFORMERS = "transformers"
|
36
|
+
VLLM = "vllm"
|
34
37
|
|
35
38
|
|
36
39
|
class TransformersModelType(str, Enum):
|
@@ -43,6 +46,7 @@ class TransformersModelType(str, Enum):
|
|
43
46
|
class TransformersPromptStyle(str, Enum):
|
44
47
|
CHAT = "chat"
|
45
48
|
RAW = "raw"
|
49
|
+
NONE = "none"
|
46
50
|
|
47
51
|
|
48
52
|
class InlineVlmOptions(BaseVlmOptions):
|
@@ -68,6 +72,7 @@ class InlineVlmOptions(BaseVlmOptions):
|
|
68
72
|
|
69
73
|
stop_strings: List[str] = []
|
70
74
|
extra_generation_config: Dict[str, Any] = {}
|
75
|
+
extra_processor_kwargs: Dict[str, Any] = {}
|
71
76
|
|
72
77
|
use_kv_cache: bool = True
|
73
78
|
max_new_tokens: int = 4096
|