docling 2.34.0__py3-none-any.whl → 2.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/xml/jats_backend.py +0 -0
- docling/cli/main.py +48 -18
- docling/datamodel/accelerator_options.py +68 -0
- docling/datamodel/base_models.py +10 -8
- docling/datamodel/document.py +7 -2
- docling/datamodel/pipeline_options.py +29 -161
- docling/datamodel/pipeline_options_vlm_model.py +81 -0
- docling/datamodel/vlm_model_specs.py +144 -0
- docling/document_converter.py +5 -0
- docling/models/api_vlm_model.py +1 -1
- docling/models/base_ocr_model.py +2 -1
- docling/models/code_formula_model.py +6 -11
- docling/models/document_picture_classifier.py +6 -11
- docling/models/easyocr_model.py +1 -2
- docling/models/layout_model.py +22 -17
- docling/models/ocr_mac_model.py +1 -1
- docling/models/page_preprocessing_model.py +11 -6
- docling/models/picture_description_api_model.py +1 -1
- docling/models/picture_description_base_model.py +1 -1
- docling/models/picture_description_vlm_model.py +7 -22
- docling/models/rapid_ocr_model.py +1 -2
- docling/models/table_structure_model.py +6 -12
- docling/models/tesseract_ocr_cli_model.py +1 -1
- docling/models/tesseract_ocr_model.py +1 -1
- docling/models/utils/__init__.py +0 -0
- docling/models/utils/hf_model_download.py +40 -0
- docling/models/vlm_models_inline/__init__.py +0 -0
- docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
- docling/models/{hf_mlx_model.py → vlm_models_inline/mlx_model.py} +56 -44
- docling/pipeline/standard_pdf_pipeline.py +69 -57
- docling/pipeline/vlm_pipeline.py +228 -61
- docling/utils/accelerator_utils.py +17 -2
- docling/utils/model_downloader.py +13 -12
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA +54 -55
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/RECORD +48 -41
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/WHEEL +2 -1
- docling-2.36.0.dist-info/entry_points.txt +6 -0
- docling-2.36.0.dist-info/top_level.txt +1 -0
- docling/models/hf_vlm_model.py +0 -182
- docling-2.34.0.dist-info/entry_points.txt +0 -7
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
import logging
|
2
|
+
from enum import Enum
|
3
|
+
|
4
|
+
from pydantic import (
|
5
|
+
AnyUrl,
|
6
|
+
)
|
7
|
+
|
8
|
+
from docling.datamodel.accelerator_options import AcceleratorDevice
|
9
|
+
from docling.datamodel.pipeline_options_vlm_model import (
|
10
|
+
ApiVlmOptions,
|
11
|
+
InferenceFramework,
|
12
|
+
InlineVlmOptions,
|
13
|
+
ResponseFormat,
|
14
|
+
TransformersModelType,
|
15
|
+
)
|
16
|
+
|
17
|
+
_log = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
# SmolDocling
|
21
|
+
SMOLDOCLING_MLX = InlineVlmOptions(
|
22
|
+
repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
|
23
|
+
prompt="Convert this page to docling.",
|
24
|
+
response_format=ResponseFormat.DOCTAGS,
|
25
|
+
inference_framework=InferenceFramework.MLX,
|
26
|
+
supported_devices=[AcceleratorDevice.MPS],
|
27
|
+
scale=2.0,
|
28
|
+
temperature=0.0,
|
29
|
+
)
|
30
|
+
|
31
|
+
SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
|
32
|
+
repo_id="ds4sd/SmolDocling-256M-preview",
|
33
|
+
prompt="Convert this page to docling.",
|
34
|
+
response_format=ResponseFormat.DOCTAGS,
|
35
|
+
inference_framework=InferenceFramework.TRANSFORMERS,
|
36
|
+
transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
|
37
|
+
supported_devices=[
|
38
|
+
AcceleratorDevice.CPU,
|
39
|
+
AcceleratorDevice.CUDA,
|
40
|
+
AcceleratorDevice.MPS,
|
41
|
+
],
|
42
|
+
scale=2.0,
|
43
|
+
temperature=0.0,
|
44
|
+
)
|
45
|
+
|
46
|
+
# GraniteVision
|
47
|
+
GRANITE_VISION_TRANSFORMERS = InlineVlmOptions(
|
48
|
+
repo_id="ibm-granite/granite-vision-3.2-2b",
|
49
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
50
|
+
response_format=ResponseFormat.MARKDOWN,
|
51
|
+
inference_framework=InferenceFramework.TRANSFORMERS,
|
52
|
+
transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
|
53
|
+
supported_devices=[
|
54
|
+
AcceleratorDevice.CPU,
|
55
|
+
AcceleratorDevice.CUDA,
|
56
|
+
AcceleratorDevice.MPS,
|
57
|
+
],
|
58
|
+
scale=2.0,
|
59
|
+
temperature=0.0,
|
60
|
+
)
|
61
|
+
|
62
|
+
GRANITE_VISION_OLLAMA = ApiVlmOptions(
|
63
|
+
url=AnyUrl("http://localhost:11434/v1/chat/completions"),
|
64
|
+
params={"model": "granite3.2-vision:2b"},
|
65
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
66
|
+
scale=1.0,
|
67
|
+
timeout=120,
|
68
|
+
response_format=ResponseFormat.MARKDOWN,
|
69
|
+
temperature=0.0,
|
70
|
+
)
|
71
|
+
|
72
|
+
# Pixtral
|
73
|
+
PIXTRAL_12B_TRANSFORMERS = InlineVlmOptions(
|
74
|
+
repo_id="mistral-community/pixtral-12b",
|
75
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
76
|
+
response_format=ResponseFormat.MARKDOWN,
|
77
|
+
inference_framework=InferenceFramework.TRANSFORMERS,
|
78
|
+
transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
|
79
|
+
supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
|
80
|
+
scale=2.0,
|
81
|
+
temperature=0.0,
|
82
|
+
)
|
83
|
+
|
84
|
+
PIXTRAL_12B_MLX = InlineVlmOptions(
|
85
|
+
repo_id="mlx-community/pixtral-12b-bf16",
|
86
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
87
|
+
response_format=ResponseFormat.MARKDOWN,
|
88
|
+
inference_framework=InferenceFramework.MLX,
|
89
|
+
supported_devices=[AcceleratorDevice.MPS],
|
90
|
+
scale=2.0,
|
91
|
+
temperature=0.0,
|
92
|
+
)
|
93
|
+
|
94
|
+
# Phi4
|
95
|
+
PHI4_TRANSFORMERS = InlineVlmOptions(
|
96
|
+
repo_id="microsoft/Phi-4-multimodal-instruct",
|
97
|
+
prompt="Convert this page to MarkDown. Do not miss any text and only output the bare markdown",
|
98
|
+
trust_remote_code=True,
|
99
|
+
response_format=ResponseFormat.MARKDOWN,
|
100
|
+
inference_framework=InferenceFramework.TRANSFORMERS,
|
101
|
+
transformers_model_type=TransformersModelType.AUTOMODEL_CAUSALLM,
|
102
|
+
supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
|
103
|
+
scale=2.0,
|
104
|
+
temperature=0.0,
|
105
|
+
extra_generation_config=dict(num_logits_to_keep=0),
|
106
|
+
)
|
107
|
+
|
108
|
+
# Qwen
|
109
|
+
QWEN25_VL_3B_MLX = InlineVlmOptions(
|
110
|
+
repo_id="mlx-community/Qwen2.5-VL-3B-Instruct-bf16",
|
111
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
112
|
+
response_format=ResponseFormat.MARKDOWN,
|
113
|
+
inference_framework=InferenceFramework.MLX,
|
114
|
+
supported_devices=[AcceleratorDevice.MPS],
|
115
|
+
scale=2.0,
|
116
|
+
temperature=0.0,
|
117
|
+
)
|
118
|
+
|
119
|
+
# Gemma-3
|
120
|
+
GEMMA3_12B_MLX = InlineVlmOptions(
|
121
|
+
repo_id="mlx-community/gemma-3-12b-it-bf16",
|
122
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
123
|
+
response_format=ResponseFormat.MARKDOWN,
|
124
|
+
inference_framework=InferenceFramework.MLX,
|
125
|
+
supported_devices=[AcceleratorDevice.MPS],
|
126
|
+
scale=2.0,
|
127
|
+
temperature=0.0,
|
128
|
+
)
|
129
|
+
|
130
|
+
GEMMA3_27B_MLX = InlineVlmOptions(
|
131
|
+
repo_id="mlx-community/gemma-3-27b-it-bf16",
|
132
|
+
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
133
|
+
response_format=ResponseFormat.MARKDOWN,
|
134
|
+
inference_framework=InferenceFramework.MLX,
|
135
|
+
supported_devices=[AcceleratorDevice.MPS],
|
136
|
+
scale=2.0,
|
137
|
+
temperature=0.0,
|
138
|
+
)
|
139
|
+
|
140
|
+
|
141
|
+
class VlmModelType(str, Enum):
|
142
|
+
SMOLDOCLING = "smoldocling"
|
143
|
+
GRANITE_VISION = "granite_vision"
|
144
|
+
GRANITE_VISION_OLLAMA = "granite_vision_ollama"
|
docling/document_converter.py
CHANGED
@@ -186,6 +186,11 @@ class DocumentConverter:
|
|
186
186
|
Tuple[Type[BasePipeline], str], BasePipeline
|
187
187
|
] = {}
|
188
188
|
|
189
|
+
def _get_initialized_pipelines(
|
190
|
+
self,
|
191
|
+
) -> dict[tuple[Type[BasePipeline], str], BasePipeline]:
|
192
|
+
return self.initialized_pipelines
|
193
|
+
|
189
194
|
def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str:
|
190
195
|
"""Generate a hash of pipeline options to use as part of the cache key."""
|
191
196
|
options_str = str(pipeline_options.model_dump())
|
docling/models/api_vlm_model.py
CHANGED
@@ -3,7 +3,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
3
3
|
|
4
4
|
from docling.datamodel.base_models import Page, VlmPrediction
|
5
5
|
from docling.datamodel.document import ConversionResult
|
6
|
-
from docling.datamodel.pipeline_options import ApiVlmOptions
|
6
|
+
from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions
|
7
7
|
from docling.exceptions import OperationNotAllowed
|
8
8
|
from docling.models.base_model import BasePageModel
|
9
9
|
from docling.utils.api_image_request import api_image_request
|
docling/models/base_ocr_model.py
CHANGED
@@ -11,9 +11,10 @@ from PIL import Image, ImageDraw
|
|
11
11
|
from rtree import index
|
12
12
|
from scipy.ndimage import binary_dilation, find_objects, label
|
13
13
|
|
14
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
14
15
|
from docling.datamodel.base_models import Page
|
15
16
|
from docling.datamodel.document import ConversionResult
|
16
|
-
from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
|
17
|
+
from docling.datamodel.pipeline_options import OcrOptions
|
17
18
|
from docling.datamodel.settings import settings
|
18
19
|
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
19
20
|
|
@@ -16,9 +16,10 @@ from docling_core.types.doc.labels import CodeLanguageLabel
|
|
16
16
|
from PIL import Image, ImageOps
|
17
17
|
from pydantic import BaseModel
|
18
18
|
|
19
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
19
20
|
from docling.datamodel.base_models import ItemAndImageEnrichmentElement
|
20
|
-
from docling.datamodel.pipeline_options import AcceleratorOptions
|
21
21
|
from docling.models.base_model import BaseItemAndImageEnrichmentModel
|
22
|
+
from docling.models.utils.hf_model_download import download_hf_model
|
22
23
|
from docling.utils.accelerator_utils import decide_device
|
23
24
|
|
24
25
|
|
@@ -117,20 +118,14 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|
117
118
|
force: bool = False,
|
118
119
|
progress: bool = False,
|
119
120
|
) -> Path:
|
120
|
-
|
121
|
-
from huggingface_hub.utils import disable_progress_bars
|
122
|
-
|
123
|
-
if not progress:
|
124
|
-
disable_progress_bars()
|
125
|
-
download_path = snapshot_download(
|
121
|
+
return download_hf_model(
|
126
122
|
repo_id="ds4sd/CodeFormula",
|
127
|
-
force_download=force,
|
128
|
-
local_dir=local_dir,
|
129
123
|
revision="v1.0.2",
|
124
|
+
local_dir=local_dir,
|
125
|
+
force=force,
|
126
|
+
progress=progress,
|
130
127
|
)
|
131
128
|
|
132
|
-
return Path(download_path)
|
133
|
-
|
134
129
|
def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
|
135
130
|
"""
|
136
131
|
Determines if a given element in a document can be processed by the model.
|
@@ -13,8 +13,9 @@ from docling_core.types.doc import (
|
|
13
13
|
from PIL import Image
|
14
14
|
from pydantic import BaseModel
|
15
15
|
|
16
|
-
from docling.datamodel.pipeline_options import AcceleratorOptions
|
16
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
17
17
|
from docling.models.base_model import BaseEnrichmentModel
|
18
|
+
from docling.models.utils.hf_model_download import download_hf_model
|
18
19
|
from docling.utils.accelerator_utils import decide_device
|
19
20
|
|
20
21
|
|
@@ -105,20 +106,14 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|
105
106
|
def download_models(
|
106
107
|
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
107
108
|
) -> Path:
|
108
|
-
|
109
|
-
from huggingface_hub.utils import disable_progress_bars
|
110
|
-
|
111
|
-
if not progress:
|
112
|
-
disable_progress_bars()
|
113
|
-
download_path = snapshot_download(
|
109
|
+
return download_hf_model(
|
114
110
|
repo_id="ds4sd/DocumentFigureClassifier",
|
115
|
-
force_download=force,
|
116
|
-
local_dir=local_dir,
|
117
111
|
revision="v1.0.1",
|
112
|
+
local_dir=local_dir,
|
113
|
+
force=force,
|
114
|
+
progress=progress,
|
118
115
|
)
|
119
116
|
|
120
|
-
return Path(download_path)
|
121
|
-
|
122
117
|
def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
|
123
118
|
"""
|
124
119
|
Determines if the given element can be processed by the classifier.
|
docling/models/easyocr_model.py
CHANGED
@@ -9,11 +9,10 @@ import numpy
|
|
9
9
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
10
10
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
11
11
|
|
12
|
+
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
12
13
|
from docling.datamodel.base_models import Page
|
13
14
|
from docling.datamodel.document import ConversionResult
|
14
15
|
from docling.datamodel.pipeline_options import (
|
15
|
-
AcceleratorDevice,
|
16
|
-
AcceleratorOptions,
|
17
16
|
EasyOcrOptions,
|
18
17
|
OcrOptions,
|
19
18
|
)
|
docling/models/layout_model.py
CHANGED
@@ -10,11 +10,12 @@ from docling_core.types.doc import DocItemLabel
|
|
10
10
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
11
11
|
from PIL import Image
|
12
12
|
|
13
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
13
14
|
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
14
15
|
from docling.datamodel.document import ConversionResult
|
15
|
-
from docling.datamodel.pipeline_options import AcceleratorOptions
|
16
16
|
from docling.datamodel.settings import settings
|
17
17
|
from docling.models.base_model import BasePageModel
|
18
|
+
from docling.models.utils.hf_model_download import download_hf_model
|
18
19
|
from docling.utils.accelerator_utils import decide_device
|
19
20
|
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
20
21
|
from docling.utils.profiling import TimeRecorder
|
@@ -83,20 +84,14 @@ class LayoutModel(BasePageModel):
|
|
83
84
|
force: bool = False,
|
84
85
|
progress: bool = False,
|
85
86
|
) -> Path:
|
86
|
-
|
87
|
-
from huggingface_hub.utils import disable_progress_bars
|
88
|
-
|
89
|
-
if not progress:
|
90
|
-
disable_progress_bars()
|
91
|
-
download_path = snapshot_download(
|
87
|
+
return download_hf_model(
|
92
88
|
repo_id="ds4sd/docling-models",
|
93
|
-
|
89
|
+
revision="v2.2.0",
|
94
90
|
local_dir=local_dir,
|
95
|
-
|
91
|
+
force=force,
|
92
|
+
progress=progress,
|
96
93
|
)
|
97
94
|
|
98
|
-
return Path(download_path)
|
99
|
-
|
100
95
|
def draw_clusters_and_cells_side_by_side(
|
101
96
|
self, conv_res, page, clusters, mode_prefix: str, show: bool = False
|
102
97
|
):
|
@@ -185,13 +180,23 @@ class LayoutModel(BasePageModel):
|
|
185
180
|
).postprocess()
|
186
181
|
# processed_clusters, processed_cells = clusters, page.cells
|
187
182
|
|
188
|
-
|
189
|
-
|
190
|
-
|
183
|
+
with warnings.catch_warnings():
|
184
|
+
warnings.filterwarnings(
|
185
|
+
"ignore",
|
186
|
+
"Mean of empty slice|invalid value encountered in scalar divide",
|
187
|
+
RuntimeWarning,
|
188
|
+
"numpy",
|
189
|
+
)
|
191
190
|
|
192
|
-
|
193
|
-
|
194
|
-
|
191
|
+
conv_res.confidence.pages[page.page_no].layout_score = float(
|
192
|
+
np.mean([c.confidence for c in processed_clusters])
|
193
|
+
)
|
194
|
+
|
195
|
+
conv_res.confidence.pages[page.page_no].ocr_score = float(
|
196
|
+
np.mean(
|
197
|
+
[c.confidence for c in processed_cells if c.from_ocr]
|
198
|
+
)
|
199
|
+
)
|
195
200
|
|
196
201
|
page.cells = processed_cells
|
197
202
|
page.predictions.layout = LayoutPrediction(
|
docling/models/ocr_mac_model.py
CHANGED
@@ -8,10 +8,10 @@ from typing import Optional, Type
|
|
8
8
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
9
9
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
10
10
|
|
11
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
11
12
|
from docling.datamodel.base_models import Page
|
12
13
|
from docling.datamodel.document import ConversionResult
|
13
14
|
from docling.datamodel.pipeline_options import (
|
14
|
-
AcceleratorOptions,
|
15
15
|
OcrMacOptions,
|
16
16
|
OcrOptions,
|
17
17
|
)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import re
|
2
|
+
import warnings
|
2
3
|
from collections.abc import Iterable
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Optional
|
@@ -7,7 +8,7 @@ import numpy as np
|
|
7
8
|
from PIL import ImageDraw
|
8
9
|
from pydantic import BaseModel
|
9
10
|
|
10
|
-
from docling.datamodel.base_models import Page
|
11
|
+
from docling.datamodel.base_models import Page
|
11
12
|
from docling.datamodel.document import ConversionResult
|
12
13
|
from docling.datamodel.settings import settings
|
13
14
|
from docling.models.base_model import BasePageModel
|
@@ -76,11 +77,15 @@ class PagePreprocessingModel(BasePageModel):
|
|
76
77
|
score = self.rate_text_quality(c.text)
|
77
78
|
text_scores.append(score)
|
78
79
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
)
|
83
|
-
|
80
|
+
with warnings.catch_warnings():
|
81
|
+
warnings.filterwarnings(
|
82
|
+
"ignore", "Mean of empty slice", RuntimeWarning, "numpy"
|
83
|
+
)
|
84
|
+
conv_res.confidence.pages[page.page_no].parse_score = float(
|
85
|
+
np.nanquantile(
|
86
|
+
text_scores, q=0.10
|
87
|
+
) # To emphasise problems in the parse_score, we take the 10% percentile score of all text cells.
|
88
|
+
)
|
84
89
|
|
85
90
|
# DEBUG code:
|
86
91
|
def draw_text_boxes(image, cells, show: bool = False):
|
@@ -5,8 +5,8 @@ from typing import Optional, Type, Union
|
|
5
5
|
|
6
6
|
from PIL import Image
|
7
7
|
|
8
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
8
9
|
from docling.datamodel.pipeline_options import (
|
9
|
-
AcceleratorOptions,
|
10
10
|
PictureDescriptionApiOptions,
|
11
11
|
PictureDescriptionBaseOptions,
|
12
12
|
)
|
@@ -13,8 +13,8 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
|
13
13
|
)
|
14
14
|
from PIL import Image
|
15
15
|
|
16
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
16
17
|
from docling.datamodel.pipeline_options import (
|
17
|
-
AcceleratorOptions,
|
18
18
|
PictureDescriptionBaseOptions,
|
19
19
|
)
|
20
20
|
from docling.models.base_model import (
|
@@ -4,16 +4,21 @@ from typing import Optional, Type, Union
|
|
4
4
|
|
5
5
|
from PIL import Image
|
6
6
|
|
7
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
7
8
|
from docling.datamodel.pipeline_options import (
|
8
|
-
AcceleratorOptions,
|
9
9
|
PictureDescriptionBaseOptions,
|
10
10
|
PictureDescriptionVlmOptions,
|
11
11
|
)
|
12
12
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
13
|
+
from docling.models.utils.hf_model_download import (
|
14
|
+
HuggingFaceModelDownloadMixin,
|
15
|
+
)
|
13
16
|
from docling.utils.accelerator_utils import decide_device
|
14
17
|
|
15
18
|
|
16
|
-
class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
19
|
+
class PictureDescriptionVlmModel(
|
20
|
+
PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
|
21
|
+
):
|
17
22
|
@classmethod
|
18
23
|
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
|
19
24
|
return PictureDescriptionVlmOptions
|
@@ -66,26 +71,6 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
|
|
66
71
|
|
67
72
|
self.provenance = f"{self.options.repo_id}"
|
68
73
|
|
69
|
-
@staticmethod
|
70
|
-
def download_models(
|
71
|
-
repo_id: str,
|
72
|
-
local_dir: Optional[Path] = None,
|
73
|
-
force: bool = False,
|
74
|
-
progress: bool = False,
|
75
|
-
) -> Path:
|
76
|
-
from huggingface_hub import snapshot_download
|
77
|
-
from huggingface_hub.utils import disable_progress_bars
|
78
|
-
|
79
|
-
if not progress:
|
80
|
-
disable_progress_bars()
|
81
|
-
download_path = snapshot_download(
|
82
|
-
repo_id=repo_id,
|
83
|
-
force_download=force,
|
84
|
-
local_dir=local_dir,
|
85
|
-
)
|
86
|
-
|
87
|
-
return Path(download_path)
|
88
|
-
|
89
74
|
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
|
90
75
|
from transformers import GenerationConfig
|
91
76
|
|
@@ -7,11 +7,10 @@ import numpy
|
|
7
7
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
8
8
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
9
9
|
|
10
|
+
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
10
11
|
from docling.datamodel.base_models import Page
|
11
12
|
from docling.datamodel.document import ConversionResult
|
12
13
|
from docling.datamodel.pipeline_options import (
|
13
|
-
AcceleratorDevice,
|
14
|
-
AcceleratorOptions,
|
15
14
|
OcrOptions,
|
16
15
|
RapidOcrOptions,
|
17
16
|
)
|
@@ -13,16 +13,16 @@ from docling_core.types.doc.page import (
|
|
13
13
|
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
14
14
|
from PIL import ImageDraw
|
15
15
|
|
16
|
+
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
16
17
|
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
17
18
|
from docling.datamodel.document import ConversionResult
|
18
19
|
from docling.datamodel.pipeline_options import (
|
19
|
-
AcceleratorDevice,
|
20
|
-
AcceleratorOptions,
|
21
20
|
TableFormerMode,
|
22
21
|
TableStructureOptions,
|
23
22
|
)
|
24
23
|
from docling.datamodel.settings import settings
|
25
24
|
from docling.models.base_model import BasePageModel
|
25
|
+
from docling.models.utils.hf_model_download import download_hf_model
|
26
26
|
from docling.utils.accelerator_utils import decide_device
|
27
27
|
from docling.utils.profiling import TimeRecorder
|
28
28
|
|
@@ -90,20 +90,14 @@ class TableStructureModel(BasePageModel):
|
|
90
90
|
def download_models(
|
91
91
|
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
92
92
|
) -> Path:
|
93
|
-
|
94
|
-
from huggingface_hub.utils import disable_progress_bars
|
95
|
-
|
96
|
-
if not progress:
|
97
|
-
disable_progress_bars()
|
98
|
-
download_path = snapshot_download(
|
93
|
+
return download_hf_model(
|
99
94
|
repo_id="ds4sd/docling-models",
|
100
|
-
force_download=force,
|
101
|
-
local_dir=local_dir,
|
102
95
|
revision="v2.2.0",
|
96
|
+
local_dir=local_dir,
|
97
|
+
force=force,
|
98
|
+
progress=progress,
|
103
99
|
)
|
104
100
|
|
105
|
-
return Path(download_path)
|
106
|
-
|
107
101
|
def draw_table_and_cells(
|
108
102
|
self,
|
109
103
|
conv_res: ConversionResult,
|
@@ -13,10 +13,10 @@ import pandas as pd
|
|
13
13
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
14
14
|
from docling_core.types.doc.page import TextCell
|
15
15
|
|
16
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
16
17
|
from docling.datamodel.base_models import Page
|
17
18
|
from docling.datamodel.document import ConversionResult
|
18
19
|
from docling.datamodel.pipeline_options import (
|
19
|
-
AcceleratorOptions,
|
20
20
|
OcrOptions,
|
21
21
|
TesseractCliOcrOptions,
|
22
22
|
)
|
@@ -7,10 +7,10 @@ from typing import Iterable, Optional, Type
|
|
7
7
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
8
8
|
from docling_core.types.doc.page import TextCell
|
9
9
|
|
10
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
10
11
|
from docling.datamodel.base_models import Page
|
11
12
|
from docling.datamodel.document import ConversionResult
|
12
13
|
from docling.datamodel.pipeline_options import (
|
13
|
-
AcceleratorOptions,
|
14
14
|
OcrOptions,
|
15
15
|
TesseractOcrOptions,
|
16
16
|
)
|
File without changes
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
_log = logging.getLogger(__name__)
|
6
|
+
|
7
|
+
|
8
|
+
def download_hf_model(
|
9
|
+
repo_id: str,
|
10
|
+
local_dir: Optional[Path] = None,
|
11
|
+
force: bool = False,
|
12
|
+
progress: bool = False,
|
13
|
+
revision: Optional[str] = None,
|
14
|
+
) -> Path:
|
15
|
+
from huggingface_hub import snapshot_download
|
16
|
+
from huggingface_hub.utils import disable_progress_bars
|
17
|
+
|
18
|
+
if not progress:
|
19
|
+
disable_progress_bars()
|
20
|
+
download_path = snapshot_download(
|
21
|
+
repo_id=repo_id,
|
22
|
+
force_download=force,
|
23
|
+
local_dir=local_dir,
|
24
|
+
revision=revision,
|
25
|
+
)
|
26
|
+
|
27
|
+
return Path(download_path)
|
28
|
+
|
29
|
+
|
30
|
+
class HuggingFaceModelDownloadMixin:
|
31
|
+
@staticmethod
|
32
|
+
def download_models(
|
33
|
+
repo_id: str,
|
34
|
+
local_dir: Optional[Path] = None,
|
35
|
+
force: bool = False,
|
36
|
+
progress: bool = False,
|
37
|
+
) -> Path:
|
38
|
+
return download_hf_model(
|
39
|
+
repo_id=repo_id, local_dir=local_dir, force=force, progress=progress
|
40
|
+
)
|
File without changes
|