docling 2.34.0__py3-none-any.whl → 2.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/xml/jats_backend.py +0 -0
- docling/cli/main.py +48 -18
- docling/datamodel/accelerator_options.py +68 -0
- docling/datamodel/base_models.py +10 -8
- docling/datamodel/document.py +7 -2
- docling/datamodel/pipeline_options.py +29 -161
- docling/datamodel/pipeline_options_vlm_model.py +81 -0
- docling/datamodel/vlm_model_specs.py +144 -0
- docling/document_converter.py +5 -0
- docling/models/api_vlm_model.py +1 -1
- docling/models/base_ocr_model.py +2 -1
- docling/models/code_formula_model.py +6 -11
- docling/models/document_picture_classifier.py +6 -11
- docling/models/easyocr_model.py +1 -2
- docling/models/layout_model.py +22 -17
- docling/models/ocr_mac_model.py +1 -1
- docling/models/page_preprocessing_model.py +11 -6
- docling/models/picture_description_api_model.py +1 -1
- docling/models/picture_description_base_model.py +1 -1
- docling/models/picture_description_vlm_model.py +7 -22
- docling/models/rapid_ocr_model.py +1 -2
- docling/models/table_structure_model.py +6 -12
- docling/models/tesseract_ocr_cli_model.py +1 -1
- docling/models/tesseract_ocr_model.py +1 -1
- docling/models/utils/__init__.py +0 -0
- docling/models/utils/hf_model_download.py +40 -0
- docling/models/vlm_models_inline/__init__.py +0 -0
- docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
- docling/models/{hf_mlx_model.py → vlm_models_inline/mlx_model.py} +56 -44
- docling/pipeline/standard_pdf_pipeline.py +69 -57
- docling/pipeline/vlm_pipeline.py +228 -61
- docling/utils/accelerator_utils.py +17 -2
- docling/utils/model_downloader.py +13 -12
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA +54 -55
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/RECORD +48 -41
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info}/WHEEL +2 -1
- docling-2.36.0.dist-info/entry_points.txt +6 -0
- docling-2.36.0.dist-info/top_level.txt +1 -0
- docling/models/hf_vlm_model.py +0 -182
- docling-2.34.0.dist-info/entry_points.txt +0 -7
- {docling-2.34.0.dist-info → docling-2.36.0.dist-info/licenses}/LICENSE +0 -0
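The reorganized modules above (pipeline_options_vlm_model.py, vlm_model_specs.py, and the new vlm_models_inline package) carry the VLM-pipeline configuration that previously lived in pipeline_options.py. A minimal usage sketch, assuming the 2.36 API in which predefined specs such as vlm_model_specs.SMOLDOCLING_TRANSFORMERS exist (the spec name is taken from the new vlm_model_specs module, not from this diff):

from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Pick a predefined inline VLM spec (assumed name) and run the VLM pipeline on a PDF.
pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_model_specs.SMOLDOCLING_TRANSFORMERS,
)
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("document.pdf")
print(result.document.export_to_markdown())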
docling/models/vlm_models_inline/hf_transformers_model.py (new file)
@@ -0,0 +1,194 @@
+import importlib.metadata
+import logging
+import time
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any, Optional
+
+from docling.datamodel.accelerator_options import (
+    AcceleratorOptions,
+)
+from docling.datamodel.base_models import Page, VlmPrediction
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options_vlm_model import (
+    InlineVlmOptions,
+    TransformersModelType,
+)
+from docling.models.base_model import BasePageModel
+from docling.models.utils.hf_model_download import (
+    HuggingFaceModelDownloadMixin,
+)
+from docling.utils.accelerator_utils import decide_device
+from docling.utils.profiling import TimeRecorder
+
+_log = logging.getLogger(__name__)
+
+
+class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        vlm_options: InlineVlmOptions,
+    ):
+        self.enabled = enabled
+
+        self.vlm_options = vlm_options
+
+        if self.enabled:
+            import torch
+            from transformers import (
+                AutoModel,
+                AutoModelForCausalLM,
+                AutoModelForVision2Seq,
+                AutoProcessor,
+                BitsAndBytesConfig,
+                GenerationConfig,
+            )
+
+            transformers_version = importlib.metadata.version("transformers")
+            if (
+                self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct"
+                and transformers_version >= "4.52.0"
+            ):
+                raise NotImplementedError(
+                    f"Phi 4 only works with transformers<4.52.0 but you have {transformers_version=}. Please downgrage running pip install -U 'transformers<4.52.0'."
+                )
+
+            self.device = decide_device(
+                accelerator_options.device,
+                supported_devices=vlm_options.supported_devices,
+            )
+            _log.debug(f"Available device for VLM: {self.device}")
+
+            self.use_cache = vlm_options.use_kv_cache
+            self.max_new_tokens = vlm_options.max_new_tokens
+            self.temperature = vlm_options.temperature
+
+            repo_cache_folder = vlm_options.repo_id.replace("/", "--")
+
+            if artifacts_path is None:
+                artifacts_path = self.download_models(self.vlm_options.repo_id)
+            elif (artifacts_path / repo_cache_folder).exists():
+                artifacts_path = artifacts_path / repo_cache_folder
+
+            self.param_quantization_config: Optional[BitsAndBytesConfig] = None
+            if vlm_options.quantized:
+                self.param_quantization_config = BitsAndBytesConfig(
+                    load_in_8bit=vlm_options.load_in_8bit,
+                    llm_int8_threshold=vlm_options.llm_int8_threshold,
+                )
+
+            model_cls: Any = AutoModel
+            if (
+                self.vlm_options.transformers_model_type
+                == TransformersModelType.AUTOMODEL_CAUSALLM
+            ):
+                model_cls = AutoModelForCausalLM
+            elif (
+                self.vlm_options.transformers_model_type
+                == TransformersModelType.AUTOMODEL_VISION2SEQ
+            ):
+                model_cls = AutoModelForVision2Seq
+
+            self.processor = AutoProcessor.from_pretrained(
+                artifacts_path,
+                trust_remote_code=vlm_options.trust_remote_code,
+            )
+            self.vlm_model = model_cls.from_pretrained(
+                artifacts_path,
+                device_map=self.device,
+                _attn_implementation=(
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "eager"
+                ),
+                trust_remote_code=vlm_options.trust_remote_code,
+            )
+
+            # Load generation config
+            self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
+
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+        for page in page_batch:
+            assert page._backend is not None
+            if not page._backend.is_valid():
+                yield page
+            else:
+                with TimeRecorder(conv_res, "vlm"):
+                    assert page.size is not None
+
+                    hi_res_image = page.get_image(scale=self.vlm_options.scale)
+
+                    # Define prompt structure
+                    prompt = self.formulate_prompt()
+
+                    inputs = self.processor(
+                        text=prompt, images=[hi_res_image], return_tensors="pt"
+                    ).to(self.device)
+
+                    start_time = time.time()
+                    # Call model to generate:
+                    generated_ids = self.vlm_model.generate(
+                        **inputs,
+                        max_new_tokens=self.max_new_tokens,
+                        use_cache=self.use_cache,
+                        temperature=self.temperature,
+                        generation_config=self.generation_config,
+                        **self.vlm_options.extra_generation_config,
+                    )
+
+                    generation_time = time.time() - start_time
+                    generated_texts = self.processor.batch_decode(
+                        generated_ids[:, inputs["input_ids"].shape[1] :],
+                        skip_special_tokens=False,
+                    )[0]
+
+                    num_tokens = len(generated_ids[0])
+                    _log.debug(
+                        f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds."
+                    )
+                    page.predictions.vlm_response = VlmPrediction(
+                        text=generated_texts,
+                        generation_time=generation_time,
+                    )
+
+                yield page
+
+    def formulate_prompt(self) -> str:
+        """Formulate a prompt for the VLM."""
+
+        if self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct":
+            _log.debug("Using specialized prompt for Phi-4")
+            # more info here: https://huggingface.co/microsoft/Phi-4-multimodal-instruct#loading-the-model-locally
+
+            user_prompt = "<|user|>"
+            assistant_prompt = "<|assistant|>"
+            prompt_suffix = "<|end|>"
+
+            prompt = f"{user_prompt}<|image_1|>{self.vlm_options.prompt}{prompt_suffix}{assistant_prompt}"
+            _log.debug(f"prompt for {self.vlm_options.repo_id}: {prompt}")
+
+            return prompt
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "This is a page from a document.",
+                    },
+                    {"type": "image"},
+                    {"type": "text", "text": self.vlm_options.prompt},
+                ],
+            }
+        ]
+        prompt = self.processor.apply_chat_template(
+            messages, add_generation_prompt=False
+        )
+        return prompt
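The constructor above is the public surface of the new transformers-backed VLM page model. A sketch of direct instantiation, assuming a predefined InlineVlmOptions spec from the new vlm_model_specs module (the spec name and the AcceleratorOptions defaults are assumptions; in normal use the VlmPipeline constructs this model itself):

from docling.datamodel import vlm_model_specs
from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.models.vlm_models_inline.hf_transformers_model import (
    HuggingFaceTransformersVlmModel,
)

# Builds processor, model, and generation config; artifacts_path=None triggers
# a Hugging Face snapshot download via HuggingFaceModelDownloadMixin.
vlm_model = HuggingFaceTransformersVlmModel(
    enabled=True,
    artifacts_path=None,
    accelerator_options=AcceleratorOptions(),               # default device selection (assumed)
    vlm_options=vlm_model_specs.SMOLDOCLING_TRANSFORMERS,   # assumed predefined spec
)
# pages_out = list(vlm_model(conv_res, page_batch))  # each page gains predictions.vlm_response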
docling/models/{hf_mlx_model.py → vlm_models_inline/mlx_model.py}
@@ -4,29 +4,34 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import Optional
 
-from docling.datamodel.
-from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
+from docling.datamodel.accelerator_options import (
     AcceleratorOptions,
-    HuggingFaceVlmOptions,
 )
+from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
 from docling.models.base_model import BasePageModel
+from docling.models.utils.hf_model_download import (
+    HuggingFaceModelDownloadMixin,
+)
 from docling.utils.profiling import TimeRecorder
 
 _log = logging.getLogger(__name__)
 
 
-class HuggingFaceMlxModel(BasePageModel):
+class HuggingFaceMlxModel(BasePageModel, HuggingFaceModelDownloadMixin):
     def __init__(
         self,
         enabled: bool,
         artifacts_path: Optional[Path],
         accelerator_options: AcceleratorOptions,
-        vlm_options:
+        vlm_options: InlineVlmOptions,
     ):
         self.enabled = enabled
 
         self.vlm_options = vlm_options
+        self.max_tokens = vlm_options.max_new_tokens
+        self.temperature = vlm_options.temperature
 
         if self.enabled:
             try:
@@ -39,42 +44,24 @@ class HuggingFaceMlxModel(BasePageModel):
             )
 
             repo_cache_folder = vlm_options.repo_id.replace("/", "--")
+
             self.apply_chat_template = apply_chat_template
             self.stream_generate = stream_generate
 
             # PARAMETERS:
             if artifacts_path is None:
-                artifacts_path = self.download_models(
+                artifacts_path = self.download_models(
+                    self.vlm_options.repo_id,
+                )
             elif (artifacts_path / repo_cache_folder).exists():
                 artifacts_path = artifacts_path / repo_cache_folder
 
-            self.param_question = vlm_options.prompt
+            self.param_question = vlm_options.prompt
 
             ## Load the model
             self.vlm_model, self.processor = load(artifacts_path)
             self.config = load_config(artifacts_path)
 
-    @staticmethod
-    def download_models(
-        repo_id: str,
-        local_dir: Optional[Path] = None,
-        force: bool = False,
-        progress: bool = False,
-    ) -> Path:
-        from huggingface_hub import snapshot_download
-        from huggingface_hub.utils import disable_progress_bars
-
-        if not progress:
-            disable_progress_bars()
-        download_path = snapshot_download(
-            repo_id=repo_id,
-            force_download=force,
-            local_dir=local_dir,
-            # revision="v0.0.1",
-        )
-
-        return Path(download_path)
-
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
@@ -83,12 +70,10 @@ class HuggingFaceMlxModel(BasePageModel):
             if not page._backend.is_valid():
                 yield page
             else:
-                with TimeRecorder(conv_res, "vlm"):
+                with TimeRecorder(conv_res, f"vlm-mlx-{self.vlm_options.repo_id}"):
                     assert page.size is not None
 
-                    hi_res_image = page.get_image(scale=
-                    # hi_res_image = page.get_image(scale=1.0) # 72dpi
-
+                    hi_res_image = page.get_image(scale=self.vlm_options.scale)
                     if hi_res_image is not None:
                         im_width, im_height = hi_res_image.size
 
@@ -104,16 +89,45 @@ class HuggingFaceMlxModel(BasePageModel):
                         )
 
                     start_time = time.time()
+                    _log.debug("start generating ...")
+
                     # Call model to generate:
+                    tokens: list[VlmPredictionToken] = []
+
                    output = ""
                     for token in self.stream_generate(
                         self.vlm_model,
                         self.processor,
                         prompt,
                         [hi_res_image],
-                        max_tokens=
+                        max_tokens=self.max_tokens,
                         verbose=False,
+                        temp=self.temperature,
                     ):
+                        if len(token.logprobs.shape) == 1:
+                            tokens.append(
+                                VlmPredictionToken(
+                                    text=token.text,
+                                    token=token.token,
+                                    logprob=token.logprobs[token.token],
+                                )
+                            )
+                        elif (
+                            len(token.logprobs.shape) == 2
+                            and token.logprobs.shape[0] == 1
+                        ):
+                            tokens.append(
+                                VlmPredictionToken(
+                                    text=token.text,
+                                    token=token.token,
+                                    logprob=token.logprobs[0, token.token],
+                                )
+                            )
+                        else:
+                            _log.warning(
+                                f"incompatible shape for logprobs: {token.logprobs.shape}"
+                            )
+
                         output += token.text
                         if "</doctag>" in token.text:
                             break
@@ -121,15 +135,13 @@ class HuggingFaceMlxModel(BasePageModel):
                     generation_time = time.time() - start_time
                     page_tags = output
 
-                    _log.debug(
-
-
-
-
-
-
-
-                    # print("")
-                    page.predictions.vlm_response = VlmPrediction(text=page_tags)
+                    _log.debug(
+                        f"{generation_time:.2f} seconds for {len(tokens)} tokens ({len(tokens) / generation_time} tokens/sec)."
+                    )
+                    page.predictions.vlm_response = VlmPrediction(
+                        text=page_tags,
+                        generation_time=generation_time,
+                        generated_tokens=tokens,
+                    )
 
                 yield page
docling/pipeline/standard_pdf_pipeline.py
@@ -8,7 +8,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.datamodel.base_models import AssembledUnit, Page
+from docling.datamodel.base_models import AssembledUnit, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.datamodel.settings import settings
@@ -55,11 +55,13 @@ class StandardPdfPipeline(PaginatedPipeline):
             "When defined, it must point to a folder containing all models required by the pipeline."
         )
 
-
-
-
-
-
+        with warnings.catch_warnings(): # deprecated generate_table_images
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            self.keep_images = (
+                self.pipeline_options.generate_page_images
+                or self.pipeline_options.generate_picture_images
+                or self.pipeline_options.generate_table_images
+            )
 
         self.reading_order_model = ReadingOrderModel(options=ReadingOrderOptions())
 
@@ -210,64 +212,74 @@ class StandardPdfPipeline(PaginatedPipeline):
             )
 
         # Generate images of the requested element types
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with warnings.catch_warnings(): # deprecated generate_table_images
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            if (
+                self.pipeline_options.generate_picture_images
+                or self.pipeline_options.generate_table_images
+            ):
+                scale = self.pipeline_options.images_scale
+                for element, _level in conv_res.document.iterate_items():
+                    if not isinstance(element, DocItem) or len(element.prov) == 0:
+                        continue
+                    if (
+                        isinstance(element, PictureItem)
+                        and self.pipeline_options.generate_picture_images
+                    ) or (
+                        isinstance(element, TableItem)
+                        and self.pipeline_options.generate_table_images
+                    ):
+                        page_ix = element.prov[0].page_no - 1
+                        page = next(
+                            (p for p in conv_res.pages if p.page_no == page_ix),
+                            cast("Page", None),
+                        )
+                        assert page is not None
+                        assert page.size is not None
+                        assert page.image is not None
+
+                        crop_bbox = (
+                            element.prov[0]
+                            .bbox.scaled(scale=scale)
+                            .to_top_left_origin(
+                                page_height=page.size.height * scale
+                            )
+                        )
+
+                        cropped_im = page.image.crop(crop_bbox.as_tuple())
+                        element.image = ImageRef.from_pil(
+                            cropped_im, dpi=int(72 * scale)
+                        )
 
         # Aggregate confidence values for document:
         if len(conv_res.pages) > 0:
-
-
-
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    category=RuntimeWarning,
+                    message="Mean of empty slice|All-NaN slice encountered",
                 )
-
-
-
-
-                    q=0.1, # parse score should relate to worst 10% of pages.
+                conv_res.confidence.layout_score = float(
+                    np.nanmean(
+                        [c.layout_score for c in conv_res.confidence.pages.values()]
+                    )
                 )
-
-
-
+                conv_res.confidence.parse_score = float(
+                    np.nanquantile(
+                        [c.parse_score for c in conv_res.confidence.pages.values()],
+                        q=0.1, # parse score should relate to worst 10% of pages.
+                    )
                 )
-
-
-
+                conv_res.confidence.table_score = float(
+                    np.nanmean(
+                        [c.table_score for c in conv_res.confidence.pages.values()]
+                    )
+                )
+                conv_res.confidence.ocr_score = float(
+                    np.nanmean(
+                        [c.ocr_score for c in conv_res.confidence.pages.values()]
+                    )
                 )
-                )
 
         return conv_res
 
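The aggregation block above is why the RuntimeWarning filter is needed: np.nanmean and np.nanquantile warn when every value in a slice is NaN. A standalone illustration of the same aggregation pattern (synthetic numbers, not from the package):

import warnings

import numpy as np

# One NaN entry stands in for a page that produced no parse score.
page_parse_scores = [0.98, 0.95, np.nan, 0.40, 0.99]

with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        category=RuntimeWarning,
        message="Mean of empty slice|All-NaN slice encountered",
    )
    mean_score = float(np.nanmean(page_parse_scores))              # ignores the NaN page
    parse_score = float(np.nanquantile(page_parse_scores, q=0.1))  # tracks the worst ~10% of pages

print(f"mean={mean_score:.3f} parse={parse_score:.3f}")  # parse score sits near the 0.40 outlier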