docling-2.39.0-py3-none-any.whl → docling-2.41.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/docling_parse_v4_backend.py +14 -4
- docling/backend/msexcel_backend.py +33 -14
- docling/datamodel/asr_model_specs.py +6 -6
- docling/datamodel/base_models.py +23 -1
- docling/datamodel/layout_model_specs.py +90 -0
- docling/datamodel/pipeline_options.py +18 -0
- docling/datamodel/pipeline_options_vlm_model.py +11 -3
- docling/models/api_vlm_model.py +7 -5
- docling/models/base_ocr_model.py +6 -2
- docling/models/document_picture_classifier.py +12 -13
- docling/models/layout_model.py +27 -18
- docling/models/picture_description_vlm_model.py +16 -11
- docling/models/plugins/defaults.py +9 -9
- docling/models/readingorder_model.py +8 -1
- docling/models/table_structure_model.py +3 -1
- docling/models/tesseract_ocr_model.py +10 -4
- docling/models/vlm_models_inline/hf_transformers_model.py +39 -20
- docling/models/vlm_models_inline/mlx_model.py +5 -3
- docling/pipeline/standard_pdf_pipeline.py +3 -3
- docling/pipeline/vlm_pipeline.py +1 -0
- docling/utils/accelerator_utils.py +2 -2
- docling/utils/layout_postprocessor.py +7 -2
- docling/utils/model_downloader.py +2 -1
- docling/utils/ocr_utils.py +1 -1
- docling/utils/orientation.py +22 -28
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/METADATA +5 -5
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/RECORD +31 -30
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/WHEEL +0 -0
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/entry_points.txt +0 -0
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/licenses/LICENSE +0 -0
- {docling-2.39.0.dist-info → docling-2.41.0.dist-info}/top_level.txt +0 -0
@@ -187,7 +187,17 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
|
|
187
187
|
|
188
188
|
def unload(self):
|
189
189
|
super().unload()
|
190
|
-
|
191
|
-
|
192
|
-
self.
|
193
|
-
|
190
|
+
# Unload docling-parse document first
|
191
|
+
if self.dp_doc is not None:
|
192
|
+
self.dp_doc.unload()
|
193
|
+
self.dp_doc = None
|
194
|
+
|
195
|
+
# Then close pypdfium2 document with proper locking
|
196
|
+
if self._pdoc is not None:
|
197
|
+
with pypdfium2_lock:
|
198
|
+
try:
|
199
|
+
self._pdoc.close()
|
200
|
+
except Exception:
|
201
|
+
# Ignore cleanup errors
|
202
|
+
pass
|
203
|
+
self._pdoc = None
|
@@ -337,10 +337,17 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|
337
337
|
# Collect the data within the bounds
|
338
338
|
data = []
|
339
339
|
visited_cells: set[tuple[int, int]] = set()
|
340
|
-
for ri in
|
341
|
-
|
342
|
-
|
343
|
-
|
340
|
+
for ri, row in enumerate(
|
341
|
+
sheet.iter_rows(
|
342
|
+
min_row=start_row + 1, # start_row is 0-based but iter_rows is 1-based
|
343
|
+
max_row=max_row + 1,
|
344
|
+
min_col=start_col + 1,
|
345
|
+
max_col=max_col + 1,
|
346
|
+
values_only=False,
|
347
|
+
),
|
348
|
+
start_row,
|
349
|
+
):
|
350
|
+
for rj, cell in enumerate(row, start_col):
|
344
351
|
# Check if the cell belongs to a merged range
|
345
352
|
row_span = 1
|
346
353
|
col_span = 1
|
@@ -397,10 +404,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|
397
404
|
"""
|
398
405
|
max_row: int = start_row
|
399
406
|
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
407
|
+
for ri, (cell,) in enumerate(
|
408
|
+
sheet.iter_rows(
|
409
|
+
min_row=start_row + 2,
|
410
|
+
max_row=sheet.max_row,
|
411
|
+
min_col=start_col + 1,
|
412
|
+
max_col=start_col + 1,
|
413
|
+
values_only=False,
|
414
|
+
),
|
415
|
+
start_row + 1,
|
416
|
+
):
|
404
417
|
# Check if the cell is part of a merged range
|
405
418
|
merged_range = next(
|
406
419
|
(mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
|
@@ -414,7 +427,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|
414
427
|
if merged_range:
|
415
428
|
max_row = max(max_row, merged_range.max_row - 1)
|
416
429
|
else:
|
417
|
-
max_row
|
430
|
+
max_row = ri
|
418
431
|
|
419
432
|
return max_row
|
420
433
|
|
@@ -433,10 +446,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|
433
446
|
"""
|
434
447
|
max_col: int = start_col
|
435
448
|
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
449
|
+
for rj, (cell,) in enumerate(
|
450
|
+
sheet.iter_cols(
|
451
|
+
min_row=start_row + 1,
|
452
|
+
max_row=start_row + 1,
|
453
|
+
min_col=start_col + 2,
|
454
|
+
max_col=sheet.max_column,
|
455
|
+
values_only=False,
|
456
|
+
),
|
457
|
+
start_col + 1,
|
458
|
+
):
|
440
459
|
# Check if the cell is part of a merged range
|
441
460
|
merged_range = next(
|
442
461
|
(mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
|
@@ -450,7 +469,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
|
|
450
469
|
if merged_range:
|
451
470
|
max_col = max(max_col, merged_range.max_col - 1)
|
452
471
|
else:
|
453
|
-
max_col
|
472
|
+
max_col = rj
|
454
473
|
|
455
474
|
return max_col
|
456
475
|
|
@@ -22,7 +22,7 @@ WHISPER_TINY = InlineAsrNativeWhisperOptions(
|
|
22
22
|
verbose=True,
|
23
23
|
timestamps=True,
|
24
24
|
word_timestamps=True,
|
25
|
-
|
25
|
+
temperature=0.0,
|
26
26
|
max_new_tokens=256,
|
27
27
|
max_time_chunk=30.0,
|
28
28
|
)
|
@@ -33,7 +33,7 @@ WHISPER_SMALL = InlineAsrNativeWhisperOptions(
|
|
33
33
|
verbose=True,
|
34
34
|
timestamps=True,
|
35
35
|
word_timestamps=True,
|
36
|
-
|
36
|
+
temperature=0.0,
|
37
37
|
max_new_tokens=256,
|
38
38
|
max_time_chunk=30.0,
|
39
39
|
)
|
@@ -44,7 +44,7 @@ WHISPER_MEDIUM = InlineAsrNativeWhisperOptions(
|
|
44
44
|
verbose=True,
|
45
45
|
timestamps=True,
|
46
46
|
word_timestamps=True,
|
47
|
-
|
47
|
+
temperature=0.0,
|
48
48
|
max_new_tokens=256,
|
49
49
|
max_time_chunk=30.0,
|
50
50
|
)
|
@@ -55,7 +55,7 @@ WHISPER_BASE = InlineAsrNativeWhisperOptions(
|
|
55
55
|
verbose=True,
|
56
56
|
timestamps=True,
|
57
57
|
word_timestamps=True,
|
58
|
-
|
58
|
+
temperature=0.0,
|
59
59
|
max_new_tokens=256,
|
60
60
|
max_time_chunk=30.0,
|
61
61
|
)
|
@@ -66,7 +66,7 @@ WHISPER_LARGE = InlineAsrNativeWhisperOptions(
|
|
66
66
|
verbose=True,
|
67
67
|
timestamps=True,
|
68
68
|
word_timestamps=True,
|
69
|
-
|
69
|
+
temperature=0.0,
|
70
70
|
max_new_tokens=256,
|
71
71
|
max_time_chunk=30.0,
|
72
72
|
)
|
@@ -77,7 +77,7 @@ WHISPER_TURBO = InlineAsrNativeWhisperOptions(
|
|
77
77
|
verbose=True,
|
78
78
|
timestamps=True,
|
79
79
|
word_timestamps=True,
|
80
|
-
|
80
|
+
temperature=0.0,
|
81
81
|
max_new_tokens=256,
|
82
82
|
max_time_chunk=30.0,
|
83
83
|
)
|
docling/datamodel/base_models.py
CHANGED
@@ -12,6 +12,7 @@ from docling_core.types.doc import (
|
|
12
12
|
Size,
|
13
13
|
TableCell,
|
14
14
|
)
|
15
|
+
from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
|
15
16
|
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
16
17
|
from docling_core.types.io import (
|
17
18
|
DocumentStream,
|
@@ -19,7 +20,14 @@ from docling_core.types.io import (
|
|
19
20
|
|
20
21
|
# DO NOT REMOVE; explicitly exposed from this location
|
21
22
|
from PIL.Image import Image
|
22
|
-
from pydantic import
|
23
|
+
from pydantic import (
|
24
|
+
BaseModel,
|
25
|
+
ConfigDict,
|
26
|
+
Field,
|
27
|
+
FieldSerializationInfo,
|
28
|
+
computed_field,
|
29
|
+
field_serializer,
|
30
|
+
)
|
23
31
|
|
24
32
|
if TYPE_CHECKING:
|
25
33
|
from docling.backend.pdf_backend import PdfPageBackend
|
@@ -142,6 +150,10 @@ class Cluster(BaseModel):
|
|
142
150
|
cells: List[TextCell] = []
|
143
151
|
children: List["Cluster"] = [] # Add child cluster support
|
144
152
|
|
153
|
+
@field_serializer("confidence")
|
154
|
+
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
155
|
+
return round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
|
156
|
+
|
145
157
|
|
146
158
|
class BasePageElement(BaseModel):
|
147
159
|
label: DocItemLabel
|
@@ -194,6 +206,16 @@ class FigureElement(BasePageElement):
|
|
194
206
|
predicted_class: Optional[str] = None
|
195
207
|
confidence: Optional[float] = None
|
196
208
|
|
209
|
+
@field_serializer("confidence")
|
210
|
+
def _serialize(
|
211
|
+
self, value: Optional[float], info: FieldSerializationInfo
|
212
|
+
) -> Optional[float]:
|
213
|
+
return (
|
214
|
+
round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
|
215
|
+
if value is not None
|
216
|
+
else None
|
217
|
+
)
|
218
|
+
|
197
219
|
|
198
220
|
class FigureClassificationPrediction(BaseModel):
|
199
221
|
figure_count: int = 0
|
@@ -0,0 +1,90 @@
|
|
1
|
+
import logging
|
2
|
+
from enum import Enum
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
from pydantic import BaseModel
|
7
|
+
|
8
|
+
from docling.datamodel.accelerator_options import AcceleratorDevice
|
9
|
+
|
10
|
+
_log = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
class LayoutModelConfig(BaseModel):
|
14
|
+
name: str
|
15
|
+
repo_id: str
|
16
|
+
revision: str
|
17
|
+
model_path: str
|
18
|
+
supported_devices: list[AcceleratorDevice] = [
|
19
|
+
AcceleratorDevice.CPU,
|
20
|
+
AcceleratorDevice.CUDA,
|
21
|
+
AcceleratorDevice.MPS,
|
22
|
+
]
|
23
|
+
|
24
|
+
@property
|
25
|
+
def model_repo_folder(self) -> str:
|
26
|
+
return self.repo_id.replace("/", "--")
|
27
|
+
|
28
|
+
|
29
|
+
# HuggingFace Layout Models
|
30
|
+
|
31
|
+
# Default Docling Layout Model
|
32
|
+
DOCLING_LAYOUT_V2 = LayoutModelConfig(
|
33
|
+
name="docling_layout_v2",
|
34
|
+
repo_id="ds4sd/docling-layout-old",
|
35
|
+
revision="main",
|
36
|
+
model_path="",
|
37
|
+
)
|
38
|
+
|
39
|
+
DOCLING_LAYOUT_HERON = LayoutModelConfig(
|
40
|
+
name="docling_layout_heron",
|
41
|
+
repo_id="ds4sd/docling-layout-heron",
|
42
|
+
revision="main",
|
43
|
+
model_path="",
|
44
|
+
)
|
45
|
+
|
46
|
+
DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
|
47
|
+
name="docling_layout_heron_101",
|
48
|
+
repo_id="ds4sd/docling-layout-heron-101",
|
49
|
+
revision="main",
|
50
|
+
model_path="",
|
51
|
+
)
|
52
|
+
|
53
|
+
DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
|
54
|
+
name="docling_layout_egret_medium",
|
55
|
+
repo_id="ds4sd/docling-layout-egret-medium",
|
56
|
+
revision="main",
|
57
|
+
model_path="",
|
58
|
+
)
|
59
|
+
|
60
|
+
DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
|
61
|
+
name="docling_layout_egret_large",
|
62
|
+
repo_id="ds4sd/docling-layout-egret-large",
|
63
|
+
revision="main",
|
64
|
+
model_path="",
|
65
|
+
)
|
66
|
+
|
67
|
+
DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
|
68
|
+
name="docling_layout_egret_xlarge",
|
69
|
+
repo_id="ds4sd/docling-layout-egret-xlarge",
|
70
|
+
revision="main",
|
71
|
+
model_path="",
|
72
|
+
)
|
73
|
+
|
74
|
+
# Example for a hypothetical alternative model
|
75
|
+
# ALTERNATIVE_LAYOUT = LayoutModelConfig(
|
76
|
+
# name="alternative_layout",
|
77
|
+
# repo_id="someorg/alternative-layout",
|
78
|
+
# revision="main",
|
79
|
+
# model_path="model_artifacts/layout_alt",
|
80
|
+
# )
|
81
|
+
|
82
|
+
|
83
|
+
class LayoutModelType(str, Enum):
|
84
|
+
DOCLING_LAYOUT_V2 = "docling_layout_v2"
|
85
|
+
DOCLING_LAYOUT_HERON = "docling_layout_heron"
|
86
|
+
DOCLING_LAYOUT_HERON_101 = "docling_layout_heron_101"
|
87
|
+
DOCLING_LAYOUT_EGRET_MEDIUM = "docling_layout_egret_medium"
|
88
|
+
DOCLING_LAYOUT_EGRET_LARGE = "docling_layout_egret_large"
|
89
|
+
DOCLING_LAYOUT_EGRET_XLARGE = "docling_layout_egret_xlarge"
|
90
|
+
# ALTERNATIVE_LAYOUT = "alternative_layout"
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import logging
|
2
|
+
from datetime import datetime
|
2
3
|
from enum import Enum
|
3
4
|
from pathlib import Path
|
4
5
|
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
|
@@ -15,6 +16,15 @@ from docling.datamodel import asr_model_specs
|
|
15
16
|
|
16
17
|
# Import the following for backwards compatibility
|
17
18
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
19
|
+
from docling.datamodel.layout_model_specs import (
|
20
|
+
DOCLING_LAYOUT_EGRET_LARGE,
|
21
|
+
DOCLING_LAYOUT_EGRET_MEDIUM,
|
22
|
+
DOCLING_LAYOUT_EGRET_XLARGE,
|
23
|
+
DOCLING_LAYOUT_HERON,
|
24
|
+
DOCLING_LAYOUT_HERON_101,
|
25
|
+
DOCLING_LAYOUT_V2,
|
26
|
+
LayoutModelConfig,
|
27
|
+
)
|
18
28
|
from docling.datamodel.pipeline_options_asr_model import (
|
19
29
|
InlineAsrOptions,
|
20
30
|
)
|
@@ -265,6 +275,13 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
|
|
265
275
|
)
|
266
276
|
|
267
277
|
|
278
|
+
class LayoutOptions(BaseModel):
|
279
|
+
"""Options for layout processing."""
|
280
|
+
|
281
|
+
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
282
|
+
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
283
|
+
|
284
|
+
|
268
285
|
class AsrPipelineOptions(PipelineOptions):
|
269
286
|
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
|
270
287
|
artifacts_path: Optional[Union[Path, str]] = None
|
@@ -289,6 +306,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
|
|
289
306
|
picture_description_options: PictureDescriptionBaseOptions = (
|
290
307
|
smolvlm_picture_description
|
291
308
|
)
|
309
|
+
layout_options: LayoutOptions = LayoutOptions()
|
292
310
|
|
293
311
|
images_scale: float = 1.0
|
294
312
|
generate_page_images: bool = False
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from enum import Enum
|
2
|
-
from typing import Any, Dict, List, Literal, Optional, Union
|
2
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Union
|
3
3
|
|
4
|
+
from docling_core.types.doc.page import SegmentedPage
|
4
5
|
from pydantic import AnyUrl, BaseModel
|
5
6
|
from typing_extensions import deprecated
|
6
7
|
|
@@ -9,9 +10,10 @@ from docling.datamodel.accelerator_options import AcceleratorDevice
|
|
9
10
|
|
10
11
|
class BaseVlmOptions(BaseModel):
|
11
12
|
kind: str
|
12
|
-
prompt: str
|
13
|
+
prompt: Union[str, Callable[[Optional[SegmentedPage]], str]]
|
13
14
|
scale: float = 2.0
|
14
15
|
max_size: Optional[int] = None
|
16
|
+
temperature: float = 0.0
|
15
17
|
|
16
18
|
|
17
19
|
class ResponseFormat(str, Enum):
|
@@ -29,6 +31,12 @@ class TransformersModelType(str, Enum):
|
|
29
31
|
AUTOMODEL = "automodel"
|
30
32
|
AUTOMODEL_VISION2SEQ = "automodel-vision2seq"
|
31
33
|
AUTOMODEL_CAUSALLM = "automodel-causallm"
|
34
|
+
AUTOMODEL_IMAGETEXTTOTEXT = "automodel-imagetexttotext"
|
35
|
+
|
36
|
+
|
37
|
+
class TransformersPromptStyle(str, Enum):
|
38
|
+
CHAT = "chat"
|
39
|
+
RAW = "raw"
|
32
40
|
|
33
41
|
|
34
42
|
class InlineVlmOptions(BaseVlmOptions):
|
@@ -42,6 +50,7 @@ class InlineVlmOptions(BaseVlmOptions):
|
|
42
50
|
|
43
51
|
inference_framework: InferenceFramework
|
44
52
|
transformers_model_type: TransformersModelType = TransformersModelType.AUTOMODEL
|
53
|
+
transformers_prompt_style: TransformersPromptStyle = TransformersPromptStyle.CHAT
|
45
54
|
response_format: ResponseFormat
|
46
55
|
|
47
56
|
torch_dtype: Optional[str] = None
|
@@ -51,7 +60,6 @@ class InlineVlmOptions(BaseVlmOptions):
|
|
51
60
|
AcceleratorDevice.MPS,
|
52
61
|
]
|
53
62
|
|
54
|
-
temperature: float = 0.0
|
55
63
|
stop_strings: List[str] = []
|
56
64
|
extra_generation_config: Dict[str, Any] = {}
|
57
65
|
|
docling/models/api_vlm_model.py
CHANGED
@@ -29,12 +29,9 @@ class ApiVlmModel(BasePageModel):
|
|
29
29
|
|
30
30
|
self.timeout = self.vlm_options.timeout
|
31
31
|
self.concurrency = self.vlm_options.concurrency
|
32
|
-
self.prompt_content = (
|
33
|
-
f"This is a page from a document.\n{self.vlm_options.prompt}"
|
34
|
-
)
|
35
32
|
self.params = {
|
36
33
|
**self.vlm_options.params,
|
37
|
-
"temperature":
|
34
|
+
"temperature": self.vlm_options.temperature,
|
38
35
|
}
|
39
36
|
|
40
37
|
def __call__(
|
@@ -56,9 +53,14 @@ class ApiVlmModel(BasePageModel):
|
|
56
53
|
if hi_res_image.mode != "RGB":
|
57
54
|
hi_res_image = hi_res_image.convert("RGB")
|
58
55
|
|
56
|
+
if callable(self.vlm_options.prompt):
|
57
|
+
prompt = self.vlm_options.prompt(page.parsed_page)
|
58
|
+
else:
|
59
|
+
prompt = self.vlm_options.prompt
|
60
|
+
|
59
61
|
page_tags = api_image_request(
|
60
62
|
image=hi_res_image,
|
61
|
-
prompt=
|
63
|
+
prompt=prompt,
|
62
64
|
url=self.vlm_options.url,
|
63
65
|
timeout=self.timeout,
|
64
66
|
headers=self.vlm_options.headers,
|
docling/models/base_ocr_model.py
CHANGED
@@ -3,14 +3,13 @@ import logging
|
|
3
3
|
from abc import abstractmethod
|
4
4
|
from collections.abc import Iterable
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import List, Optional, Type
|
6
|
+
from typing import TYPE_CHECKING, List, Optional, Type
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
10
10
|
from docling_core.types.doc.page import TextCell
|
11
11
|
from PIL import Image, ImageDraw
|
12
12
|
from rtree import index
|
13
|
-
from scipy.ndimage import binary_dilation, find_objects, label
|
14
13
|
|
15
14
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
16
15
|
from docling.datamodel.base_models import Page
|
@@ -31,11 +30,16 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
|
31
30
|
options: OcrOptions,
|
32
31
|
accelerator_options: AcceleratorOptions,
|
33
32
|
):
|
33
|
+
# Make sure any delay/error from import occurs on ocr model init and not first use
|
34
|
+
from scipy.ndimage import binary_dilation, find_objects, label
|
35
|
+
|
34
36
|
self.enabled = enabled
|
35
37
|
self.options = options
|
36
38
|
|
37
39
|
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
38
40
|
def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
|
41
|
+
from scipy.ndimage import binary_dilation, find_objects, label
|
42
|
+
|
39
43
|
BITMAP_COVERAGE_TRESHOLD = 0.75
|
40
44
|
assert page.size is not None
|
41
45
|
|
@@ -14,7 +14,8 @@ from PIL import Image
|
|
14
14
|
from pydantic import BaseModel
|
15
15
|
|
16
16
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
17
|
-
from docling.
|
17
|
+
from docling.datamodel.base_models import ItemAndImageEnrichmentElement
|
18
|
+
from docling.models.base_model import BaseItemAndImageEnrichmentModel
|
18
19
|
from docling.models.utils.hf_model_download import download_hf_model
|
19
20
|
from docling.utils.accelerator_utils import decide_device
|
20
21
|
|
@@ -32,7 +33,7 @@ class DocumentPictureClassifierOptions(BaseModel):
|
|
32
33
|
kind: Literal["document_picture_classifier"] = "document_picture_classifier"
|
33
34
|
|
34
35
|
|
35
|
-
class DocumentPictureClassifier(
|
36
|
+
class DocumentPictureClassifier(BaseItemAndImageEnrichmentModel):
|
36
37
|
"""
|
37
38
|
A model for classifying pictures in documents.
|
38
39
|
|
@@ -135,7 +136,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|
135
136
|
def __call__(
|
136
137
|
self,
|
137
138
|
doc: DoclingDocument,
|
138
|
-
element_batch: Iterable[
|
139
|
+
element_batch: Iterable[ItemAndImageEnrichmentElement],
|
139
140
|
) -> Iterable[NodeItem]:
|
140
141
|
"""
|
141
142
|
Processes a batch of elements and enriches them with classification predictions.
|
@@ -144,7 +145,7 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|
144
145
|
----------
|
145
146
|
doc : DoclingDocument
|
146
147
|
The document containing the elements to be processed.
|
147
|
-
element_batch : Iterable[
|
148
|
+
element_batch : Iterable[ItemAndImageEnrichmentElement]
|
148
149
|
A batch of pictures to classify.
|
149
150
|
|
150
151
|
Returns
|
@@ -155,22 +156,20 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|
155
156
|
"""
|
156
157
|
if not self.enabled:
|
157
158
|
for element in element_batch:
|
158
|
-
yield element
|
159
|
+
yield element.item
|
159
160
|
return
|
160
161
|
|
161
162
|
images: List[Union[Image.Image, np.ndarray]] = []
|
162
163
|
elements: List[PictureItem] = []
|
163
164
|
for el in element_batch:
|
164
|
-
assert isinstance(el, PictureItem)
|
165
|
-
elements.append(el)
|
166
|
-
|
167
|
-
assert img is not None
|
168
|
-
images.append(img)
|
165
|
+
assert isinstance(el.item, PictureItem)
|
166
|
+
elements.append(el.item)
|
167
|
+
images.append(el.image)
|
169
168
|
|
170
169
|
outputs = self.document_picture_classifier.predict(images)
|
171
170
|
|
172
|
-
for
|
173
|
-
|
171
|
+
for item, output in zip(elements, outputs):
|
172
|
+
item.annotations.append(
|
174
173
|
PictureClassificationData(
|
175
174
|
provenance="DocumentPictureClassifier",
|
176
175
|
predicted_classes=[
|
@@ -183,4 +182,4 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
|
183
182
|
)
|
184
183
|
)
|
185
184
|
|
186
|
-
yield
|
185
|
+
yield item
|
docling/models/layout_model.py
CHANGED
@@ -7,12 +7,13 @@ from typing import Optional
|
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
from docling_core.types.doc import DocItemLabel
|
10
|
-
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
11
10
|
from PIL import Image
|
12
11
|
|
13
12
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
14
13
|
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
15
14
|
from docling.datamodel.document import ConversionResult
|
15
|
+
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2, LayoutModelConfig
|
16
|
+
from docling.datamodel.pipeline_options import LayoutOptions
|
16
17
|
from docling.datamodel.settings import settings
|
17
18
|
from docling.models.base_model import BasePageModel
|
18
19
|
from docling.models.utils.hf_model_download import download_hf_model
|
@@ -25,9 +26,6 @@ _log = logging.getLogger(__name__)
|
|
25
26
|
|
26
27
|
|
27
28
|
class LayoutModel(BasePageModel):
|
28
|
-
_model_repo_folder = "ds4sd--docling-models"
|
29
|
-
_model_path = "model_artifacts/layout"
|
30
|
-
|
31
29
|
TEXT_ELEM_LABELS = [
|
32
30
|
DocItemLabel.TEXT,
|
33
31
|
DocItemLabel.FOOTNOTE,
|
@@ -49,28 +47,38 @@ class LayoutModel(BasePageModel):
|
|
49
47
|
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
|
50
48
|
|
51
49
|
def __init__(
|
52
|
-
self,
|
50
|
+
self,
|
51
|
+
artifacts_path: Optional[Path],
|
52
|
+
accelerator_options: AcceleratorOptions,
|
53
|
+
options: LayoutOptions,
|
53
54
|
):
|
55
|
+
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
56
|
+
|
57
|
+
self.options = options
|
58
|
+
|
54
59
|
device = decide_device(accelerator_options.device)
|
60
|
+
layout_model_config = options.model_spec
|
61
|
+
model_repo_folder = layout_model_config.model_repo_folder
|
62
|
+
model_path = layout_model_config.model_path
|
55
63
|
|
56
64
|
if artifacts_path is None:
|
57
|
-
artifacts_path =
|
65
|
+
artifacts_path = (
|
66
|
+
self.download_models(layout_model_config=layout_model_config)
|
67
|
+
/ model_path
|
68
|
+
)
|
58
69
|
else:
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
artifacts_path / self._model_repo_folder / self._model_path
|
63
|
-
)
|
64
|
-
elif (artifacts_path / self._model_path).exists():
|
70
|
+
if (artifacts_path / model_repo_folder).exists():
|
71
|
+
artifacts_path = artifacts_path / model_repo_folder / model_path
|
72
|
+
elif (artifacts_path / model_path).exists():
|
65
73
|
warnings.warn(
|
66
74
|
"The usage of artifacts_path containing directly "
|
67
|
-
f"{
|
75
|
+
f"{model_path} is deprecated. Please point "
|
68
76
|
"the artifacts_path to the parent containing "
|
69
|
-
f"the {
|
77
|
+
f"the {model_repo_folder} folder.",
|
70
78
|
DeprecationWarning,
|
71
79
|
stacklevel=3,
|
72
80
|
)
|
73
|
-
artifacts_path = artifacts_path /
|
81
|
+
artifacts_path = artifacts_path / model_path
|
74
82
|
|
75
83
|
self.layout_predictor = LayoutPredictor(
|
76
84
|
artifact_path=str(artifacts_path),
|
@@ -83,10 +91,11 @@ class LayoutModel(BasePageModel):
|
|
83
91
|
local_dir: Optional[Path] = None,
|
84
92
|
force: bool = False,
|
85
93
|
progress: bool = False,
|
94
|
+
layout_model_config: LayoutModelConfig = DOCLING_LAYOUT_V2,
|
86
95
|
) -> Path:
|
87
96
|
return download_hf_model(
|
88
|
-
repo_id=
|
89
|
-
revision=
|
97
|
+
repo_id=layout_model_config.repo_id,
|
98
|
+
revision=layout_model_config.revision,
|
90
99
|
local_dir=local_dir,
|
91
100
|
force=force,
|
92
101
|
progress=progress,
|
@@ -176,7 +185,7 @@ class LayoutModel(BasePageModel):
|
|
176
185
|
# Apply postprocessing
|
177
186
|
|
178
187
|
processed_clusters, processed_cells = LayoutPostprocessor(
|
179
|
-
page, clusters
|
188
|
+
page, clusters, self.options
|
180
189
|
).postprocess()
|
181
190
|
# Note: LayoutPostprocessor updates page.cells and page.parsed_page internally
|
182
191
|
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import threading
|
1
2
|
from collections.abc import Iterable
|
2
3
|
from pathlib import Path
|
3
4
|
from typing import Optional, Type, Union
|
@@ -15,6 +16,9 @@ from docling.models.utils.hf_model_download import (
|
|
15
16
|
)
|
16
17
|
from docling.utils.accelerator_utils import decide_device
|
17
18
|
|
19
|
+
# Global lock for model initialization to prevent threading issues
|
20
|
+
_model_init_lock = threading.Lock()
|
21
|
+
|
18
22
|
|
19
23
|
class PictureDescriptionVlmModel(
|
20
24
|
PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
|
@@ -57,17 +61,18 @@ class PictureDescriptionVlmModel(
|
|
57
61
|
)
|
58
62
|
|
59
63
|
# Initialize processor and model
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
64
|
+
with _model_init_lock:
|
65
|
+
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
66
|
+
self.model = AutoModelForVision2Seq.from_pretrained(
|
67
|
+
artifacts_path,
|
68
|
+
torch_dtype=torch.bfloat16,
|
69
|
+
_attn_implementation=(
|
70
|
+
"flash_attention_2"
|
71
|
+
if self.device.startswith("cuda")
|
72
|
+
and accelerator_options.cuda_use_flash_attention2
|
73
|
+
else "eager"
|
74
|
+
),
|
75
|
+
).to(self.device)
|
71
76
|
|
72
77
|
self.provenance = f"{self.options.repo_id}"
|
73
78
|
|
@@ -1,13 +1,10 @@
|
|
1
|
-
from docling.models.easyocr_model import EasyOcrModel
|
2
|
-
from docling.models.ocr_mac_model import OcrMacModel
|
3
|
-
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
4
|
-
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
5
|
-
from docling.models.rapid_ocr_model import RapidOcrModel
|
6
|
-
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
7
|
-
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
8
|
-
|
9
|
-
|
10
1
|
def ocr_engines():
|
2
|
+
from docling.models.easyocr_model import EasyOcrModel
|
3
|
+
from docling.models.ocr_mac_model import OcrMacModel
|
4
|
+
from docling.models.rapid_ocr_model import RapidOcrModel
|
5
|
+
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
6
|
+
from docling.models.tesseract_ocr_model import TesseractOcrModel
|
7
|
+
|
11
8
|
return {
|
12
9
|
"ocr_engines": [
|
13
10
|
EasyOcrModel,
|
@@ -20,6 +17,9 @@ def ocr_engines():
|
|
20
17
|
|
21
18
|
|
22
19
|
def picture_description():
|
20
|
+
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
21
|
+
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
22
|
+
|
23
23
|
return {
|
24
24
|
"picture_description": [
|
25
25
|
PictureDescriptionVlmModel,
|
@@ -12,6 +12,9 @@ from docling_core.types.doc import (
|
|
12
12
|
TableData,
|
13
13
|
)
|
14
14
|
from docling_core.types.doc.document import ContentLayer
|
15
|
+
from docling_ibm_models.list_item_normalizer.list_marker_processor import (
|
16
|
+
ListItemMarkerProcessor,
|
17
|
+
)
|
15
18
|
from docling_ibm_models.reading_order.reading_order_rb import (
|
16
19
|
PageElement as ReadingOrderPageElement,
|
17
20
|
ReadingOrderPredictor,
|
@@ -40,6 +43,7 @@ class ReadingOrderModel:
|
|
40
43
|
def __init__(self, options: ReadingOrderOptions):
|
41
44
|
self.options = options
|
42
45
|
self.ro_model = ReadingOrderPredictor()
|
46
|
+
self.list_item_processor = ListItemMarkerProcessor()
|
43
47
|
|
44
48
|
def _assembled_to_readingorder_elements(
|
45
49
|
self, conv_res: ConversionResult
|
@@ -92,7 +96,8 @@ class ReadingOrderModel:
|
|
92
96
|
)
|
93
97
|
if c_label == DocItemLabel.LIST_ITEM:
|
94
98
|
# TODO: Infer if this is a numbered or a bullet list item
|
95
|
-
doc.add_list_item(parent=doc_item, text=c_text, prov=c_prov)
|
99
|
+
l_item = doc.add_list_item(parent=doc_item, text=c_text, prov=c_prov)
|
100
|
+
self.list_item_processor.process_list_item(l_item)
|
96
101
|
elif c_label == DocItemLabel.SECTION_HEADER:
|
97
102
|
doc.add_heading(parent=doc_item, text=c_text, prov=c_prov)
|
98
103
|
else:
|
@@ -301,6 +306,8 @@ class ReadingOrderModel:
|
|
301
306
|
new_item = out_doc.add_list_item(
|
302
307
|
text=cap_text, enumerated=False, prov=prov, parent=current_list
|
303
308
|
)
|
309
|
+
self.list_item_processor.process_list_item(new_item)
|
310
|
+
|
304
311
|
elif label == DocItemLabel.SECTION_HEADER:
|
305
312
|
current_list = None
|
306
313
|
|
@@ -10,7 +10,6 @@ from docling_core.types.doc.page import (
|
|
10
10
|
BoundingRectangle,
|
11
11
|
TextCellUnit,
|
12
12
|
)
|
13
|
-
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
14
13
|
from PIL import ImageDraw
|
15
14
|
|
16
15
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
@@ -70,6 +69,9 @@ class TableStructureModel(BasePageModel):
|
|
70
69
|
|
71
70
|
# Third Party
|
72
71
|
import docling_ibm_models.tableformer.common as c
|
72
|
+
from docling_ibm_models.tableformer.data_management.tf_predictor import (
|
73
|
+
TFPredictor,
|
74
|
+
)
|
73
75
|
|
74
76
|
device = decide_device(accelerator_options.device)
|
75
77
|
|
@@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel):
|
|
144
144
|
|
145
145
|
local_reader = self.reader
|
146
146
|
self.osd_reader.SetImage(high_res_image)
|
147
|
+
|
148
|
+
doc_orientation = 0
|
147
149
|
osd = self.osd_reader.DetectOrientationScript()
|
150
|
+
|
148
151
|
# No text, or Orientation and Script detection failure
|
149
152
|
if osd is None:
|
150
153
|
_log.error(
|
@@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel):
|
|
158
161
|
# to OCR in the hope OCR will succeed while OSD failed
|
159
162
|
if self._is_auto:
|
160
163
|
continue
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
-doc_orientation, expand=True
|
164
|
+
else:
|
165
|
+
doc_orientation = parse_tesseract_orientation(
|
166
|
+
osd["orient_deg"]
|
165
167
|
)
|
168
|
+
if doc_orientation != 0:
|
169
|
+
high_res_image = high_res_image.rotate(
|
170
|
+
-doc_orientation, expand=True
|
171
|
+
)
|
166
172
|
if self._is_auto:
|
167
173
|
script = osd["script_name"]
|
168
174
|
script = map_tesseract_script(script)
|
@@ -13,6 +13,7 @@ from docling.datamodel.document import ConversionResult
|
|
13
13
|
from docling.datamodel.pipeline_options_vlm_model import (
|
14
14
|
InlineVlmOptions,
|
15
15
|
TransformersModelType,
|
16
|
+
TransformersPromptStyle,
|
16
17
|
)
|
17
18
|
from docling.models.base_model import BasePageModel
|
18
19
|
from docling.models.utils.hf_model_download import (
|
@@ -41,6 +42,7 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMix
|
|
41
42
|
from transformers import (
|
42
43
|
AutoModel,
|
43
44
|
AutoModelForCausalLM,
|
45
|
+
AutoModelForImageTextToText,
|
44
46
|
AutoModelForVision2Seq,
|
45
47
|
AutoProcessor,
|
46
48
|
BitsAndBytesConfig,
|
@@ -91,6 +93,11 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMix
|
|
91
93
|
== TransformersModelType.AUTOMODEL_VISION2SEQ
|
92
94
|
):
|
93
95
|
model_cls = AutoModelForVision2Seq
|
96
|
+
elif (
|
97
|
+
self.vlm_options.transformers_model_type
|
98
|
+
== TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT
|
99
|
+
):
|
100
|
+
model_cls = AutoModelForImageTextToText
|
94
101
|
|
95
102
|
self.processor = AutoProcessor.from_pretrained(
|
96
103
|
artifacts_path,
|
@@ -128,7 +135,11 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMix
|
|
128
135
|
)
|
129
136
|
|
130
137
|
# Define prompt structure
|
131
|
-
|
138
|
+
if callable(self.vlm_options.prompt):
|
139
|
+
user_prompt = self.vlm_options.prompt(page.parsed_page)
|
140
|
+
else:
|
141
|
+
user_prompt = self.vlm_options.prompt
|
142
|
+
prompt = self.formulate_prompt(user_prompt)
|
132
143
|
|
133
144
|
inputs = self.processor(
|
134
145
|
text=prompt, images=[hi_res_image], return_tensors="pt"
|
@@ -162,10 +173,13 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMix
|
|
162
173
|
|
163
174
|
yield page
|
164
175
|
|
165
|
-
def formulate_prompt(self) -> str:
|
176
|
+
def formulate_prompt(self, user_prompt: str) -> str:
|
166
177
|
"""Formulate a prompt for the VLM."""
|
167
178
|
|
168
|
-
if self.vlm_options.
|
179
|
+
if self.vlm_options.transformers_prompt_style == TransformersPromptStyle.RAW:
|
180
|
+
return user_prompt
|
181
|
+
|
182
|
+
elif self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct":
|
169
183
|
_log.debug("Using specialized prompt for Phi-4")
|
170
184
|
# more info here: https://huggingface.co/microsoft/Phi-4-multimodal-instruct#loading-the-model-locally
|
171
185
|
|
@@ -173,25 +187,30 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMix
|
|
173
187
|
assistant_prompt = "<|assistant|>"
|
174
188
|
prompt_suffix = "<|end|>"
|
175
189
|
|
176
|
-
prompt = f"{user_prompt}<|image_1|>{
|
190
|
+
prompt = f"{user_prompt}<|image_1|>{user_prompt}{prompt_suffix}{assistant_prompt}"
|
177
191
|
_log.debug(f"prompt for {self.vlm_options.repo_id}: {prompt}")
|
178
192
|
|
179
193
|
return prompt
|
180
194
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
195
|
+
elif self.vlm_options.transformers_prompt_style == TransformersPromptStyle.CHAT:
|
196
|
+
messages = [
|
197
|
+
{
|
198
|
+
"role": "user",
|
199
|
+
"content": [
|
200
|
+
{
|
201
|
+
"type": "text",
|
202
|
+
"text": "This is a page from a document.",
|
203
|
+
},
|
204
|
+
{"type": "image"},
|
205
|
+
{"type": "text", "text": user_prompt},
|
206
|
+
],
|
207
|
+
}
|
208
|
+
]
|
209
|
+
prompt = self.processor.apply_chat_template(
|
210
|
+
messages, add_generation_prompt=False
|
211
|
+
)
|
212
|
+
return prompt
|
213
|
+
|
214
|
+
raise RuntimeError(
|
215
|
+
f"Uknown prompt style `{self.vlm_options.transformers_prompt_style}`. Valid values are {', '.join(s.value for s in TransformersPromptStyle)}."
|
196
216
|
)
|
197
|
-
return prompt
|
@@ -56,8 +56,6 @@ class HuggingFaceMlxModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
|
56
56
|
elif (artifacts_path / repo_cache_folder).exists():
|
57
57
|
artifacts_path = artifacts_path / repo_cache_folder
|
58
58
|
|
59
|
-
self.param_question = vlm_options.prompt
|
60
|
-
|
61
59
|
## Load the model
|
62
60
|
self.vlm_model, self.processor = load(artifacts_path)
|
63
61
|
self.config = load_config(artifacts_path)
|
@@ -86,8 +84,12 @@ class HuggingFaceMlxModel(BasePageModel, HuggingFaceModelDownloadMixin):
|
|
86
84
|
if hi_res_image.mode != "RGB":
|
87
85
|
hi_res_image = hi_res_image.convert("RGB")
|
88
86
|
|
87
|
+
if callable(self.vlm_options.prompt):
|
88
|
+
user_prompt = self.vlm_options.prompt(page.parsed_page)
|
89
|
+
else:
|
90
|
+
user_prompt = self.vlm_options.prompt
|
89
91
|
prompt = self.apply_chat_template(
|
90
|
-
self.processor, self.config,
|
92
|
+
self.processor, self.config, user_prompt, num_images=1
|
91
93
|
)
|
92
94
|
|
93
95
|
start_time = time.time()
|
@@ -10,6 +10,7 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
|
|
10
10
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
11
11
|
from docling.datamodel.base_models import AssembledUnit, Page
|
12
12
|
from docling.datamodel.document import ConversionResult
|
13
|
+
from docling.datamodel.layout_model_specs import LayoutModelConfig
|
13
14
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
14
15
|
from docling.datamodel.settings import settings
|
15
16
|
from docling.models.base_ocr_model import BaseOcrModel
|
@@ -36,9 +37,6 @@ _log = logging.getLogger(__name__)
|
|
36
37
|
|
37
38
|
|
38
39
|
class StandardPdfPipeline(PaginatedPipeline):
|
39
|
-
_layout_model_path = LayoutModel._model_path
|
40
|
-
_table_model_path = TableStructureModel._model_path
|
41
|
-
|
42
40
|
def __init__(self, pipeline_options: PdfPipelineOptions):
|
43
41
|
super().__init__(pipeline_options)
|
44
42
|
self.pipeline_options: PdfPipelineOptions
|
@@ -80,6 +78,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
80
78
|
LayoutModel(
|
81
79
|
artifacts_path=artifacts_path,
|
82
80
|
accelerator_options=pipeline_options.accelerator_options,
|
81
|
+
options=pipeline_options.layout_options,
|
83
82
|
),
|
84
83
|
# Table structure model
|
85
84
|
TableStructureModel(
|
@@ -128,6 +127,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|
128
127
|
if (
|
129
128
|
self.pipeline_options.do_formula_enrichment
|
130
129
|
or self.pipeline_options.do_code_enrichment
|
130
|
+
or self.pipeline_options.do_picture_classification
|
131
131
|
or self.pipeline_options.do_picture_description
|
132
132
|
):
|
133
133
|
self.keep_backend = True
|
docling/pipeline/vlm_pipeline.py
CHANGED
@@ -117,6 +117,7 @@ class VlmPipeline(PaginatedPipeline):
|
|
117
117
|
page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
|
118
118
|
if page._backend is not None and page._backend.is_valid():
|
119
119
|
page.size = page._backend.get_size()
|
120
|
+
page.parsed_page = page._backend.get_segmented_page()
|
120
121
|
|
121
122
|
return page
|
122
123
|
|
@@ -1,8 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import List, Optional
|
3
3
|
|
4
|
-
import torch
|
5
|
-
|
6
4
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
7
5
|
|
8
6
|
_log = logging.getLogger(__name__)
|
@@ -18,6 +16,8 @@ def decide_device(
|
|
18
16
|
1. AUTO: Check for the best available device on the system.
|
19
17
|
2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
|
20
18
|
"""
|
19
|
+
import torch
|
20
|
+
|
21
21
|
device = "cpu"
|
22
22
|
|
23
23
|
has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
|
@@ -9,6 +9,7 @@ from docling_core.types.doc.page import TextCell
|
|
9
9
|
from rtree import index
|
10
10
|
|
11
11
|
from docling.datamodel.base_models import BoundingBox, Cluster, Page
|
12
|
+
from docling.datamodel.pipeline_options import LayoutOptions
|
12
13
|
|
13
14
|
_log = logging.getLogger(__name__)
|
14
15
|
|
@@ -194,12 +195,16 @@ class LayoutPostprocessor:
|
|
194
195
|
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
|
195
196
|
}
|
196
197
|
|
197
|
-
def __init__(
|
198
|
+
def __init__(
|
199
|
+
self, page: Page, clusters: List[Cluster], options: LayoutOptions
|
200
|
+
) -> None:
|
198
201
|
"""Initialize processor with page and clusters."""
|
202
|
+
|
199
203
|
self.cells = page.cells
|
200
204
|
self.page = page
|
201
205
|
self.page_size = page.size
|
202
206
|
self.all_clusters = clusters
|
207
|
+
self.options = options
|
203
208
|
self.regular_clusters = [
|
204
209
|
c for c in clusters if c.label not in self.SPECIAL_TYPES
|
205
210
|
]
|
@@ -267,7 +272,7 @@ class LayoutPostprocessor:
|
|
267
272
|
|
268
273
|
# Handle orphaned cells
|
269
274
|
unassigned = self._find_unassigned_cells(clusters)
|
270
|
-
if unassigned:
|
275
|
+
if unassigned and self.options.create_orphan_clusters:
|
271
276
|
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
272
277
|
orphan_clusters = []
|
273
278
|
for i, cell in enumerate(unassigned):
|
@@ -2,6 +2,7 @@ import logging
|
|
2
2
|
from pathlib import Path
|
3
3
|
from typing import Optional
|
4
4
|
|
5
|
+
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
|
5
6
|
from docling.datamodel.pipeline_options import (
|
6
7
|
granite_picture_description,
|
7
8
|
smolvlm_picture_description,
|
@@ -46,7 +47,7 @@ def download_models(
|
|
46
47
|
if with_layout:
|
47
48
|
_log.info("Downloading layout model...")
|
48
49
|
LayoutModel.download_models(
|
49
|
-
local_dir=output_dir /
|
50
|
+
local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
|
50
51
|
force=force,
|
51
52
|
progress=progress,
|
52
53
|
)
|
docling/utils/ocr_utils.py
CHANGED
@@ -41,7 +41,7 @@ def tesseract_box_to_bounding_rectangle(
|
|
41
41
|
im_size: Tuple[int, int],
|
42
42
|
) -> BoundingRectangle:
|
43
43
|
# box is in the top, left, height, width format, top left coordinates
|
44
|
-
rect = rotate_bounding_box(bbox, angle
|
44
|
+
rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size)
|
45
45
|
rect = BoundingRectangle(
|
46
46
|
r_x0=rect.r_x0 / scale,
|
47
47
|
r_y0=rect.r_y0 / scale,
|
docling/utils/orientation.py
CHANGED
@@ -14,43 +14,36 @@ def rotate_bounding_box(
|
|
14
14
|
# coordinate system. Then other corners are found rotating counterclockwise
|
15
15
|
bbox = bbox.to_top_left_origin(im_size[1])
|
16
16
|
left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
|
17
|
-
|
17
|
+
im_w, im_h = im_size
|
18
18
|
angle = angle % 360
|
19
19
|
if angle == 0:
|
20
|
-
|
21
|
-
r_y0 = top + height
|
22
|
-
r_x1 = r_x0 + width
|
23
|
-
r_y1 = r_y0
|
24
|
-
r_x2 = r_x0 + width
|
25
|
-
r_y2 = r_y0 - height
|
26
|
-
r_x3 = r_x0
|
27
|
-
r_y3 = r_y0 - height
|
20
|
+
return BoundingRectangle.from_bounding_box(bbox)
|
28
21
|
elif angle == 90:
|
29
|
-
r_x0 =
|
30
|
-
r_y0 = left
|
22
|
+
r_x0 = top + height
|
23
|
+
r_y0 = im_w - left
|
31
24
|
r_x1 = r_x0
|
32
|
-
r_y1 = r_y0
|
33
|
-
r_x2 =
|
34
|
-
r_y2 =
|
35
|
-
r_x3 =
|
36
|
-
r_y3 = r_y0
|
25
|
+
r_y1 = r_y0 - width
|
26
|
+
r_x2 = r_x1 - height
|
27
|
+
r_y2 = r_y1
|
28
|
+
r_x3 = r_x2
|
29
|
+
r_y3 = r_y0
|
37
30
|
elif angle == 180:
|
38
|
-
r_x0 =
|
39
|
-
r_y0 =
|
31
|
+
r_x0 = im_w - left
|
32
|
+
r_y0 = im_h - (top + height)
|
40
33
|
r_x1 = r_x0 - width
|
41
34
|
r_y1 = r_y0
|
42
|
-
r_x2 =
|
43
|
-
r_y2 =
|
35
|
+
r_x2 = r_x1
|
36
|
+
r_y2 = r_y1 + height
|
44
37
|
r_x3 = r_x0
|
45
|
-
r_y3 =
|
38
|
+
r_y3 = r_y2
|
46
39
|
elif angle == 270:
|
47
|
-
r_x0 = top + height
|
48
|
-
r_y0 =
|
40
|
+
r_x0 = im_h - (top + height)
|
41
|
+
r_y0 = left
|
49
42
|
r_x1 = r_x0
|
50
|
-
r_y1 = r_y0
|
51
|
-
r_x2 =
|
52
|
-
r_y2 =
|
53
|
-
r_x3 =
|
43
|
+
r_y1 = r_y0 + width
|
44
|
+
r_x2 = r_x1 + height
|
45
|
+
r_y2 = r_y1
|
46
|
+
r_x3 = r_x2
|
54
47
|
r_y3 = r_y0
|
55
48
|
else:
|
56
49
|
msg = (
|
@@ -58,7 +51,7 @@ def rotate_bounding_box(
|
|
58
51
|
f" {sorted(CLIPPED_ORIENTATIONS)}"
|
59
52
|
)
|
60
53
|
raise ValueError(msg)
|
61
|
-
|
54
|
+
rectangle = BoundingRectangle(
|
62
55
|
r_x0=r_x0,
|
63
56
|
r_y0=r_y0,
|
64
57
|
r_x1=r_x1,
|
@@ -69,3 +62,4 @@ def rotate_bounding_box(
|
|
69
62
|
r_y3=r_y3,
|
70
63
|
coord_origin=CoordOrigin.TOPLEFT,
|
71
64
|
)
|
65
|
+
return rectangle
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.41.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -26,9 +26,9 @@ Requires-Python: <4.0,>=3.9
|
|
26
26
|
Description-Content-Type: text/markdown
|
27
27
|
License-File: LICENSE
|
28
28
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
29
|
-
Requires-Dist: docling-core[chunking]<3.0.0,>=2.
|
30
|
-
Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
|
29
|
+
Requires-Dist: docling-core[chunking]<3.0.0,>=2.42.0
|
31
30
|
Requires-Dist: docling-parse<5.0.0,>=4.0.0
|
31
|
+
Requires-Dist: docling-ibm-models<4,>=3.6.0
|
32
32
|
Requires-Dist: filetype<2.0.0,>=1.2.0
|
33
33
|
Requires-Dist: pypdfium2<5.0.0,>=4.30.0
|
34
34
|
Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
|
@@ -57,12 +57,12 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
|
|
57
57
|
Provides-Extra: vlm
|
58
58
|
Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
|
59
59
|
Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
|
60
|
-
Requires-Dist: mlx-vlm
|
60
|
+
Requires-Dist: mlx-vlm<0.2,>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
61
61
|
Provides-Extra: rapidocr
|
62
62
|
Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
|
63
63
|
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
64
64
|
Provides-Extra: asr
|
65
|
-
Requires-Dist: openai-whisper>=
|
65
|
+
Requires-Dist: openai-whisper>=20250625; extra == "asr"
|
66
66
|
Dynamic: license-file
|
67
67
|
|
68
68
|
<p align="center">
|
@@ -8,10 +8,10 @@ docling/backend/asciidoc_backend.py,sha256=RDNLrPJHxROiM7-NQdZn3DdvAyiPAndbSWcZo
|
|
8
8
|
docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
|
9
9
|
docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3Uea00XrLluTg,7918
|
10
10
|
docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
|
11
|
-
docling/backend/docling_parse_v4_backend.py,sha256=
|
11
|
+
docling/backend/docling_parse_v4_backend.py,sha256=qR_WRVq9JGtRioWCw6MnLWgbvXbC6Y1yds7Ol1-E6UQ,6550
|
12
12
|
docling/backend/html_backend.py,sha256=Z959dzqYQO2pPE4xgPRxC5MR9j3nFGtiD6_F_osQ2iI,20670
|
13
13
|
docling/backend/md_backend.py,sha256=mfwGj8g2hGC-Q_HREtl_Web65uMVXD-Ie1nRqWTXzF0,21013
|
14
|
-
docling/backend/msexcel_backend.py,sha256=
|
14
|
+
docling/backend/msexcel_backend.py,sha256=cq8MQ2RSh6pqCiVrldjOerSww7dOPTWmCQoCBI57i6w,18579
|
15
15
|
docling/backend/mspowerpoint_backend.py,sha256=wJgB2JStEPfD7MPpWQlpPN7bffPxaHFUnKD4wj8SLxU,15114
|
16
16
|
docling/backend/msword_backend.py,sha256=7mzPCF4bGWZPst5ntoV3aSxH5WUu2nBP-l8lgQT3tdw,44544
|
17
17
|
docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
|
@@ -33,66 +33,67 @@ docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
|
|
33
33
|
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
34
34
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
35
35
|
docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
|
36
|
-
docling/datamodel/asr_model_specs.py,sha256=
|
37
|
-
docling/datamodel/base_models.py,sha256=
|
36
|
+
docling/datamodel/asr_model_specs.py,sha256=Wg7z3zm_wXIWu122iPVy0RMECsA_JCFHrlFF-xxHoVQ,2187
|
37
|
+
docling/datamodel/base_models.py,sha256=9FslHkGUNmBp264LpLL_2JTfDAdaikldYs3SiQOHb5A,11828
|
38
38
|
docling/datamodel/document.py,sha256=CA_dgt4V_phze5HXpfgfKNBKd1cPC1o3WE_IENX63EM,16252
|
39
|
-
docling/datamodel/
|
39
|
+
docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
|
40
|
+
docling/datamodel/pipeline_options.py,sha256=aMwpbyEMbAC-xGJnjQp8iw2ocpSU4eiD8D73gHf7T4U,10033
|
40
41
|
docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
|
41
|
-
docling/datamodel/pipeline_options_vlm_model.py,sha256=
|
42
|
+
docling/datamodel/pipeline_options_vlm_model.py,sha256=z-pUqwRA8nJp6C3SEXZLem2zvSYdgavaAVYa8wkAIZY,2400
|
42
43
|
docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
|
43
44
|
docling/datamodel/vlm_model_specs.py,sha256=--jZexGeu-s_lWp7y_WwWEf6CD1J4XqADrS1-OY_pWM,4737
|
44
45
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
|
-
docling/models/api_vlm_model.py,sha256=
|
46
|
+
docling/models/api_vlm_model.py,sha256=foBvzaWeHFH1t-VdvRWLdiXiiofhvhjvHqRI0eNA_3w,2923
|
46
47
|
docling/models/base_model.py,sha256=NNjIapqCruAEAWR-CCdsNgXc2QkwiPYAcaQ_ZYe1W28,2978
|
47
|
-
docling/models/base_ocr_model.py,sha256=
|
48
|
+
docling/models/base_ocr_model.py,sha256=kT8TylASOpPlY60rIG6VL6_eLVsfg5KvEVnZHzDWtR0,8193
|
48
49
|
docling/models/code_formula_model.py,sha256=5uWh-eI-Ejmv3DujKJoKKgJBuvPLokt7AJ_ybt8VHEw,11373
|
49
|
-
docling/models/document_picture_classifier.py,sha256=
|
50
|
+
docling/models/document_picture_classifier.py,sha256=9JvoWeH5uQBC7levjM8zptk7UT-b8EQnD-2EnxTjTT4,6202
|
50
51
|
docling/models/easyocr_model.py,sha256=ECPBd-48cCw5s935NsPJO_C_1QuK_yAUGloMM77WqIM,7387
|
51
|
-
docling/models/layout_model.py,sha256=
|
52
|
+
docling/models/layout_model.py,sha256=8bfLVKCS2A-ePTQK-T4M2K_Ah-jUVj71YOtwZvZ9rsU,8825
|
52
53
|
docling/models/ocr_mac_model.py,sha256=y-1DSFDbACHpEwNTfQwzN9ab8r5j5rBFNPtQ48BzsrA,5396
|
53
54
|
docling/models/page_assemble_model.py,sha256=TvN1naez7dUodLxpUUBzpuMCpqZBTf6YSpewxgjzmrg,6323
|
54
55
|
docling/models/page_preprocessing_model.py,sha256=x8MI4mvjizqEqAb5511dtrNRCJSb-lSmwHw0tmHPFiI,5103
|
55
56
|
docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCrS_btclO_ZCLAUqrfl0,2377
|
56
57
|
docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
|
57
|
-
docling/models/picture_description_vlm_model.py,sha256=
|
58
|
+
docling/models/picture_description_vlm_model.py,sha256=nAUt-eZOX2GvaCiV2BJO7VppxUbP7udVIF4oe_sEYXo,4000
|
58
59
|
docling/models/rapid_ocr_model.py,sha256=AMdc66s_iWO4p6nQ0LNjQMUYVxrDSxMyLNPpjPYt6N8,5916
|
59
|
-
docling/models/readingorder_model.py,sha256=
|
60
|
-
docling/models/table_structure_model.py,sha256=
|
60
|
+
docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
|
61
|
+
docling/models/table_structure_model.py,sha256=RFXo73f2q4XuKyaSqbxpznh7JVtlLcT0FsOWl9oZbSg,12518
|
61
62
|
docling/models/tesseract_ocr_cli_model.py,sha256=qcM3-n7Z_dm1CGBhVUcNr2XT41iXnU32zk4RqKHBl9I,12775
|
62
|
-
docling/models/tesseract_ocr_model.py,sha256=
|
63
|
+
docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
|
63
64
|
docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
|
64
65
|
docling/models/factories/base_factory.py,sha256=MfWIljMETi5aaVR-6qLTelW8u1gwDAQsOwg3fu7O4Qc,4028
|
65
66
|
docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
|
66
67
|
docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
|
67
68
|
docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
68
|
-
docling/models/plugins/defaults.py,sha256=
|
69
|
+
docling/models/plugins/defaults.py,sha256=OAHWW2tCcUXSyDMFxV_lXVRjSBJ1n6z-Eb3R8cDucU4,886
|
69
70
|
docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
71
|
docling/models/utils/hf_model_download.py,sha256=scBEfsM4yl7xPzqe7UtPvDh9RfQZQnuOhqQKilYBHls,984
|
71
72
|
docling/models/vlm_models_inline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
|
-
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=
|
73
|
-
docling/models/vlm_models_inline/mlx_model.py,sha256=
|
73
|
+
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=LAnWFIHGblWln6DQMLtCQQW3-YUPDMbgeD2tjfM8vLM,8415
|
74
|
+
docling/models/vlm_models_inline/mlx_model.py,sha256=p-H6wG31iVRoOjsqYaCVa4pEzxMP3vzLcsUatMjDJDQ,5948
|
74
75
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
75
76
|
docling/pipeline/asr_pipeline.py,sha256=tQkhu9fXdkSuYIL22xzV2YRUlQh-9qktHBbs2qeXhJI,9070
|
76
77
|
docling/pipeline/base_pipeline.py,sha256=14yQrDjsojl4RgbBjKFSEfVBYR_sULZfBI1uDzFLi8Y,9331
|
77
78
|
docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
|
78
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=
|
79
|
-
docling/pipeline/vlm_pipeline.py,sha256=
|
79
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=yFishq4Cu01BiBGHk3Irr7ogcTQKeSC0QZImQVAhIaY,12740
|
80
|
+
docling/pipeline/vlm_pipeline.py,sha256=0lj8tbXNpYF8OLBoLqP2BZfFpTHi40RoHVfvO_Nah4Q,15349
|
80
81
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
81
|
-
docling/utils/accelerator_utils.py,sha256=
|
82
|
+
docling/utils/accelerator_utils.py,sha256=DSajLxVx1JEVT0zt5de26llciLNlVfIDfSa2zYCFJzQ,2909
|
82
83
|
docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
|
83
84
|
docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
|
84
85
|
docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
|
85
|
-
docling/utils/layout_postprocessor.py,sha256=
|
86
|
+
docling/utils/layout_postprocessor.py,sha256=QuTZZq4LNs1eM_n_2gubVfAuLBMkJiozfs3hp-jUpK4,24399
|
86
87
|
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
87
|
-
docling/utils/model_downloader.py,sha256=
|
88
|
-
docling/utils/ocr_utils.py,sha256=
|
89
|
-
docling/utils/orientation.py,sha256=
|
88
|
+
docling/utils/model_downloader.py,sha256=3vijCsAIVwWqehGBDRxRq7mJ3yRb9-zBsG00iqjqegU,4076
|
89
|
+
docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
|
90
|
+
docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,1842
|
90
91
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
91
92
|
docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
|
92
93
|
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
93
|
-
docling-2.
|
94
|
-
docling-2.
|
95
|
-
docling-2.
|
96
|
-
docling-2.
|
97
|
-
docling-2.
|
98
|
-
docling-2.
|
94
|
+
docling-2.41.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
95
|
+
docling-2.41.0.dist-info/METADATA,sha256=KYqB0miKX2x2ESNy8tNHdAlyTCONqhwGLR2iag2PcQ0,10274
|
96
|
+
docling-2.41.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
97
|
+
docling-2.41.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
|
98
|
+
docling-2.41.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
|
99
|
+
docling-2.41.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|