docling 2.49.0__py3-none-any.whl → 2.50.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/html_backend.py +3 -2
- docling/datamodel/pipeline_options.py +1 -1
- docling/models/layout_model.py +3 -3
- docling/models/page_preprocessing_model.py +1 -1
- docling/models/table_structure_model.py +1 -1
- docling/utils/model_downloader.py +2 -1
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/METADATA +2 -2
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/RECORD +12 -12
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/WHEEL +0 -0
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/entry_points.txt +0 -0
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/licenses/LICENSE +0 -0
- {docling-2.49.0.dist-info → docling-2.50.0.dist-info}/top_level.txt +0 -0
docling/backend/html_backend.py
CHANGED
@@ -467,13 +467,14 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|
467
467
|
|
468
468
|
@contextmanager
|
469
469
|
def _use_hyperlink(self, tag: Tag):
|
470
|
+
old_hyperlink: Union[AnyUrl, Path, None] = None
|
471
|
+
new_hyperlink: Union[AnyUrl, Path, None] = None
|
470
472
|
this_href = tag.get("href")
|
471
473
|
if this_href is None:
|
472
474
|
yield None
|
473
475
|
else:
|
474
476
|
if isinstance(this_href, str) and this_href:
|
475
|
-
old_hyperlink
|
476
|
-
new_hyperlink: Union[AnyUrl, Path, None] = None
|
477
|
+
old_hyperlink = self.hyperlink
|
477
478
|
if self.original_url is not None:
|
478
479
|
this_href = urljoin(str(self.original_url), str(this_href))
|
479
480
|
# ugly fix for relative links since pydantic does not support them.
|
docling/datamodel/pipeline_options.py
CHANGED
@@ -283,10 +283,10 @@ class LayoutOptions(BaseModel):
|
|
283
283
|
keep_empty_clusters: bool = (
|
284
284
|
False # Whether to keep clusters that contain no text cells
|
285
285
|
)
|
286
|
+
model_spec: LayoutModelConfig = DOCLING_LAYOUT_HERON
|
286
287
|
skip_cell_assignment: bool = (
|
287
288
|
False # Skip cell-to-cluster assignment for VLM-only processing
|
288
289
|
)
|
289
|
-
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
290
290
|
|
291
291
|
|
292
292
|
class AsrPipelineOptions(PipelineOptions):
|
docling/models/layout_model.py
CHANGED
@@ -91,7 +91,7 @@ class LayoutModel(BasePageModel):
|
|
91
91
|
local_dir: Optional[Path] = None,
|
92
92
|
force: bool = False,
|
93
93
|
progress: bool = False,
|
94
|
-
layout_model_config: LayoutModelConfig =
|
94
|
+
layout_model_config: LayoutModelConfig = LayoutOptions().model_spec, # use default
|
95
95
|
) -> Path:
|
96
96
|
return download_hf_model(
|
97
97
|
repo_id=layout_model_config.repo_id,
|
@@ -122,8 +122,8 @@ class LayoutModel(BasePageModel):
|
|
122
122
|
left_clusters = [c for c in clusters if c.label not in exclude_labels]
|
123
123
|
right_clusters = [c for c in clusters if c.label in exclude_labels]
|
124
124
|
# Create a deep copy of the original image for both sides
|
125
|
-
left_image =
|
126
|
-
right_image =
|
125
|
+
left_image = page.image.copy()
|
126
|
+
right_image = page.image.copy()
|
127
127
|
|
128
128
|
# Draw clusters on both images
|
129
129
|
draw_clusters(left_image, left_clusters, scale_x, scale_y)
|
docling/models/page_preprocessing_model.py
CHANGED
@@ -90,7 +90,7 @@ class PagePreprocessingModel(BasePageModel):
|
|
90
90
|
|
91
91
|
# DEBUG code:
|
92
92
|
def draw_text_boxes(image, cells, show: bool = False):
|
93
|
-
draw = ImageDraw.Draw(image)
|
93
|
+
draw = ImageDraw.Draw(image.copy())
|
94
94
|
for c in cells:
|
95
95
|
x0, y0, x1, y1 = (
|
96
96
|
c.to_bounding_box().l,
|
docling/utils/model_downloader.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Optional
|
|
4
4
|
|
5
5
|
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
|
6
6
|
from docling.datamodel.pipeline_options import (
|
7
|
+
LayoutOptions,
|
7
8
|
granite_picture_description,
|
8
9
|
smolvlm_picture_description,
|
9
10
|
)
|
@@ -47,7 +48,7 @@ def download_models(
|
|
47
48
|
if with_layout:
|
48
49
|
_log.info("Downloading layout model...")
|
49
50
|
LayoutModel.download_models(
|
50
|
-
local_dir=output_dir /
|
51
|
+
local_dir=output_dir / LayoutOptions().model_spec.model_repo_folder,
|
51
52
|
force=force,
|
52
53
|
progress=progress,
|
53
54
|
)
|
{docling-2.49.0.dist-info → docling-2.50.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: docling
|
3
|
-
Version: 2.49.0
|
3
|
+
Version: 2.50.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
6
6
|
License-Expression: MIT
|
@@ -28,7 +28,7 @@ License-File: LICENSE
|
|
28
28
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
29
29
|
Requires-Dist: docling-core[chunking]<3.0.0,>=2.42.0
|
30
30
|
Requires-Dist: docling-parse<5.0.0,>=4.2.2
|
31
|
-
Requires-Dist: docling-ibm-models<4,>=3.9.0
|
31
|
+
Requires-Dist: docling-ibm-models<4,>=3.9.1
|
32
32
|
Requires-Dist: filetype<2.0.0,>=1.2.0
|
33
33
|
Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
|
34
34
|
Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
|
{docling-2.49.0.dist-info → docling-2.50.0.dist-info}/RECORD
CHANGED
@@ -10,7 +10,7 @@ docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE
|
|
10
10
|
docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3Uea00XrLluTg,7918
|
11
11
|
docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
|
12
12
|
docling/backend/docling_parse_v4_backend.py,sha256=MbCMxNGmoW4iuev9tX1Vt4jtIeak2kC9Uac3xQSRxeo,7509
|
13
|
-
docling/backend/html_backend.py,sha256=
|
13
|
+
docling/backend/html_backend.py,sha256=7I3BQSmC7P47jpzXHt3OuPNhtVedJiZVEjjLykyx5pY,42245
|
14
14
|
docling/backend/md_backend.py,sha256=qCI7SD9hnWWGrkG_drpzQv2Z7DVBG4Tsq3hhTsYV790,22562
|
15
15
|
docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
|
16
16
|
docling/backend/msexcel_backend.py,sha256=5JRbPwOjR1r45AMeIts1rj6InbOgLBf_CtAhvNPVmsQ,19157
|
@@ -40,7 +40,7 @@ docling/datamodel/base_models.py,sha256=vOt895z0GsFirHkkI3hM23e9oyUuz9RXfcGFtoIN
|
|
40
40
|
docling/datamodel/document.py,sha256=ElY7G6FYJ6Bayyw433_tbnxyE47fnQRoBG_mygvOBrA,17370
|
41
41
|
docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQgoQ,1323
|
42
42
|
docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
|
43
|
-
docling/datamodel/pipeline_options.py,sha256=
|
43
|
+
docling/datamodel/pipeline_options.py,sha256=IkbBJGQjZ9nrxN9qN6L0KBLnf1F3BBg3tfCMWPmx0cQ,10966
|
44
44
|
docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
|
45
45
|
docling/datamodel/pipeline_options_vlm_model.py,sha256=AcqqThSW74hwQ6x7pazzm57LnJiUqB7gQi5wFayGlbk,2628
|
46
46
|
docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
|
@@ -52,16 +52,16 @@ docling/models/base_ocr_model.py,sha256=kT8TylASOpPlY60rIG6VL6_eLVsfg5KvEVnZHzDW
|
|
52
52
|
docling/models/code_formula_model.py,sha256=XRugm4EwifLRc-TrAk-glKlktJP-nAPneKh2EOovkJU,11308
|
53
53
|
docling/models/document_picture_classifier.py,sha256=9JvoWeH5uQBC7levjM8zptk7UT-b8EQnD-2EnxTjTT4,6202
|
54
54
|
docling/models/easyocr_model.py,sha256=ECPBd-48cCw5s935NsPJO_C_1QuK_yAUGloMM77WqIM,7387
|
55
|
-
docling/models/layout_model.py,sha256=
|
55
|
+
docling/models/layout_model.py,sha256=2D7Ey2Mvtzyq9KARIFLaUZKzsR661h7Zggwn0IM9R3c,9154
|
56
56
|
docling/models/ocr_mac_model.py,sha256=y-1DSFDbACHpEwNTfQwzN9ab8r5j5rBFNPtQ48BzsrA,5396
|
57
57
|
docling/models/page_assemble_model.py,sha256=TvN1naez7dUodLxpUUBzpuMCpqZBTf6YSpewxgjzmrg,6323
|
58
|
-
docling/models/page_preprocessing_model.py,sha256=
|
58
|
+
docling/models/page_preprocessing_model.py,sha256=EmusNexws5ZmR93js_saVU0BedqZ_HIHQeY7lcf52tI,5284
|
59
59
|
docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCrS_btclO_ZCLAUqrfl0,2377
|
60
60
|
docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
|
61
61
|
docling/models/picture_description_vlm_model.py,sha256=5BJvaF3PHuL9lCVYqPv9krh3h_7YwNSdKYw1EVEj13k,4156
|
62
62
|
docling/models/rapid_ocr_model.py,sha256=7yZC7I1qoC9xC8xJIjTk2c8VFm89RfB6Vr7IDOnr5gs,7102
|
63
63
|
docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
|
64
|
-
docling/models/table_structure_model.py,sha256=
|
64
|
+
docling/models/table_structure_model.py,sha256=7vO8LisdoqCTsY8X8lsk9d-oD2hVjUtdaWlkMTQxEg0,12518
|
65
65
|
docling/models/tesseract_ocr_cli_model.py,sha256=I3Gn28Y-LD8OfvyCElN9fLiNgpo2sT0uMkVt258253s,12881
|
66
66
|
docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
|
67
67
|
docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
|
@@ -93,15 +93,15 @@ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
|
|
93
93
|
docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
|
94
94
|
docling/utils/layout_postprocessor.py,sha256=sE9UR3Nv4iOk26uoIsN3bFioE7ScfAjj0orDBDneLXg,25166
|
95
95
|
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
96
|
-
docling/utils/model_downloader.py,sha256=
|
96
|
+
docling/utils/model_downloader.py,sha256=lAIyevIC6dyv1TS0ElRSAGNylB5n_V8pWs1PhxH8wAQ,4104
|
97
97
|
docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
|
98
98
|
docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,1842
|
99
99
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
100
100
|
docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
|
101
101
|
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
102
|
-
docling-2.
|
103
|
-
docling-2.
|
104
|
-
docling-2.
|
105
|
-
docling-2.
|
106
|
-
docling-2.
|
107
|
-
docling-2.
|
102
|
+
docling-2.50.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
103
|
+
docling-2.50.0.dist-info/METADATA,sha256=w6U8qf-fYMZi6EXxFXFLxs9WOSG3S0Ilblg-klEyK3Y,10731
|
104
|
+
docling-2.50.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
105
|
+
docling-2.50.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
|
106
|
+
docling-2.50.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
|
107
|
+
docling-2.50.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|