docling-2.57.0-py3-none-any.whl → docling-2.59.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling might be problematic. Click here for more details.
- docling/backend/abstract_backend.py +24 -3
- docling/backend/asciidoc_backend.py +3 -3
- docling/backend/docling_parse_v4_backend.py +15 -4
- docling/backend/html_backend.py +130 -20
- docling/backend/md_backend.py +27 -5
- docling/backend/msexcel_backend.py +121 -29
- docling/backend/mspowerpoint_backend.py +2 -2
- docling/backend/msword_backend.py +18 -18
- docling/backend/pdf_backend.py +9 -2
- docling/backend/pypdfium2_backend.py +12 -3
- docling/cli/main.py +104 -38
- docling/datamodel/asr_model_specs.py +408 -6
- docling/datamodel/backend_options.py +82 -0
- docling/datamodel/base_models.py +19 -2
- docling/datamodel/document.py +81 -48
- docling/datamodel/pipeline_options_asr_model.py +21 -1
- docling/datamodel/pipeline_options_vlm_model.py +1 -0
- docling/document_converter.py +37 -45
- docling/document_extractor.py +12 -11
- docling/models/api_vlm_model.py +5 -3
- docling/models/picture_description_vlm_model.py +5 -1
- docling/models/readingorder_model.py +6 -7
- docling/models/vlm_models_inline/hf_transformers_model.py +13 -3
- docling/models/vlm_models_inline/mlx_model.py +9 -3
- docling/models/vlm_models_inline/nuextract_transformers_model.py +13 -3
- docling/models/vlm_models_inline/vllm_model.py +42 -8
- docling/pipeline/asr_pipeline.py +149 -6
- docling/utils/api_image_request.py +20 -9
- docling/utils/layout_postprocessor.py +23 -24
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/METADATA +11 -8
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/RECORD +35 -34
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/WHEEL +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/entry_points.txt +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/licenses/LICENSE +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,6 @@ import bisect
|
|
|
2
2
|
import logging
|
|
3
3
|
import sys
|
|
4
4
|
from collections import defaultdict
|
|
5
|
-
from typing import Dict, List, Set, Tuple
|
|
6
5
|
|
|
7
6
|
from docling_core.types.doc import DocItemLabel, Size
|
|
8
7
|
from docling_core.types.doc.page import TextCell
|
|
@@ -39,7 +38,7 @@ class UnionFind:
|
|
|
39
38
|
self.parent[root_y] = root_x
|
|
40
39
|
self.rank[root_x] += 1
|
|
41
40
|
|
|
42
|
-
def get_groups(self) -> Dict[int, List[int]]:
|
|
41
|
+
def get_groups(self) -> dict[int, list[int]]:
|
|
43
42
|
"""Returns groups as {root: [elements]}."""
|
|
44
43
|
groups = defaultdict(list)
|
|
45
44
|
for elem in self.parent:
|
|
@@ -50,13 +49,13 @@ class UnionFind:
|
|
|
50
49
|
class SpatialClusterIndex:
|
|
51
50
|
"""Efficient spatial indexing for clusters using R-tree and interval trees."""
|
|
52
51
|
|
|
53
|
-
def __init__(self, clusters: List[Cluster]):
|
|
52
|
+
def __init__(self, clusters: list[Cluster]):
|
|
54
53
|
p = index.Property()
|
|
55
54
|
p.dimension = 2
|
|
56
55
|
self.spatial_index = index.Index(properties=p)
|
|
57
56
|
self.x_intervals = IntervalTree()
|
|
58
57
|
self.y_intervals = IntervalTree()
|
|
59
|
-
self.clusters_by_id: Dict[int, Cluster] = {}
|
|
58
|
+
self.clusters_by_id: dict[int, Cluster] = {}
|
|
60
59
|
|
|
61
60
|
for cluster in clusters:
|
|
62
61
|
self.add_cluster(cluster)
|
|
@@ -72,7 +71,7 @@ class SpatialClusterIndex:
|
|
|
72
71
|
self.spatial_index.delete(cluster.id, cluster.bbox.as_tuple())
|
|
73
72
|
del self.clusters_by_id[cluster.id]
|
|
74
73
|
|
|
75
|
-
def find_candidates(self, bbox: BoundingBox) -> Set[int]:
|
|
74
|
+
def find_candidates(self, bbox: BoundingBox) -> set[int]:
|
|
76
75
|
"""Find potential overlapping cluster IDs using all indexes."""
|
|
77
76
|
spatial = set(self.spatial_index.intersection(bbox.as_tuple()))
|
|
78
77
|
x_candidates = self.x_intervals.find_containing(
|
|
@@ -123,13 +122,13 @@ class IntervalTree:
|
|
|
123
122
|
"""Memory-efficient interval tree for 1D overlap queries."""
|
|
124
123
|
|
|
125
124
|
def __init__(self):
|
|
126
|
-
self.intervals: List[Interval] = [] # Sorted by min_val
|
|
125
|
+
self.intervals: list[Interval] = [] # Sorted by min_val
|
|
127
126
|
|
|
128
127
|
def insert(self, min_val: float, max_val: float, id: int):
|
|
129
128
|
interval = Interval(min_val, max_val, id)
|
|
130
129
|
bisect.insort(self.intervals, interval)
|
|
131
130
|
|
|
132
|
-
def find_containing(self, point: float) -> Set[int]:
|
|
131
|
+
def find_containing(self, point: float) -> set[int]:
|
|
133
132
|
"""Find all intervals containing the point."""
|
|
134
133
|
pos = bisect.bisect_left(self.intervals, point)
|
|
135
134
|
result = set()
|
|
@@ -196,7 +195,7 @@ class LayoutPostprocessor:
|
|
|
196
195
|
}
|
|
197
196
|
|
|
198
197
|
def __init__(
|
|
199
|
-
self, page: Page, clusters: List[Cluster], options: LayoutOptions
|
|
198
|
+
self, page: Page, clusters: list[Cluster], options: LayoutOptions
|
|
200
199
|
) -> None:
|
|
201
200
|
"""Initialize processor with page and clusters."""
|
|
202
201
|
|
|
@@ -219,7 +218,7 @@ class LayoutPostprocessor:
|
|
|
219
218
|
[c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
|
|
220
219
|
)
|
|
221
220
|
|
|
222
|
-
def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
|
|
221
|
+
def postprocess(self) -> tuple[list[Cluster], list[TextCell]]:
|
|
223
222
|
"""Main processing pipeline."""
|
|
224
223
|
self.regular_clusters = self._process_regular_clusters()
|
|
225
224
|
self.special_clusters = self._process_special_clusters()
|
|
@@ -254,7 +253,7 @@ class LayoutPostprocessor:
|
|
|
254
253
|
|
|
255
254
|
return final_clusters, self.cells
|
|
256
255
|
|
|
257
|
-
def _process_regular_clusters(self) -> List[Cluster]:
|
|
256
|
+
def _process_regular_clusters(self) -> list[Cluster]:
|
|
258
257
|
"""Process regular clusters with iterative refinement."""
|
|
259
258
|
clusters = [
|
|
260
259
|
c
|
|
@@ -311,7 +310,7 @@ class LayoutPostprocessor:
|
|
|
311
310
|
|
|
312
311
|
return clusters
|
|
313
312
|
|
|
314
|
-
def _process_special_clusters(self) -> List[Cluster]:
|
|
313
|
+
def _process_special_clusters(self) -> list[Cluster]:
|
|
315
314
|
special_clusters = [
|
|
316
315
|
c
|
|
317
316
|
for c in self.special_clusters
|
|
@@ -381,7 +380,7 @@ class LayoutPostprocessor:
|
|
|
381
380
|
|
|
382
381
|
return picture_clusters + wrapper_clusters
|
|
383
382
|
|
|
384
|
-
def _handle_cross_type_overlaps(self, special_clusters) -> List[Cluster]:
|
|
383
|
+
def _handle_cross_type_overlaps(self, special_clusters) -> list[Cluster]:
|
|
385
384
|
"""Handle overlaps between regular and wrapper clusters before child assignment.
|
|
386
385
|
|
|
387
386
|
In particular, KEY_VALUE_REGION proposals that are almost identical to a TABLE
|
|
@@ -454,7 +453,7 @@ class LayoutPostprocessor:
|
|
|
454
453
|
|
|
455
454
|
def _select_best_cluster_from_group(
|
|
456
455
|
self,
|
|
457
|
-
group_clusters: List[Cluster],
|
|
456
|
+
group_clusters: list[Cluster],
|
|
458
457
|
params: dict,
|
|
459
458
|
) -> Cluster:
|
|
460
459
|
"""Select best cluster from a group of overlapping clusters based on all rules."""
|
|
@@ -487,11 +486,11 @@ class LayoutPostprocessor:
|
|
|
487
486
|
|
|
488
487
|
def _remove_overlapping_clusters(
|
|
489
488
|
self,
|
|
490
|
-
clusters: List[Cluster],
|
|
489
|
+
clusters: list[Cluster],
|
|
491
490
|
cluster_type: str,
|
|
492
491
|
overlap_threshold: float = 0.8,
|
|
493
492
|
containment_threshold: float = 0.8,
|
|
494
|
-
) -> List[Cluster]:
|
|
493
|
+
) -> list[Cluster]:
|
|
495
494
|
if not clusters:
|
|
496
495
|
return []
|
|
497
496
|
|
|
@@ -544,7 +543,7 @@ class LayoutPostprocessor:
|
|
|
544
543
|
|
|
545
544
|
def _select_best_cluster(
|
|
546
545
|
self,
|
|
547
|
-
clusters: List[Cluster],
|
|
546
|
+
clusters: list[Cluster],
|
|
548
547
|
area_threshold: float,
|
|
549
548
|
conf_threshold: float,
|
|
550
549
|
) -> Cluster:
|
|
@@ -572,7 +571,7 @@ class LayoutPostprocessor:
|
|
|
572
571
|
|
|
573
572
|
return current_best if current_best else clusters[0]
|
|
574
573
|
|
|
575
|
-
def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
|
574
|
+
def _deduplicate_cells(self, cells: list[TextCell]) -> list[TextCell]:
|
|
576
575
|
"""Ensure each cell appears only once, maintaining order of first appearance."""
|
|
577
576
|
seen_ids = set()
|
|
578
577
|
unique_cells = []
|
|
@@ -583,8 +582,8 @@ class LayoutPostprocessor:
|
|
|
583
582
|
return unique_cells
|
|
584
583
|
|
|
585
584
|
def _assign_cells_to_clusters(
|
|
586
|
-
self, clusters: List[Cluster], min_overlap: float = 0.2
|
|
587
|
-
) -> List[Cluster]:
|
|
585
|
+
self, clusters: list[Cluster], min_overlap: float = 0.2
|
|
586
|
+
) -> list[Cluster]:
|
|
588
587
|
"""Assign cells to best overlapping cluster."""
|
|
589
588
|
for cluster in clusters:
|
|
590
589
|
cluster.cells = []
|
|
@@ -616,7 +615,7 @@ class LayoutPostprocessor:
|
|
|
616
615
|
|
|
617
616
|
return clusters
|
|
618
617
|
|
|
619
|
-
def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
|
|
618
|
+
def _find_unassigned_cells(self, clusters: list[Cluster]) -> list[TextCell]:
|
|
620
619
|
"""Find cells not assigned to any cluster."""
|
|
621
620
|
assigned = {cell.index for cluster in clusters for cell in cluster.cells}
|
|
622
621
|
return [
|
|
@@ -625,7 +624,7 @@ class LayoutPostprocessor:
|
|
|
625
624
|
if cell.index not in assigned and cell.text.strip()
|
|
626
625
|
]
|
|
627
626
|
|
|
628
|
-
def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
|
|
627
|
+
def _adjust_cluster_bboxes(self, clusters: list[Cluster]) -> list[Cluster]:
|
|
629
628
|
"""Adjust cluster bounding boxes to contain their cells."""
|
|
630
629
|
for cluster in clusters:
|
|
631
630
|
if not cluster.cells:
|
|
@@ -651,13 +650,13 @@ class LayoutPostprocessor:
|
|
|
651
650
|
|
|
652
651
|
return clusters
|
|
653
652
|
|
|
654
|
-
def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
|
653
|
+
def _sort_cells(self, cells: list[TextCell]) -> list[TextCell]:
|
|
655
654
|
"""Sort cells in native reading order."""
|
|
656
655
|
return sorted(cells, key=lambda c: (c.index))
|
|
657
656
|
|
|
658
657
|
def _sort_clusters(
|
|
659
|
-
self, clusters: List[Cluster], mode: str = "id"
|
|
660
|
-
) -> List[Cluster]:
|
|
658
|
+
self, clusters: list[Cluster], mode: str = "id"
|
|
659
|
+
) -> list[Cluster]:
|
|
661
660
|
"""Sort clusters in reading order (top-to-bottom, left-to-right)."""
|
|
662
661
|
if mode == "id": # sort in the order the cells are printed in the PDF.
|
|
663
662
|
return sorted(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling
|
|
3
|
-
Version: 2.57.0
|
|
3
|
+
Version: 2.59.0
|
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
|
5
5
|
Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -22,12 +22,13 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
26
|
Requires-Python: <4.0,>=3.9
|
|
26
27
|
Description-Content-Type: text/markdown
|
|
27
28
|
License-File: LICENSE
|
|
28
29
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
29
30
|
Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
|
|
30
|
-
Requires-Dist: docling-parse<5.0.0,>=4.
|
|
31
|
+
Requires-Dist: docling-parse<5.0.0,>=4.7.0
|
|
31
32
|
Requires-Dist: docling-ibm-models<4,>=3.9.1
|
|
32
33
|
Requires-Dist: filetype<2.0.0,>=1.2.0
|
|
33
34
|
Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
|
|
@@ -45,7 +46,7 @@ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
|
|
|
45
46
|
Requires-Dist: pandas<3.0.0,>=2.1.4
|
|
46
47
|
Requires-Dist: marko<3.0.0,>=2.1.2
|
|
47
48
|
Requires-Dist: openpyxl<4.0.0,>=3.1.5
|
|
48
|
-
Requires-Dist: lxml<
|
|
49
|
+
Requires-Dist: lxml<7.0.0,>=4.0.0
|
|
49
50
|
Requires-Dist: pillow<12.0.0,>=10.0.0
|
|
50
51
|
Requires-Dist: tqdm<5.0.0,>=4.65.0
|
|
51
52
|
Requires-Dist: pluggy<2.0.0,>=1.0.0
|
|
@@ -62,14 +63,15 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
|
|
|
62
63
|
Provides-Extra: vlm
|
|
63
64
|
Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
|
|
64
65
|
Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
|
|
65
|
-
Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
|
66
|
-
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
|
|
66
|
+
Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
|
|
67
|
+
Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
|
|
67
68
|
Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
|
|
68
69
|
Provides-Extra: rapidocr
|
|
69
|
-
Requires-Dist: rapidocr<4.0.0,>=3.3;
|
|
70
|
-
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
|
|
70
|
+
Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
|
|
71
|
+
Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
|
|
71
72
|
Provides-Extra: asr
|
|
72
|
-
Requires-Dist:
|
|
73
|
+
Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
|
|
74
|
+
Requires-Dist: openai-whisper>=20250625; python_version < "3.14" and extra == "asr"
|
|
73
75
|
Dynamic: license-file
|
|
74
76
|
|
|
75
77
|
<p align="center">
|
|
@@ -96,6 +98,7 @@ Dynamic: license-file
|
|
|
96
98
|
[](https://pepy.tech/projects/docling)
|
|
97
99
|
[](https://apify.com/vancura/docling)
|
|
98
100
|
[](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
|
|
101
|
+
[](https://docling.ai/discord)
|
|
99
102
|
[](https://www.bestpractices.dev/projects/10101)
|
|
100
103
|
[](https://lfaidata.foundation/projects/)
|
|
101
104
|
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
docling/document_converter.py,sha256=
|
|
3
|
-
docling/document_extractor.py,sha256
|
|
2
|
+
docling/document_converter.py,sha256=_P3f4eZ8Gssv3P3l8xX2RrgzS8WhafY7-x6rWaWOeN4,15511
|
|
3
|
+
docling/document_extractor.py,sha256=Jk1a4hgPxjLkp4UoZR_pdEMid9-jhNiND5_NlPHGy6c,11965
|
|
4
4
|
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
|
5
5
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
6
6
|
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
docling/backend/abstract_backend.py,sha256=
|
|
8
|
-
docling/backend/asciidoc_backend.py,sha256=
|
|
7
|
+
docling/backend/abstract_backend.py,sha256=_xKSjLpR-ia93Kz0dto0yyVsaeIqEepUhVEGo18MuWw,2169
|
|
8
|
+
docling/backend/asciidoc_backend.py,sha256=DR8AUTNvy_SCHkieMpqZXg_NLRTy4roEqa0V8sILPWk,14400
|
|
9
9
|
docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
|
|
10
10
|
docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3Uea00XrLluTg,7918
|
|
11
11
|
docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
|
|
12
|
-
docling/backend/docling_parse_v4_backend.py,sha256=
|
|
13
|
-
docling/backend/html_backend.py,sha256=
|
|
14
|
-
docling/backend/md_backend.py,sha256=
|
|
12
|
+
docling/backend/docling_parse_v4_backend.py,sha256=tBJR0BbKFOIDKSngjVDu0BrzTj7qUZAhFdRT8GvAJ18,8232
|
|
13
|
+
docling/backend/html_backend.py,sha256=m91kRxMhQ1w-7G6MHA9l01dgF8-YQNn8ZNx9lwG467M,52935
|
|
14
|
+
docling/backend/md_backend.py,sha256=_0ToiecsGwU4H4BBso4ar9TGJi8OTwSXjgmi66vSJVQ,23513
|
|
15
15
|
docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
|
|
16
|
-
docling/backend/msexcel_backend.py,sha256=
|
|
17
|
-
docling/backend/mspowerpoint_backend.py,sha256=
|
|
18
|
-
docling/backend/msword_backend.py,sha256=
|
|
16
|
+
docling/backend/msexcel_backend.py,sha256=ujU8qoevNhLDWffihMlSYFVl7B3y_Uu5g-yispWyt8Q,22868
|
|
17
|
+
docling/backend/mspowerpoint_backend.py,sha256=71W_iV31Rggqn9UcMzXmsZ3QKMRpsBT8fCwdjsIIKAs,15109
|
|
18
|
+
docling/backend/msword_backend.py,sha256=zNJy-KM3Ia-L8IQ4sjYxATW4owFxbg2CK0rzke8y-7w,57451
|
|
19
19
|
docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
|
|
20
|
-
docling/backend/pdf_backend.py,sha256=
|
|
21
|
-
docling/backend/pypdfium2_backend.py,sha256=
|
|
20
|
+
docling/backend/pdf_backend.py,sha256=UovGV3RJG6qllzMPYzhDB6GID7buGV6w1uxl5dOAEw4,3563
|
|
21
|
+
docling/backend/pypdfium2_backend.py,sha256=tx0FnUW87zPsyafCvOuLcls2k5QdpPKWweyjNTfclNc,14509
|
|
22
22
|
docling/backend/webvtt_backend.py,sha256=9xPcfWVLuqhEAFrkv8aU36qHnSgjeINZAXT_C9C6XJA,19165
|
|
23
23
|
docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
docling/backend/docx/drawingml/utils.py,sha256=E9Iq8_052eEV5L1IN3ZqFX9eBidH56DKNlh6Tk7Do0I,3640
|
|
@@ -32,23 +32,24 @@ docling/backend/xml/jats_backend.py,sha256=_BWpQQg3SlsHAOOj0v2qRJoVqaQzL91GqN1tK
|
|
|
32
32
|
docling/backend/xml/uspto_backend.py,sha256=Tv4CE7V5_QwxTNJPl90CAd_mAbwaLGy8S6s6evh1Xow,70910
|
|
33
33
|
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
|
34
34
|
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
docling/cli/main.py,sha256=
|
|
35
|
+
docling/cli/main.py,sha256=T7MllU1e2zYoKekpEHPv7VdI4cypL6K5zzCfscHCRro,37404
|
|
36
36
|
docling/cli/models.py,sha256=zZBFQJAD7C5sespnYy5M__4qC_GyqAZ-QpfWtgPRDB0,6343
|
|
37
37
|
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
|
38
38
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
|
|
40
|
-
docling/datamodel/asr_model_specs.py,sha256=
|
|
41
|
-
docling/datamodel/
|
|
42
|
-
docling/datamodel/
|
|
40
|
+
docling/datamodel/asr_model_specs.py,sha256=gQJkW7DaSPiOuW_0QoI5OzR1_DQGRkw7yQlrVJ4hyo0,14473
|
|
41
|
+
docling/datamodel/backend_options.py,sha256=2zSbJRtBmJ6Twywj8pLOKaHhklY85XaGXUmSLX_SfgQ,2473
|
|
42
|
+
docling/datamodel/base_models.py,sha256=AmKIWnqjKo0WgUg6SsHJpN_et_B4rR6em0NEfJ1JKxU,12821
|
|
43
|
+
docling/datamodel/document.py,sha256=T9OogC1kIm0VDSC2ZFcFgWdcOjXzw5JvGr2y2hMlx3s,18795
|
|
43
44
|
docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQgoQ,1323
|
|
44
45
|
docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
|
|
45
46
|
docling/datamodel/pipeline_options.py,sha256=dklSaA7P6VkjbBB-Pz2OyzO2SQuV9y0I8VVr9XHJusw,11692
|
|
46
|
-
docling/datamodel/pipeline_options_asr_model.py,sha256=
|
|
47
|
-
docling/datamodel/pipeline_options_vlm_model.py,sha256=
|
|
47
|
+
docling/datamodel/pipeline_options_asr_model.py,sha256=cLqtRHBr2kbTNXRJ1ZhFGiXIK7Nl9RFmz2Wd7tJF2Jg,2172
|
|
48
|
+
docling/datamodel/pipeline_options_vlm_model.py,sha256=JBdpfN3nASD5_DaAUe0tla20-Mia8fkveyNw7wVTJ4c,3131
|
|
48
49
|
docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
|
|
49
50
|
docling/datamodel/vlm_model_specs.py,sha256=9TTmihDEFcI-TY1jJ2GTnTcrGa3bLg0e6anN4gPtFgU,10035
|
|
50
51
|
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
docling/models/api_vlm_model.py,sha256=
|
|
52
|
+
docling/models/api_vlm_model.py,sha256=tZHXS_weqkhgVse1JbrpvjzAyCxW8br78eRYrlMSG3k,4321
|
|
52
53
|
docling/models/auto_ocr_model.py,sha256=nn_eQfNdGUclXKrB0nodHmCqgMUNUJzG3dLq0lhlNAI,5188
|
|
53
54
|
docling/models/base_model.py,sha256=QEbglxu3kT6aNq3x_5jY8T_KcD_Hhv9zr0-A4Mizhco,7252
|
|
54
55
|
docling/models/base_ocr_model.py,sha256=kT8TylASOpPlY60rIG6VL6_eLVsfg5KvEVnZHzDWtR0,8193
|
|
@@ -61,9 +62,9 @@ docling/models/page_assemble_model.py,sha256=TvN1naez7dUodLxpUUBzpuMCpqZBTf6YSpe
|
|
|
61
62
|
docling/models/page_preprocessing_model.py,sha256=EmusNexws5ZmR93js_saVU0BedqZ_HIHQeY7lcf52tI,5284
|
|
62
63
|
docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCrS_btclO_ZCLAUqrfl0,2377
|
|
63
64
|
docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
|
|
64
|
-
docling/models/picture_description_vlm_model.py,sha256=
|
|
65
|
+
docling/models/picture_description_vlm_model.py,sha256=7-reEy5gNxKgOB-VMiysemTwoasZhO5H8VyX4NUEY-4,4272
|
|
65
66
|
docling/models/rapid_ocr_model.py,sha256=JGeed1aNO64SYFgxlOifdut4fynUJyBuyyQrfuSno-4,13182
|
|
66
|
-
docling/models/readingorder_model.py,sha256
|
|
67
|
+
docling/models/readingorder_model.py,sha256=gnRFfJAXH-zKtQJws5Zb1_KCVvu_dAq9pgaDYQKCt9s,17236
|
|
67
68
|
docling/models/table_structure_model.py,sha256=7g_mFf1YzfF8PXQfefNu6XYZu7TzJAn86zKb6IEUdCg,12518
|
|
68
69
|
docling/models/tesseract_ocr_cli_model.py,sha256=KuO4rXc-88C2-cAymvcr41TqFi3hNg4gerEzoI3Z6m4,13039
|
|
69
70
|
docling/models/tesseract_ocr_model.py,sha256=W_476USwExjSfhelXG8B9eNIVXXlm_dNFA60TZ5rq7E,11216
|
|
@@ -77,12 +78,12 @@ docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
77
78
|
docling/models/utils/generation_utils.py,sha256=0ZfMBMbolHAWjdbMza8FbD4_jQ4VY6ReUa4gqVLwMoU,5365
|
|
78
79
|
docling/models/utils/hf_model_download.py,sha256=VlKna9tLIVOGQkIRQBXfDimPIIyeRV7cFCbuOVmFQiU,1092
|
|
79
80
|
docling/models/vlm_models_inline/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
80
|
-
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=
|
|
81
|
-
docling/models/vlm_models_inline/mlx_model.py,sha256=
|
|
82
|
-
docling/models/vlm_models_inline/nuextract_transformers_model.py,sha256=
|
|
83
|
-
docling/models/vlm_models_inline/vllm_model.py,sha256=
|
|
81
|
+
docling/models/vlm_models_inline/hf_transformers_model.py,sha256=ylhdnY6A2nUkLQ2Ki-o-Jn8_kjO-JbYKdhnDXmGPB7Y,15047
|
|
82
|
+
docling/models/vlm_models_inline/mlx_model.py,sha256=_q1fVmVaEfnKTVp78djO4MSUA7LrF0JtCnMjTKnotT8,13749
|
|
83
|
+
docling/models/vlm_models_inline/nuextract_transformers_model.py,sha256=f-Djq2G6JLT-RE2LoEP3b2Q-LI33NsGM7Qxo4f6TkeA,10768
|
|
84
|
+
docling/models/vlm_models_inline/vllm_model.py,sha256=gIGZha3YCPBlJGgbjtqpRkiNrOqQszsOT3ZZZu1xbYo,11671
|
|
84
85
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
|
-
docling/pipeline/asr_pipeline.py,sha256=
|
|
86
|
+
docling/pipeline/asr_pipeline.py,sha256=rzEMHkbZfTmCwl4mjMa2bWRlVmkajC5nKBaY0bT7qj0,16020
|
|
86
87
|
docling/pipeline/base_extraction_pipeline.py,sha256=GYrEz83IXv-tdIHjtNWxMBNczFwL8SZyf9vnPJ3STaI,2627
|
|
87
88
|
docling/pipeline/base_pipeline.py,sha256=NPMQDTyis-LgQ4SybY2f5AESZl5PxogF-FRQuCDckXg,12748
|
|
88
89
|
docling/pipeline/extraction_vlm_pipeline.py,sha256=veUOTe8nGdnduZKaGn1RRb-NfU1H6t_EN4QAsb022Zg,8260
|
|
@@ -92,10 +93,10 @@ docling/pipeline/threaded_standard_pdf_pipeline.py,sha256=i67G5AOW7PIFCe5JS2sdBm
|
|
|
92
93
|
docling/pipeline/vlm_pipeline.py,sha256=HSbSoGZyy4eIK8eOL2g_NymrHg8r-DrB2buggJQAqHU,16189
|
|
93
94
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
95
|
docling/utils/accelerator_utils.py,sha256=DSajLxVx1JEVT0zt5de26llciLNlVfIDfSa2zYCFJzQ,2909
|
|
95
|
-
docling/utils/api_image_request.py,sha256=
|
|
96
|
+
docling/utils/api_image_request.py,sha256=HO-FrZ8kOqMMRVJSIsH3apoNoDKM2l7xrC8NfWAEgFQ,5876
|
|
96
97
|
docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
|
|
97
98
|
docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
|
|
98
|
-
docling/utils/layout_postprocessor.py,sha256=
|
|
99
|
+
docling/utils/layout_postprocessor.py,sha256=bwDIhgUg5rKianzccGPTotTjqjkWtIQSoZwgKio8YC4,25124
|
|
99
100
|
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
|
100
101
|
docling/utils/model_downloader.py,sha256=qrkL5NTpwk6yF4bcipcUtLRxl0Tqh7zoSa_WtLsMySA,5325
|
|
101
102
|
docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
|
|
@@ -103,9 +104,9 @@ docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,
|
|
|
103
104
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
|
104
105
|
docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
|
|
105
106
|
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
|
106
|
-
docling-2.
|
|
107
|
-
docling-2.
|
|
108
|
-
docling-2.
|
|
109
|
-
docling-2.
|
|
110
|
-
docling-2.
|
|
111
|
-
docling-2.
|
|
107
|
+
docling-2.59.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
|
108
|
+
docling-2.59.0.dist-info/METADATA,sha256=cXUxVkPEnIzf56IGksKO5slZW2A1Nu0WSonasqsdwic,11805
|
|
109
|
+
docling-2.59.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
110
|
+
docling-2.59.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
|
|
111
|
+
docling-2.59.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
|
|
112
|
+
docling-2.59.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|