docling 2.25.2__py3-none-any.whl → 2.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/asciidoc_backend.py +1 -1
- docling/backend/csv_backend.py +1 -1
- docling/backend/docling_parse_backend.py +21 -13
- docling/backend/docling_parse_v2_backend.py +20 -12
- docling/backend/docling_parse_v4_backend.py +185 -0
- docling/backend/docx/__init__.py +0 -0
- docling/backend/docx/latex/__init__.py +0 -0
- docling/backend/docx/latex/latex_dict.py +271 -0
- docling/backend/docx/latex/omml.py +453 -0
- docling/backend/html_backend.py +7 -7
- docling/backend/md_backend.py +1 -1
- docling/backend/msexcel_backend.py +2 -45
- docling/backend/mspowerpoint_backend.py +1 -1
- docling/backend/msword_backend.py +65 -3
- docling/backend/pdf_backend.py +7 -2
- docling/backend/pypdfium2_backend.py +52 -30
- docling/backend/xml/uspto_backend.py +1 -1
- docling/cli/main.py +62 -23
- docling/cli/models.py +1 -1
- docling/datamodel/base_models.py +8 -10
- docling/datamodel/pipeline_options.py +27 -31
- docling/document_converter.py +5 -5
- docling/models/base_model.py +9 -1
- docling/models/base_ocr_model.py +27 -16
- docling/models/code_formula_model.py +84 -5
- docling/models/document_picture_classifier.py +1 -1
- docling/models/easyocr_model.py +28 -13
- docling/models/factories/__init__.py +27 -0
- docling/models/factories/base_factory.py +122 -0
- docling/models/factories/ocr_factory.py +11 -0
- docling/models/factories/picture_description_factory.py +11 -0
- docling/models/ocr_mac_model.py +39 -11
- docling/models/page_preprocessing_model.py +4 -0
- docling/models/picture_description_api_model.py +20 -3
- docling/models/picture_description_base_model.py +19 -3
- docling/models/picture_description_vlm_model.py +14 -2
- docling/models/plugins/__init__.py +0 -0
- docling/models/plugins/defaults.py +28 -0
- docling/models/rapid_ocr_model.py +34 -13
- docling/models/table_structure_model.py +14 -5
- docling/models/tesseract_ocr_cli_model.py +40 -15
- docling/models/tesseract_ocr_model.py +37 -12
- docling/pipeline/standard_pdf_pipeline.py +25 -78
- docling/utils/export.py +8 -6
- docling/utils/layout_postprocessor.py +26 -23
- docling/utils/visualization.py +1 -1
- {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/METADATA +48 -19
- docling-2.27.0.dist-info/RECORD +83 -0
- {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/entry_points.txt +3 -0
- docling-2.25.2.dist-info/RECORD +0 -72
- {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/LICENSE +0 -0
- {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/WHEEL +0 -0
@@ -5,9 +5,10 @@ from collections import defaultdict
|
|
5
5
|
from typing import Dict, List, Set, Tuple
|
6
6
|
|
7
7
|
from docling_core.types.doc import DocItemLabel, Size
|
8
|
+
from docling_core.types.doc.page import TextCell
|
8
9
|
from rtree import index
|
9
10
|
|
10
|
-
from docling.datamodel.base_models import BoundingBox,
|
11
|
+
from docling.datamodel.base_models import BoundingBox, Cluster
|
11
12
|
|
12
13
|
_log = logging.getLogger(__name__)
|
13
14
|
|
@@ -198,7 +199,7 @@ class LayoutPostprocessor:
|
|
198
199
|
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
|
199
200
|
}
|
200
201
|
|
201
|
-
def __init__(self, cells: List[
|
202
|
+
def __init__(self, cells: List[TextCell], clusters: List[Cluster], page_size: Size):
|
202
203
|
"""Initialize processor with cells and clusters."""
|
203
204
|
"""Initialize processor with cells and spatial indices."""
|
204
205
|
self.cells = cells
|
@@ -218,7 +219,7 @@ class LayoutPostprocessor:
|
|
218
219
|
[c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
|
219
220
|
)
|
220
221
|
|
221
|
-
def postprocess(self) -> Tuple[List[Cluster], List[
|
222
|
+
def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
|
222
223
|
"""Main processing pipeline."""
|
223
224
|
self.regular_clusters = self._process_regular_clusters()
|
224
225
|
self.special_clusters = self._process_special_clusters()
|
@@ -271,15 +272,13 @@ class LayoutPostprocessor:
|
|
271
272
|
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
272
273
|
orphan_clusters = []
|
273
274
|
for i, cell in enumerate(unassigned):
|
274
|
-
conf =
|
275
|
-
if isinstance(cell, OcrCell):
|
276
|
-
conf = cell.confidence
|
275
|
+
conf = cell.confidence
|
277
276
|
|
278
277
|
orphan_clusters.append(
|
279
278
|
Cluster(
|
280
279
|
id=next_id + i,
|
281
280
|
label=DocItemLabel.TEXT,
|
282
|
-
bbox=cell.
|
281
|
+
bbox=cell.to_bounding_box(),
|
283
282
|
confidence=conf,
|
284
283
|
cells=[cell],
|
285
284
|
)
|
@@ -557,13 +556,13 @@ class LayoutPostprocessor:
|
|
557
556
|
|
558
557
|
return current_best if current_best else clusters[0]
|
559
558
|
|
560
|
-
def _deduplicate_cells(self, cells: List[
|
559
|
+
def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
561
560
|
"""Ensure each cell appears only once, maintaining order of first appearance."""
|
562
561
|
seen_ids = set()
|
563
562
|
unique_cells = []
|
564
563
|
for cell in cells:
|
565
|
-
if cell.
|
566
|
-
seen_ids.add(cell.
|
564
|
+
if cell.index not in seen_ids:
|
565
|
+
seen_ids.add(cell.index)
|
567
566
|
unique_cells.append(cell)
|
568
567
|
return unique_cells
|
569
568
|
|
@@ -582,11 +581,13 @@ class LayoutPostprocessor:
|
|
582
581
|
best_cluster = None
|
583
582
|
|
584
583
|
for cluster in clusters:
|
585
|
-
if cell.
|
584
|
+
if cell.rect.to_bounding_box().area() <= 0:
|
586
585
|
continue
|
587
586
|
|
588
|
-
overlap = cell.
|
589
|
-
|
587
|
+
overlap = cell.rect.to_bounding_box().intersection_area_with(
|
588
|
+
cluster.bbox
|
589
|
+
)
|
590
|
+
overlap_ratio = overlap / cell.rect.to_bounding_box().area()
|
590
591
|
|
591
592
|
if overlap_ratio > best_overlap:
|
592
593
|
best_overlap = overlap_ratio
|
@@ -601,11 +602,13 @@ class LayoutPostprocessor:
|
|
601
602
|
|
602
603
|
return clusters
|
603
604
|
|
604
|
-
def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[
|
605
|
+
def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
|
605
606
|
"""Find cells not assigned to any cluster."""
|
606
|
-
assigned = {cell.
|
607
|
+
assigned = {cell.index for cluster in clusters for cell in cluster.cells}
|
607
608
|
return [
|
608
|
-
cell
|
609
|
+
cell
|
610
|
+
for cell in self.cells
|
611
|
+
if cell.index not in assigned and cell.text.strip()
|
609
612
|
]
|
610
613
|
|
611
614
|
def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
|
@@ -615,10 +618,10 @@ class LayoutPostprocessor:
|
|
615
618
|
continue
|
616
619
|
|
617
620
|
cells_bbox = BoundingBox(
|
618
|
-
l=min(cell.
|
619
|
-
t=min(cell.
|
620
|
-
r=max(cell.
|
621
|
-
b=max(cell.
|
621
|
+
l=min(cell.rect.to_bounding_box().l for cell in cluster.cells),
|
622
|
+
t=min(cell.rect.to_bounding_box().t for cell in cluster.cells),
|
623
|
+
r=max(cell.rect.to_bounding_box().r for cell in cluster.cells),
|
624
|
+
b=max(cell.rect.to_bounding_box().b for cell in cluster.cells),
|
622
625
|
)
|
623
626
|
|
624
627
|
if cluster.label == DocItemLabel.TABLE:
|
@@ -634,9 +637,9 @@ class LayoutPostprocessor:
|
|
634
637
|
|
635
638
|
return clusters
|
636
639
|
|
637
|
-
def _sort_cells(self, cells: List[
|
640
|
+
def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
638
641
|
"""Sort cells in native reading order."""
|
639
|
-
return sorted(cells, key=lambda c: (c.
|
642
|
+
return sorted(cells, key=lambda c: (c.index))
|
640
643
|
|
641
644
|
def _sort_clusters(
|
642
645
|
self, clusters: List[Cluster], mode: str = "id"
|
@@ -647,7 +650,7 @@ class LayoutPostprocessor:
|
|
647
650
|
clusters,
|
648
651
|
key=lambda cluster: (
|
649
652
|
(
|
650
|
-
min(cell.
|
653
|
+
min(cell.index for cell in cluster.cells)
|
651
654
|
if cluster.cells
|
652
655
|
else sys.maxsize
|
653
656
|
),
|
docling/utils/visualization.py
CHANGED
@@ -25,7 +25,7 @@ def draw_clusters(
|
|
25
25
|
# Draw cells first (underneath)
|
26
26
|
cell_color = (0, 0, 0, 40) # Transparent black for cells
|
27
27
|
for tc in c.cells:
|
28
|
-
cx0, cy0, cx1, cy1 = tc.
|
28
|
+
cx0, cy0, cx1, cy1 = tc.rect.to_bounding_box().as_tuple()
|
29
29
|
cx0 *= scale_x
|
30
30
|
cx1 *= scale_x
|
31
31
|
cy0 *= scale_x
|
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.27.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
|
-
Home-page: https://github.com/
|
5
|
+
Home-page: https://github.com/docling-project/docling
|
6
6
|
License: MIT
|
7
7
|
Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
|
8
8
|
Author: Christoph Auer
|
@@ -28,9 +28,9 @@ Provides-Extra: vlm
|
|
28
28
|
Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
|
29
29
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
30
30
|
Requires-Dist: certifi (>=2024.7.4)
|
31
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
31
|
+
Requires-Dist: docling-core[chunking] (>=2.23.0,<3.0.0)
|
32
32
|
Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
|
33
|
-
Requires-Dist: docling-parse (>=
|
33
|
+
Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
|
34
34
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
35
35
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
36
36
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -42,8 +42,10 @@ Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (ex
|
|
42
42
|
Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
|
43
43
|
Requires-Dist: pandas (>=2.1.4,<3.0.0)
|
44
44
|
Requires-Dist: pillow (>=10.0.0,<12.0.0)
|
45
|
+
Requires-Dist: pluggy (>=1.0.0,<2.0.0)
|
45
46
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
46
47
|
Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
|
48
|
+
Requires-Dist: pylatexenc (>=2.10,<3.0)
|
47
49
|
Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
|
48
50
|
Requires-Dist: python-docx (>=1.1.2,<2.0.0)
|
49
51
|
Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
|
@@ -57,12 +59,12 @@ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
|
57
59
|
Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
|
58
60
|
Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
|
59
61
|
Requires-Dist: typer (>=0.12.5,<0.13.0)
|
60
|
-
Project-URL: Repository, https://github.com/
|
62
|
+
Project-URL: Repository, https://github.com/docling-project/docling
|
61
63
|
Description-Content-Type: text/markdown
|
62
64
|
|
63
65
|
<p align="center">
|
64
|
-
<a href="https://github.com/
|
65
|
-
<img loading="lazy" alt="Docling" src="https://github.com/
|
66
|
+
<a href="https://github.com/docling-project/docling">
|
67
|
+
<img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
|
66
68
|
</a>
|
67
69
|
</p>
|
68
70
|
|
@@ -73,7 +75,7 @@ Description-Content-Type: text/markdown
|
|
73
75
|
</p>
|
74
76
|
|
75
77
|
[](https://arxiv.org/abs/2408.09869)
|
76
|
-
[](https://
|
78
|
+
[](https://docling-project.github.io/docling/)
|
77
79
|
[](https://pypi.org/project/docling/)
|
78
80
|
[](https://pypi.org/project/docling/)
|
79
81
|
[](https://python-poetry.org/)
|
@@ -81,8 +83,9 @@ Description-Content-Type: text/markdown
|
|
81
83
|
[](https://pycqa.github.io/isort/)
|
82
84
|
[](https://pydantic.dev)
|
83
85
|
[](https://github.com/pre-commit/pre-commit)
|
84
|
-
[](https://opensource.org/licenses/MIT)
|
85
87
|
[](https://pepy.tech/projects/docling)
|
88
|
+
[](https://apify.com/vancura/docling)
|
86
89
|
|
87
90
|
Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
|
88
91
|
|
@@ -113,7 +116,7 @@ pip install docling
|
|
113
116
|
|
114
117
|
Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
|
115
118
|
|
116
|
-
More [detailed installation instructions](https://
|
119
|
+
More [detailed installation instructions](https://docling-project.github.io/docling/installation/) are available in the docs.
|
117
120
|
|
118
121
|
## Getting started
|
119
122
|
|
@@ -128,28 +131,54 @@ result = converter.convert(source)
|
|
128
131
|
print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
|
129
132
|
```
|
130
133
|
|
131
|
-
More [advanced usage options](https://
|
134
|
+
More [advanced usage options](https://docling-project.github.io/docling/usage/) are available in
|
132
135
|
the docs.
|
133
136
|
|
134
137
|
## Documentation
|
135
138
|
|
136
|
-
Check out Docling's [documentation](https://
|
139
|
+
Check out Docling's [documentation](https://docling-project.github.io/docling/), for details on
|
137
140
|
installation, usage, concepts, recipes, extensions, and more.
|
138
141
|
|
139
142
|
## Examples
|
140
143
|
|
141
|
-
Go hands-on with our [examples](https://
|
144
|
+
Go hands-on with our [examples](https://docling-project.github.io/docling/examples/),
|
142
145
|
demonstrating how to address different application use cases with Docling.
|
143
146
|
|
144
147
|
## Integrations
|
145
148
|
|
146
149
|
To further accelerate your AI application development, check out Docling's native
|
147
|
-
[integrations](https://
|
150
|
+
[integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
|
148
151
|
and tools.
|
149
152
|
|
153
|
+
## Apify Actor
|
154
|
+
|
155
|
+
<a href="https://apify.com/vancura/docling?fpr=docling"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Docling Actor on Apify" width="176" height="39" /></a>
|
156
|
+
|
157
|
+
You can run Docling in the cloud without installation using the [Docling Actor](https://apify.com/vancura/docling?fpr=docling) on Apify platform. Simply provide a document URL and get the processed result:
|
158
|
+
|
159
|
+
```bash
|
160
|
+
apify call vancura/docling -i '{
|
161
|
+
"options": {
|
162
|
+
"to_formats": ["md", "json", "html", "text", "doctags"]
|
163
|
+
},
|
164
|
+
"http_sources": [
|
165
|
+
{"url": "https://vancura.dev/assets/actor-test/facial-hairstyles-and-filtering-facepiece-respirators.pdf"},
|
166
|
+
{"url": "https://arxiv.org/pdf/2408.09869"}
|
167
|
+
]
|
168
|
+
}'
|
169
|
+
```
|
170
|
+
|
171
|
+
The Actor stores results in:
|
172
|
+
|
173
|
+
* Processed document in key-value store (`OUTPUT_RESULT`)
|
174
|
+
* Processing logs (`DOCLING_LOG`)
|
175
|
+
* Dataset record with result URL and status
|
176
|
+
|
177
|
+
Read more about the [Docling Actor](.actor/README.md), including how to use it via the Apify API and CLI.
|
178
|
+
|
150
179
|
## Get help and support
|
151
180
|
|
152
|
-
Please feel free to connect with us using the [discussion section](https://github.com/
|
181
|
+
Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
|
153
182
|
|
154
183
|
## Technical report
|
155
184
|
|
@@ -157,7 +186,7 @@ For more details on Docling's inner workings, check out the [Docling Technical R
|
|
157
186
|
|
158
187
|
## Contributing
|
159
188
|
|
160
|
-
Please read [Contributing to Docling](https://github.com/
|
189
|
+
Please read [Contributing to Docling](https://github.com/docling-project/docling/blob/main/CONTRIBUTING.md) for details.
|
161
190
|
|
162
191
|
## References
|
163
192
|
|
@@ -185,7 +214,7 @@ For individual model usage, please refer to the model licenses found in the orig
|
|
185
214
|
|
186
215
|
Docling has been brought to you by IBM.
|
187
216
|
|
188
|
-
[supported_formats]: https://
|
189
|
-
[docling_document]: https://
|
190
|
-
[integrations]: https://
|
217
|
+
[supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
|
218
|
+
[docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
|
219
|
+
[integrations]: https://docling-project.github.io/docling/integrations/
|
191
220
|
|
@@ -0,0 +1,83 @@
|
|
1
|
+
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
|
4
|
+
docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhXorvZ1k,14060
|
5
|
+
docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
|
6
|
+
docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
|
7
|
+
docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
|
8
|
+
docling/backend/docling_parse_v4_backend.py,sha256=sUjcgD62n2Z15gOYhLNAnwkzqSAnlQ8eKkDuVrlK_rk,6002
|
9
|
+
docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
docling/backend/docx/latex/latex_dict.py,sha256=a0UC3VLmG1BLN-hGmEaQamzKbDB10fCz0U8qRU--aBw,6613
|
12
|
+
docling/backend/docx/latex/omml.py,sha256=U-mQXNCI9ObUyHDxv6ItvaHlObIEu77PiXS1Vaaah6U,12012
|
13
|
+
docling/backend/html_backend.py,sha256=i9a5ucsIuf-sn6M8tmKt9Kg_qWqc5OJxhARb6ZNS3wI,19448
|
14
|
+
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
16
|
+
docling/backend/md_backend.py,sha256=v230PXShYJo2QaabwUHiBpE-EGScHIerjL78zPaJpZM,16837
|
17
|
+
docling/backend/msexcel_backend.py,sha256=_ZVZFKRRijpg-Xz10xNxu2m-NpDaYvoiBqEZP6GbrgE,11095
|
18
|
+
docling/backend/mspowerpoint_backend.py,sha256=wUriELF9wHwThITXxSyseVASe6W6Sw0E7Qg_U-Q3JNU,16434
|
19
|
+
docling/backend/msword_backend.py,sha256=uSQJ5PHoTIlw2bcAe8NGWutjgceNYWfg4N1ze17F4D0,23101
|
20
|
+
docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
|
21
|
+
docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
|
22
|
+
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
+
docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
|
24
|
+
docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
|
25
|
+
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
26
|
+
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
+
docling/cli/main.py,sha256=1N4h1HrNCWEymkqb4_mXyplcdVgVNAR7lRAZFXTiRKk,18310
|
28
|
+
docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
|
29
|
+
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
30
|
+
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
+
docling/datamodel/base_models.py,sha256=MAHr8LlffZ2uIXZ3AXOsikh_-oQIEYTiwwjsz-dQW9U,7287
|
32
|
+
docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
|
33
|
+
docling/datamodel/pipeline_options.py,sha256=n45Xgl1qnrHZxztd4CyhdDPYa8FygADJ8EpfbUuIlmc,11963
|
34
|
+
docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
|
35
|
+
docling/document_converter.py,sha256=LwbnfGzma937EmSrNWMzM-dldI9Cbu4DUgY8gL1OVHo,13184
|
36
|
+
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
37
|
+
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
+
docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
|
39
|
+
docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
|
40
|
+
docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
|
41
|
+
docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
|
42
|
+
docling/models/easyocr_model.py,sha256=ezq3yv5lORe7T1bbSoTZALck2oHqyEHq57cRfhMYCCQ,7401
|
43
|
+
docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq85Q2K_aM,872
|
44
|
+
docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
|
45
|
+
docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
|
46
|
+
docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
|
47
|
+
docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
|
48
|
+
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
49
|
+
docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
|
50
|
+
docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
|
51
|
+
docling/models/page_preprocessing_model.py,sha256=wAN2WlW7YnpqyETq6MpEWgUAokUwqGaX_g59sPUQsXo,2903
|
52
|
+
docling/models/picture_description_api_model.py,sha256=SRjOkCTBYa1pTIaQffDLUPabljjYrLOQ916MywESEXk,3715
|
53
|
+
docling/models/picture_description_base_model.py,sha256=uRpjBXC2qjpPyWFUt600N1GvmvF-vWwB8f-OTQ7PfDg,2305
|
54
|
+
docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
|
55
|
+
docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
|
+
docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
|
57
|
+
docling/models/rapid_ocr_model.py,sha256=C_I0Ek9mAPIyTFRHuNbqtXg1c15rLNDE1tJ6_hPIi4c,5869
|
58
|
+
docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
|
59
|
+
docling/models/table_structure_model.py,sha256=_b6-2alzhzI19-thDGpM3mww54mxbHLkEiTYMU84d30,11773
|
60
|
+
docling/models/tesseract_ocr_cli_model.py,sha256=S-rCisPrVa3ASvOWycqQoria0PtmNqgdg8YxrLbG1ww,10067
|
61
|
+
docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q-WgZN1Eg,9256
|
62
|
+
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
63
|
+
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
64
|
+
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
65
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=tHOHFyJajX6IAhm4y3I27uqn5jfMTuCaSaFOKT5JM2M,10593
|
66
|
+
docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
|
67
|
+
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
68
|
+
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
+
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
70
|
+
docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
|
71
|
+
docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
|
72
|
+
docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
|
73
|
+
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
74
|
+
docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
|
75
|
+
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
76
|
+
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
77
|
+
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
78
|
+
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
79
|
+
docling-2.27.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
80
|
+
docling-2.27.0.dist-info/METADATA,sha256=bjSjck82ddDda67NwQaZwW_s9T_jTHw9lE3RhhXf1Y4,10142
|
81
|
+
docling-2.27.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
82
|
+
docling-2.27.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
|
83
|
+
docling-2.27.0.dist-info/RECORD,,
|
docling-2.25.2.dist-info/RECORD
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
|
4
|
-
docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQQknNXk,14050
|
5
|
-
docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
|
6
|
-
docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
|
7
|
-
docling/backend/docling_parse_v2_backend.py,sha256=oF8W-zuvEfpmyXp7Itt6-ot_feeMneMmSG7CpKclMhc,9005
|
8
|
-
docling/backend/html_backend.py,sha256=qLzNpMpfmllwpp-5uARrmaVyN5D1YOpmsbS3-RyL2p0,19370
|
9
|
-
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
11
|
-
docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
|
12
|
-
docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
|
13
|
-
docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
|
14
|
-
docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
|
15
|
-
docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
|
16
|
-
docling/backend/pypdfium2_backend.py,sha256=l6YfoiIibw-Z4wrRwQTPP96IGOMAf1SIT_TPVBIuZRs,9663
|
17
|
-
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
|
19
|
-
docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
|
20
|
-
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
21
|
-
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
docling/cli/main.py,sha256=pCJ_GFgxsgZ0soz32OhMl-CWi7YXIrvax_m9Qw4UhMs,16839
|
23
|
-
docling/cli/models.py,sha256=DDnz-boX2MexPxC8OnOMPgSPG0iwseT3xkkCfgPrZis,3969
|
24
|
-
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
25
|
-
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
-
docling/datamodel/base_models.py,sha256=kMDT-rFhtJUFOOOry4wd2PzCMTLFixFklgSgmRDMS64,7201
|
27
|
-
docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
|
28
|
-
docling/datamodel/pipeline_options.py,sha256=YpWqCqkA44YUFPhiBg_LYcfOAXxNhv10vZKrkfLtJ_I,11987
|
29
|
-
docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
|
30
|
-
docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
|
31
|
-
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
32
|
-
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
-
docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
|
34
|
-
docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
|
35
|
-
docling/models/code_formula_model.py,sha256=6grbRPWaLljadheT5s4omdT6hmXfin4gJU17csWvhjY,8611
|
36
|
-
docling/models/document_picture_classifier.py,sha256=6I_j6fG5fnhIV6rqN31LYikNTZyg5isXrVs0GIqHDaY,6235
|
37
|
-
docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
|
38
|
-
docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
|
39
|
-
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
40
|
-
docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
|
41
|
-
docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
|
42
|
-
docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
|
43
|
-
docling/models/picture_description_api_model.py,sha256=SKNoHpqzbfM8iO-DJJ4ccyNVqO0B2d9neLBnXqt50FY,3186
|
44
|
-
docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
|
45
|
-
docling/models/picture_description_vlm_model.py,sha256=EvKn4zWgTsQnbMFEoDhU3Ox4Pu5DkPqd2QewsGoXULU,3641
|
46
|
-
docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
|
47
|
-
docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
|
48
|
-
docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
|
49
|
-
docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
|
50
|
-
docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
|
51
|
-
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
|
-
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
53
|
-
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
54
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=IQHktVYvueTrYnIgLonaMvfYKKsU3L-hC9dqrR-Lw8g,12904
|
55
|
-
docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
|
56
|
-
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
57
|
-
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
58
|
-
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
59
|
-
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
60
|
-
docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
|
61
|
-
docling/utils/layout_postprocessor.py,sha256=kdIk5TpAEXvsQUvkdALBDnAbjc4I_j8s8w6GEvbu4f0,24304
|
62
|
-
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
63
|
-
docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
|
64
|
-
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
65
|
-
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
66
|
-
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
67
|
-
docling/utils/visualization.py,sha256=cmbIroPQXPmJdFrNIfpC26WpijBwx05qmpu3QhiG1EI,2850
|
68
|
-
docling-2.25.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
69
|
-
docling-2.25.2.dist-info/METADATA,sha256=NsR1pyqk-Q5G5pHrpaLf6TCQEE-r-hGrEB9Hpqdgykk,8803
|
70
|
-
docling-2.25.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
71
|
-
docling-2.25.2.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
|
72
|
-
docling-2.25.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|