docling 2.26.0__py3-none-any.whl → 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/asciidoc_backend.py +1 -1
- docling/backend/csv_backend.py +1 -1
- docling/backend/docling_parse_backend.py +21 -13
- docling/backend/docling_parse_v2_backend.py +20 -12
- docling/backend/docling_parse_v4_backend.py +192 -0
- docling/backend/docx/__init__.py +0 -0
- docling/backend/docx/latex/__init__.py +0 -0
- docling/backend/docx/latex/latex_dict.py +271 -0
- docling/backend/docx/latex/omml.py +453 -0
- docling/backend/html_backend.py +7 -7
- docling/backend/md_backend.py +1 -1
- docling/backend/msexcel_backend.py +2 -45
- docling/backend/mspowerpoint_backend.py +19 -1
- docling/backend/msword_backend.py +68 -3
- docling/backend/pdf_backend.py +7 -2
- docling/backend/pypdfium2_backend.py +52 -30
- docling/backend/xml/uspto_backend.py +1 -1
- docling/cli/main.py +135 -53
- docling/cli/models.py +1 -1
- docling/datamodel/base_models.py +8 -10
- docling/datamodel/pipeline_options.py +54 -32
- docling/document_converter.py +5 -5
- docling/models/base_model.py +9 -1
- docling/models/base_ocr_model.py +27 -16
- docling/models/easyocr_model.py +28 -13
- docling/models/factories/__init__.py +27 -0
- docling/models/factories/base_factory.py +122 -0
- docling/models/factories/ocr_factory.py +11 -0
- docling/models/factories/picture_description_factory.py +11 -0
- docling/models/hf_mlx_model.py +137 -0
- docling/models/ocr_mac_model.py +39 -11
- docling/models/page_preprocessing_model.py +4 -0
- docling/models/picture_description_api_model.py +20 -3
- docling/models/picture_description_base_model.py +19 -3
- docling/models/picture_description_vlm_model.py +14 -2
- docling/models/plugins/__init__.py +0 -0
- docling/models/plugins/defaults.py +28 -0
- docling/models/rapid_ocr_model.py +34 -13
- docling/models/table_structure_model.py +13 -4
- docling/models/tesseract_ocr_cli_model.py +40 -15
- docling/models/tesseract_ocr_model.py +37 -12
- docling/pipeline/standard_pdf_pipeline.py +25 -78
- docling/pipeline/vlm_pipeline.py +78 -398
- docling/utils/export.py +8 -6
- docling/utils/layout_postprocessor.py +26 -23
- docling/utils/visualization.py +1 -1
- {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/METADATA +47 -23
- docling-2.28.0.dist-info/RECORD +84 -0
- {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/entry_points.txt +3 -0
- docling-2.26.0.dist-info/RECORD +0 -72
- {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/LICENSE +0 -0
- {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/WHEEL +0 -0
@@ -5,9 +5,10 @@ from collections import defaultdict
|
|
5
5
|
from typing import Dict, List, Set, Tuple
|
6
6
|
|
7
7
|
from docling_core.types.doc import DocItemLabel, Size
|
8
|
+
from docling_core.types.doc.page import TextCell
|
8
9
|
from rtree import index
|
9
10
|
|
10
|
-
from docling.datamodel.base_models import BoundingBox,
|
11
|
+
from docling.datamodel.base_models import BoundingBox, Cluster
|
11
12
|
|
12
13
|
_log = logging.getLogger(__name__)
|
13
14
|
|
@@ -198,7 +199,7 @@ class LayoutPostprocessor:
|
|
198
199
|
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
|
199
200
|
}
|
200
201
|
|
201
|
-
def __init__(self, cells: List[
|
202
|
+
def __init__(self, cells: List[TextCell], clusters: List[Cluster], page_size: Size):
|
202
203
|
"""Initialize processor with cells and clusters."""
|
203
204
|
"""Initialize processor with cells and spatial indices."""
|
204
205
|
self.cells = cells
|
@@ -218,7 +219,7 @@ class LayoutPostprocessor:
|
|
218
219
|
[c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
|
219
220
|
)
|
220
221
|
|
221
|
-
def postprocess(self) -> Tuple[List[Cluster], List[
|
222
|
+
def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
|
222
223
|
"""Main processing pipeline."""
|
223
224
|
self.regular_clusters = self._process_regular_clusters()
|
224
225
|
self.special_clusters = self._process_special_clusters()
|
@@ -271,15 +272,13 @@ class LayoutPostprocessor:
|
|
271
272
|
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
272
273
|
orphan_clusters = []
|
273
274
|
for i, cell in enumerate(unassigned):
|
274
|
-
conf =
|
275
|
-
if isinstance(cell, OcrCell):
|
276
|
-
conf = cell.confidence
|
275
|
+
conf = cell.confidence
|
277
276
|
|
278
277
|
orphan_clusters.append(
|
279
278
|
Cluster(
|
280
279
|
id=next_id + i,
|
281
280
|
label=DocItemLabel.TEXT,
|
282
|
-
bbox=cell.
|
281
|
+
bbox=cell.to_bounding_box(),
|
283
282
|
confidence=conf,
|
284
283
|
cells=[cell],
|
285
284
|
)
|
@@ -557,13 +556,13 @@ class LayoutPostprocessor:
|
|
557
556
|
|
558
557
|
return current_best if current_best else clusters[0]
|
559
558
|
|
560
|
-
def _deduplicate_cells(self, cells: List[
|
559
|
+
def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
561
560
|
"""Ensure each cell appears only once, maintaining order of first appearance."""
|
562
561
|
seen_ids = set()
|
563
562
|
unique_cells = []
|
564
563
|
for cell in cells:
|
565
|
-
if cell.
|
566
|
-
seen_ids.add(cell.
|
564
|
+
if cell.index not in seen_ids:
|
565
|
+
seen_ids.add(cell.index)
|
567
566
|
unique_cells.append(cell)
|
568
567
|
return unique_cells
|
569
568
|
|
@@ -582,11 +581,13 @@ class LayoutPostprocessor:
|
|
582
581
|
best_cluster = None
|
583
582
|
|
584
583
|
for cluster in clusters:
|
585
|
-
if cell.
|
584
|
+
if cell.rect.to_bounding_box().area() <= 0:
|
586
585
|
continue
|
587
586
|
|
588
|
-
overlap = cell.
|
589
|
-
|
587
|
+
overlap = cell.rect.to_bounding_box().intersection_area_with(
|
588
|
+
cluster.bbox
|
589
|
+
)
|
590
|
+
overlap_ratio = overlap / cell.rect.to_bounding_box().area()
|
590
591
|
|
591
592
|
if overlap_ratio > best_overlap:
|
592
593
|
best_overlap = overlap_ratio
|
@@ -601,11 +602,13 @@ class LayoutPostprocessor:
|
|
601
602
|
|
602
603
|
return clusters
|
603
604
|
|
604
|
-
def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[
|
605
|
+
def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
|
605
606
|
"""Find cells not assigned to any cluster."""
|
606
|
-
assigned = {cell.
|
607
|
+
assigned = {cell.index for cluster in clusters for cell in cluster.cells}
|
607
608
|
return [
|
608
|
-
cell
|
609
|
+
cell
|
610
|
+
for cell in self.cells
|
611
|
+
if cell.index not in assigned and cell.text.strip()
|
609
612
|
]
|
610
613
|
|
611
614
|
def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
|
@@ -615,10 +618,10 @@ class LayoutPostprocessor:
|
|
615
618
|
continue
|
616
619
|
|
617
620
|
cells_bbox = BoundingBox(
|
618
|
-
l=min(cell.
|
619
|
-
t=min(cell.
|
620
|
-
r=max(cell.
|
621
|
-
b=max(cell.
|
621
|
+
l=min(cell.rect.to_bounding_box().l for cell in cluster.cells),
|
622
|
+
t=min(cell.rect.to_bounding_box().t for cell in cluster.cells),
|
623
|
+
r=max(cell.rect.to_bounding_box().r for cell in cluster.cells),
|
624
|
+
b=max(cell.rect.to_bounding_box().b for cell in cluster.cells),
|
622
625
|
)
|
623
626
|
|
624
627
|
if cluster.label == DocItemLabel.TABLE:
|
@@ -634,9 +637,9 @@ class LayoutPostprocessor:
|
|
634
637
|
|
635
638
|
return clusters
|
636
639
|
|
637
|
-
def _sort_cells(self, cells: List[
|
640
|
+
def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
|
638
641
|
"""Sort cells in native reading order."""
|
639
|
-
return sorted(cells, key=lambda c: (c.
|
642
|
+
return sorted(cells, key=lambda c: (c.index))
|
640
643
|
|
641
644
|
def _sort_clusters(
|
642
645
|
self, clusters: List[Cluster], mode: str = "id"
|
@@ -647,7 +650,7 @@ class LayoutPostprocessor:
|
|
647
650
|
clusters,
|
648
651
|
key=lambda cluster: (
|
649
652
|
(
|
650
|
-
min(cell.
|
653
|
+
min(cell.index for cell in cluster.cells)
|
651
654
|
if cluster.cells
|
652
655
|
else sys.maxsize
|
653
656
|
),
|
docling/utils/visualization.py
CHANGED
@@ -25,7 +25,7 @@ def draw_clusters(
|
|
25
25
|
# Draw cells first (underneath)
|
26
26
|
cell_color = (0, 0, 0, 40) # Transparent black for cells
|
27
27
|
for tc in c.cells:
|
28
|
-
cx0, cy0, cx1, cy1 = tc.
|
28
|
+
cx0, cy0, cx1, cy1 = tc.rect.to_bounding_box().as_tuple()
|
29
29
|
cx0 *= scale_x
|
30
30
|
cx1 *= scale_x
|
31
31
|
cy0 *= scale_x
|
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.
|
3
|
+
Version: 2.28.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
|
-
Home-page: https://github.com/
|
5
|
+
Home-page: https://github.com/docling-project/docling
|
6
6
|
License: MIT
|
7
7
|
Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
|
8
8
|
Author: Christoph Auer
|
@@ -28,9 +28,9 @@ Provides-Extra: vlm
|
|
28
28
|
Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
|
29
29
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
30
30
|
Requires-Dist: certifi (>=2024.7.4)
|
31
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
31
|
+
Requires-Dist: docling-core[chunking] (>=2.23.1,<3.0.0)
|
32
32
|
Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
|
33
|
-
Requires-Dist: docling-parse (>=
|
33
|
+
Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
|
34
34
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
35
35
|
Requires-Dist: filetype (>=1.2.0,<2.0.0)
|
36
36
|
Requires-Dist: huggingface_hub (>=0.23,<1)
|
@@ -42,8 +42,10 @@ Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (ex
|
|
42
42
|
Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
|
43
43
|
Requires-Dist: pandas (>=2.1.4,<3.0.0)
|
44
44
|
Requires-Dist: pillow (>=10.0.0,<12.0.0)
|
45
|
+
Requires-Dist: pluggy (>=1.0.0,<2.0.0)
|
45
46
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
46
47
|
Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
|
48
|
+
Requires-Dist: pylatexenc (>=2.10,<3.0)
|
47
49
|
Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
|
48
50
|
Requires-Dist: python-docx (>=1.1.2,<2.0.0)
|
49
51
|
Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
|
@@ -57,12 +59,12 @@ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
|
|
57
59
|
Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
|
58
60
|
Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
|
59
61
|
Requires-Dist: typer (>=0.12.5,<0.13.0)
|
60
|
-
Project-URL: Repository, https://github.com/
|
62
|
+
Project-URL: Repository, https://github.com/docling-project/docling
|
61
63
|
Description-Content-Type: text/markdown
|
62
64
|
|
63
65
|
<p align="center">
|
64
|
-
<a href="https://github.com/
|
65
|
-
<img loading="lazy" alt="Docling" src="https://github.com/
|
66
|
+
<a href="https://github.com/docling-project/docling">
|
67
|
+
<img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
|
66
68
|
</a>
|
67
69
|
</p>
|
68
70
|
|
@@ -73,7 +75,7 @@ Description-Content-Type: text/markdown
|
|
73
75
|
</p>
|
74
76
|
|
75
77
|
[arXiv](https://arxiv.org/abs/2408.09869)
|
76
|
-
[](https://
|
78
|
+
[Docs](https://docling-project.github.io/docling/)
|
77
79
|
[PyPI version](https://pypi.org/project/docling/)
|
78
80
|
[PyPI - Python Version](https://pypi.org/project/docling/)
|
79
81
|
[Poetry](https://python-poetry.org/)
|
@@ -81,8 +83,10 @@ Description-Content-Type: text/markdown
|
|
81
83
|
[Imports: isort](https://pycqa.github.io/isort/)
|
82
84
|
[Pydantic v2](https://pydantic.dev)
|
83
85
|
[pre-commit](https://github.com/pre-commit/pre-commit)
|
84
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
85
87
|
[PyPI Downloads](https://pepy.tech/projects/docling)
|
88
|
+
[Docling Actor](https://apify.com/vancura/docling)
|
89
|
+
[LF AI & Data](https://lfaidata.foundation/projects/)
|
86
90
|
|
87
91
|
Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
|
88
92
|
|
@@ -95,12 +99,12 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|
95
99
|
* 🔒 Local execution capabilities for sensitive data and air-gapped environments
|
96
100
|
* 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
|
97
101
|
* 🔍 Extensive OCR support for scanned PDFs and images
|
102
|
+
* 🥚 Support for Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
|
98
103
|
* 💻 Simple and convenient CLI
|
99
104
|
|
100
105
|
### Coming soon
|
101
106
|
|
102
107
|
* 📝 Metadata extraction, including title, authors, references & language
|
103
|
-
* 📝 Inclusion of Visual Language Models ([SmolDocling](https://huggingface.co/blog/smolervlm#smoldocling))
|
104
108
|
* 📝 Chart understanding (bar charts, pie charts, line plots, etc.)
|
105
109
|
* 📝 Complex chemistry understanding (Molecular structures)
|
106
110
|
|
@@ -113,11 +117,11 @@ pip install docling
|
|
113
117
|
|
114
118
|
Works on macOS, Linux, and Windows environments, on both x86_64 and arm64 architectures.
|
115
119
|
|
116
|
-
More [detailed installation instructions](https://
|
120
|
+
More [detailed installation instructions](https://docling-project.github.io/docling/installation/) are available in the docs.
|
117
121
|
|
118
122
|
## Getting started
|
119
123
|
|
120
|
-
To convert individual documents, use `convert()`, for example:
|
124
|
+
To convert individual documents with Python, use `convert()`, for example:
|
121
125
|
|
122
126
|
```python
|
123
127
|
from docling.document_converter import DocumentConverter
|
@@ -128,28 +132,44 @@ result = converter.convert(source)
|
|
128
132
|
print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
|
129
133
|
```
|
130
134
|
|
131
|
-
More [advanced usage options](https://
|
135
|
+
More [advanced usage options](https://docling-project.github.io/docling/usage/) are available in
|
132
136
|
the docs.
|
133
137
|
|
138
|
+
## CLI
|
139
|
+
|
140
|
+
Docling has a built-in CLI to run conversions.
|
141
|
+
|
142
|
+
```bash
|
143
|
+
docling https://arxiv.org/pdf/2206.01062
|
144
|
+
```
|
145
|
+
|
146
|
+
You can also use 🥚[SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview) and other VLMs via the Docling CLI:
|
147
|
+
```bash
|
148
|
+
docling --pipeline vlm --vlm-model smoldocling https://arxiv.org/pdf/2206.01062
|
149
|
+
```
|
150
|
+
This will use MLX acceleration on supported Apple Silicon hardware.
|
151
|
+
|
152
|
+
Read more in the [usage documentation](https://docling-project.github.io/docling/usage/).
|
153
|
+
|
134
154
|
## Documentation
|
135
155
|
|
136
|
-
Check out Docling's [documentation](https://
|
156
|
+
Check out Docling's [documentation](https://docling-project.github.io/docling/) for details on
|
137
157
|
installation, usage, concepts, recipes, extensions, and more.
|
138
158
|
|
139
159
|
## Examples
|
140
160
|
|
141
|
-
Go hands-on with our [examples](https://
|
161
|
+
Go hands-on with our [examples](https://docling-project.github.io/docling/examples/),
|
142
162
|
demonstrating how to address different application use cases with Docling.
|
143
163
|
|
144
164
|
## Integrations
|
145
165
|
|
146
166
|
To further accelerate your AI application development, check out Docling's native
|
147
|
-
[integrations](https://
|
167
|
+
[integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
|
148
168
|
and tools.
|
149
169
|
|
150
170
|
## Get help and support
|
151
171
|
|
152
|
-
Please feel free to connect with us using the [discussion section](https://github.com/
|
172
|
+
Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
|
153
173
|
|
154
174
|
## Technical report
|
155
175
|
|
@@ -157,7 +177,7 @@ For more details on Docling's inner workings, check out the [Docling Technical R
|
|
157
177
|
|
158
178
|
## Contributing
|
159
179
|
|
160
|
-
Please read [Contributing to Docling](https://github.com/
|
180
|
+
Please read [Contributing to Docling](https://github.com/docling-project/docling/blob/main/CONTRIBUTING.md) for details.
|
161
181
|
|
162
182
|
## References
|
163
183
|
|
@@ -181,11 +201,15 @@ If you use Docling in your projects, please consider citing the following:
|
|
181
201
|
The Docling codebase is under MIT license.
|
182
202
|
For individual model usage, please refer to the model licenses found in the original packages.
|
183
203
|
|
184
|
-
##
|
204
|
+
## LF AI & Data
|
205
|
+
|
206
|
+
Docling is hosted as a project in the [LF AI & Data Foundation](https://lfaidata.foundation/projects/).
|
207
|
+
|
208
|
+
### IBM ❤️ Open Source AI
|
185
209
|
|
186
|
-
|
210
|
+
The project was started by the AI for knowledge team at IBM Research Zurich.
|
187
211
|
|
188
|
-
[supported_formats]: https://
|
189
|
-
[docling_document]: https://
|
190
|
-
[integrations]: https://
|
212
|
+
[supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
|
213
|
+
[docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
|
214
|
+
[integrations]: https://docling-project.github.io/docling/integrations/
|
191
215
|
|
@@ -0,0 +1,84 @@
|
|
1
|
+
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
|
4
|
+
docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhXorvZ1k,14060
|
5
|
+
docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
|
6
|
+
docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
|
7
|
+
docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
|
8
|
+
docling/backend/docling_parse_v4_backend.py,sha256=IECMJQWEvYqQv043_1Ho6dLkCbuaK8cMUsqcxwqruXo,6287
|
9
|
+
docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
docling/backend/docx/latex/latex_dict.py,sha256=a0UC3VLmG1BLN-hGmEaQamzKbDB10fCz0U8qRU--aBw,6613
|
12
|
+
docling/backend/docx/latex/omml.py,sha256=U-mQXNCI9ObUyHDxv6ItvaHlObIEu77PiXS1Vaaah6U,12012
|
13
|
+
docling/backend/html_backend.py,sha256=i9a5ucsIuf-sn6M8tmKt9Kg_qWqc5OJxhARb6ZNS3wI,19448
|
14
|
+
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
16
|
+
docling/backend/md_backend.py,sha256=v230PXShYJo2QaabwUHiBpE-EGScHIerjL78zPaJpZM,16837
|
17
|
+
docling/backend/msexcel_backend.py,sha256=_ZVZFKRRijpg-Xz10xNxu2m-NpDaYvoiBqEZP6GbrgE,11095
|
18
|
+
docling/backend/mspowerpoint_backend.py,sha256=zXdXr8nGJJbPGTgR5_dqq5WmNL1wDCaK0RqFqtuHPqs,17213
|
19
|
+
docling/backend/msword_backend.py,sha256=VjTvJe249FjHJDBpK0RC4iyosMzmpJLTuFIAPNEdReU,23259
|
20
|
+
docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
|
21
|
+
docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
|
22
|
+
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
+
docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
|
24
|
+
docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
|
25
|
+
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
26
|
+
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
+
docling/cli/main.py,sha256=zr36i-itYkX013g_DK6aNiNe8UPaD27_A7UtG5qwLUo,20174
|
28
|
+
docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
|
29
|
+
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
30
|
+
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
+
docling/datamodel/base_models.py,sha256=MAHr8LlffZ2uIXZ3AXOsikh_-oQIEYTiwwjsz-dQW9U,7287
|
32
|
+
docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
|
33
|
+
docling/datamodel/pipeline_options.py,sha256=TpRf_-7UuCjjaytFWA0nL2m-KP4no9jeAjaXRjBLMLE,12593
|
34
|
+
docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
|
35
|
+
docling/document_converter.py,sha256=LwbnfGzma937EmSrNWMzM-dldI9Cbu4DUgY8gL1OVHo,13184
|
36
|
+
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
37
|
+
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
+
docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
|
39
|
+
docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
|
40
|
+
docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
|
41
|
+
docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
|
42
|
+
docling/models/easyocr_model.py,sha256=ezq3yv5lORe7T1bbSoTZALck2oHqyEHq57cRfhMYCCQ,7401
|
43
|
+
docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq85Q2K_aM,872
|
44
|
+
docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
|
45
|
+
docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
|
46
|
+
docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
|
47
|
+
docling/models/hf_mlx_model.py,sha256=2eSHphJm5LAfiSA24blVMc2znJlKMYrtmmzq8ffc-rU,4924
|
48
|
+
docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
|
49
|
+
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
50
|
+
docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
|
51
|
+
docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
|
52
|
+
docling/models/page_preprocessing_model.py,sha256=wAN2WlW7YnpqyETq6MpEWgUAokUwqGaX_g59sPUQsXo,2903
|
53
|
+
docling/models/picture_description_api_model.py,sha256=SRjOkCTBYa1pTIaQffDLUPabljjYrLOQ916MywESEXk,3715
|
54
|
+
docling/models/picture_description_base_model.py,sha256=uRpjBXC2qjpPyWFUt600N1GvmvF-vWwB8f-OTQ7PfDg,2305
|
55
|
+
docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
|
56
|
+
docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
|
+
docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
|
58
|
+
docling/models/rapid_ocr_model.py,sha256=C_I0Ek9mAPIyTFRHuNbqtXg1c15rLNDE1tJ6_hPIi4c,5869
|
59
|
+
docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
|
60
|
+
docling/models/table_structure_model.py,sha256=_b6-2alzhzI19-thDGpM3mww54mxbHLkEiTYMU84d30,11773
|
61
|
+
docling/models/tesseract_ocr_cli_model.py,sha256=S-rCisPrVa3ASvOWycqQoria0PtmNqgdg8YxrLbG1ww,10067
|
62
|
+
docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q-WgZN1Eg,9256
|
63
|
+
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
+
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
65
|
+
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
66
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=tHOHFyJajX6IAhm4y3I27uqn5jfMTuCaSaFOKT5JM2M,10593
|
67
|
+
docling/pipeline/vlm_pipeline.py,sha256=1eKt3gqWf6PxGvYZuqhKi2BFljJGJWIyHemzOAwa39Y,9065
|
68
|
+
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
69
|
+
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
|
+
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
71
|
+
docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
|
72
|
+
docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
|
73
|
+
docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
|
74
|
+
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
75
|
+
docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
|
76
|
+
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
77
|
+
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
78
|
+
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
79
|
+
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
80
|
+
docling-2.28.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
81
|
+
docling-2.28.0.dist-info/METADATA,sha256=miIkWRX5hgrOeGbyYDAiQaymAR6PxK6Qdlss5DR1YhM,9982
|
82
|
+
docling-2.28.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
83
|
+
docling-2.28.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
|
84
|
+
docling-2.28.0.dist-info/RECORD,,
|
docling-2.26.0.dist-info/RECORD
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
|
4
|
-
docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQQknNXk,14050
|
5
|
-
docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
|
6
|
-
docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
|
7
|
-
docling/backend/docling_parse_v2_backend.py,sha256=oF8W-zuvEfpmyXp7Itt6-ot_feeMneMmSG7CpKclMhc,9005
|
8
|
-
docling/backend/html_backend.py,sha256=qLzNpMpfmllwpp-5uARrmaVyN5D1YOpmsbS3-RyL2p0,19370
|
9
|
-
docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
|
11
|
-
docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
|
12
|
-
docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
|
13
|
-
docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
|
14
|
-
docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
|
15
|
-
docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
|
16
|
-
docling/backend/pypdfium2_backend.py,sha256=l6YfoiIibw-Z4wrRwQTPP96IGOMAf1SIT_TPVBIuZRs,9663
|
17
|
-
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
|
19
|
-
docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
|
20
|
-
docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
|
21
|
-
docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
docling/cli/main.py,sha256=unokSvmqZqFE_yLUQGBIo7q9QjdFrrE8EqnHxnqpGtM,16863
|
23
|
-
docling/cli/models.py,sha256=DDnz-boX2MexPxC8OnOMPgSPG0iwseT3xkkCfgPrZis,3969
|
24
|
-
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
25
|
-
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
-
docling/datamodel/base_models.py,sha256=kMDT-rFhtJUFOOOry4wd2PzCMTLFixFklgSgmRDMS64,7201
|
27
|
-
docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
|
28
|
-
docling/datamodel/pipeline_options.py,sha256=L5ZmMZOkE0T2419uk_butX3ZoY8GhLJcmuGm2Gf1OHU,11991
|
29
|
-
docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
|
30
|
-
docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
|
31
|
-
docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
|
32
|
-
docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
-
docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
|
34
|
-
docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
|
35
|
-
docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
|
36
|
-
docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
|
37
|
-
docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
|
38
|
-
docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
|
39
|
-
docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
|
40
|
-
docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
|
41
|
-
docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
|
42
|
-
docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
|
43
|
-
docling/models/picture_description_api_model.py,sha256=SKNoHpqzbfM8iO-DJJ4ccyNVqO0B2d9neLBnXqt50FY,3186
|
44
|
-
docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
|
45
|
-
docling/models/picture_description_vlm_model.py,sha256=EvKn4zWgTsQnbMFEoDhU3Ox4Pu5DkPqd2QewsGoXULU,3641
|
46
|
-
docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
|
47
|
-
docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
|
48
|
-
docling/models/table_structure_model.py,sha256=gEXHRtHlLFUsP_Gs2EPaBJL-3KlMHa5HLUwzr3kN4_Y,11225
|
49
|
-
docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
|
50
|
-
docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
|
51
|
-
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
|
-
docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
|
53
|
-
docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
|
54
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=IQHktVYvueTrYnIgLonaMvfYKKsU3L-hC9dqrR-Lw8g,12904
|
55
|
-
docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
|
56
|
-
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
57
|
-
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
58
|
-
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
59
|
-
docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
|
60
|
-
docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
|
61
|
-
docling/utils/layout_postprocessor.py,sha256=kdIk5TpAEXvsQUvkdALBDnAbjc4I_j8s8w6GEvbu4f0,24304
|
62
|
-
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
63
|
-
docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
|
64
|
-
docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
|
65
|
-
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
66
|
-
docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
|
67
|
-
docling/utils/visualization.py,sha256=cmbIroPQXPmJdFrNIfpC26WpijBwx05qmpu3QhiG1EI,2850
|
68
|
-
docling-2.26.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
69
|
-
docling-2.26.0.dist-info/METADATA,sha256=IPh-vv9mpl1sHnl4pkEsLGrdYeBlaJ-mfN28sn_zito,8803
|
70
|
-
docling-2.26.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
71
|
-
docling-2.26.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
|
72
|
-
docling-2.26.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|