docling-2.26.0-py3-none-any.whl → docling-2.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. docling/backend/asciidoc_backend.py +1 -1
  2. docling/backend/csv_backend.py +1 -1
  3. docling/backend/docling_parse_backend.py +21 -13
  4. docling/backend/docling_parse_v2_backend.py +20 -12
  5. docling/backend/docling_parse_v4_backend.py +192 -0
  6. docling/backend/docx/__init__.py +0 -0
  7. docling/backend/docx/latex/__init__.py +0 -0
  8. docling/backend/docx/latex/latex_dict.py +271 -0
  9. docling/backend/docx/latex/omml.py +453 -0
  10. docling/backend/html_backend.py +7 -7
  11. docling/backend/md_backend.py +1 -1
  12. docling/backend/msexcel_backend.py +2 -45
  13. docling/backend/mspowerpoint_backend.py +19 -1
  14. docling/backend/msword_backend.py +68 -3
  15. docling/backend/pdf_backend.py +7 -2
  16. docling/backend/pypdfium2_backend.py +52 -30
  17. docling/backend/xml/uspto_backend.py +1 -1
  18. docling/cli/main.py +135 -53
  19. docling/cli/models.py +1 -1
  20. docling/datamodel/base_models.py +8 -10
  21. docling/datamodel/pipeline_options.py +54 -32
  22. docling/document_converter.py +5 -5
  23. docling/models/base_model.py +9 -1
  24. docling/models/base_ocr_model.py +27 -16
  25. docling/models/easyocr_model.py +28 -13
  26. docling/models/factories/__init__.py +27 -0
  27. docling/models/factories/base_factory.py +122 -0
  28. docling/models/factories/ocr_factory.py +11 -0
  29. docling/models/factories/picture_description_factory.py +11 -0
  30. docling/models/hf_mlx_model.py +137 -0
  31. docling/models/ocr_mac_model.py +39 -11
  32. docling/models/page_preprocessing_model.py +4 -0
  33. docling/models/picture_description_api_model.py +20 -3
  34. docling/models/picture_description_base_model.py +19 -3
  35. docling/models/picture_description_vlm_model.py +14 -2
  36. docling/models/plugins/__init__.py +0 -0
  37. docling/models/plugins/defaults.py +28 -0
  38. docling/models/rapid_ocr_model.py +34 -13
  39. docling/models/table_structure_model.py +13 -4
  40. docling/models/tesseract_ocr_cli_model.py +40 -15
  41. docling/models/tesseract_ocr_model.py +37 -12
  42. docling/pipeline/standard_pdf_pipeline.py +25 -78
  43. docling/pipeline/vlm_pipeline.py +78 -398
  44. docling/utils/export.py +8 -6
  45. docling/utils/layout_postprocessor.py +26 -23
  46. docling/utils/visualization.py +1 -1
  47. {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/METADATA +47 -23
  48. docling-2.28.0.dist-info/RECORD +84 -0
  49. {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/entry_points.txt +3 -0
  50. docling-2.26.0.dist-info/RECORD +0 -72
  51. {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/LICENSE +0 -0
  52. {docling-2.26.0.dist-info → docling-2.28.0.dist-info}/WHEEL +0 -0
@@ -5,9 +5,10 @@ from collections import defaultdict
5
5
  from typing import Dict, List, Set, Tuple
6
6
 
7
7
  from docling_core.types.doc import DocItemLabel, Size
8
+ from docling_core.types.doc.page import TextCell
8
9
  from rtree import index
9
10
 
10
- from docling.datamodel.base_models import BoundingBox, Cell, Cluster, OcrCell
11
+ from docling.datamodel.base_models import BoundingBox, Cluster
11
12
 
12
13
  _log = logging.getLogger(__name__)
13
14
 
@@ -198,7 +199,7 @@ class LayoutPostprocessor:
198
199
  DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
199
200
  }
200
201
 
201
- def __init__(self, cells: List[Cell], clusters: List[Cluster], page_size: Size):
202
+ def __init__(self, cells: List[TextCell], clusters: List[Cluster], page_size: Size):
202
203
  """Initialize processor with cells and clusters."""
203
204
  """Initialize processor with cells and spatial indices."""
204
205
  self.cells = cells
@@ -218,7 +219,7 @@ class LayoutPostprocessor:
218
219
  [c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
219
220
  )
220
221
 
221
- def postprocess(self) -> Tuple[List[Cluster], List[Cell]]:
222
+ def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
222
223
  """Main processing pipeline."""
223
224
  self.regular_clusters = self._process_regular_clusters()
224
225
  self.special_clusters = self._process_special_clusters()
@@ -271,15 +272,13 @@ class LayoutPostprocessor:
271
272
  next_id = max((c.id for c in self.all_clusters), default=0) + 1
272
273
  orphan_clusters = []
273
274
  for i, cell in enumerate(unassigned):
274
- conf = 1.0
275
- if isinstance(cell, OcrCell):
276
- conf = cell.confidence
275
+ conf = cell.confidence
277
276
 
278
277
  orphan_clusters.append(
279
278
  Cluster(
280
279
  id=next_id + i,
281
280
  label=DocItemLabel.TEXT,
282
- bbox=cell.bbox,
281
+ bbox=cell.to_bounding_box(),
283
282
  confidence=conf,
284
283
  cells=[cell],
285
284
  )
@@ -557,13 +556,13 @@ class LayoutPostprocessor:
557
556
 
558
557
  return current_best if current_best else clusters[0]
559
558
 
560
- def _deduplicate_cells(self, cells: List[Cell]) -> List[Cell]:
559
+ def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
561
560
  """Ensure each cell appears only once, maintaining order of first appearance."""
562
561
  seen_ids = set()
563
562
  unique_cells = []
564
563
  for cell in cells:
565
- if cell.id not in seen_ids:
566
- seen_ids.add(cell.id)
564
+ if cell.index not in seen_ids:
565
+ seen_ids.add(cell.index)
567
566
  unique_cells.append(cell)
568
567
  return unique_cells
569
568
 
@@ -582,11 +581,13 @@ class LayoutPostprocessor:
582
581
  best_cluster = None
583
582
 
584
583
  for cluster in clusters:
585
- if cell.bbox.area() <= 0:
584
+ if cell.rect.to_bounding_box().area() <= 0:
586
585
  continue
587
586
 
588
- overlap = cell.bbox.intersection_area_with(cluster.bbox)
589
- overlap_ratio = overlap / cell.bbox.area()
587
+ overlap = cell.rect.to_bounding_box().intersection_area_with(
588
+ cluster.bbox
589
+ )
590
+ overlap_ratio = overlap / cell.rect.to_bounding_box().area()
590
591
 
591
592
  if overlap_ratio > best_overlap:
592
593
  best_overlap = overlap_ratio
@@ -601,11 +602,13 @@ class LayoutPostprocessor:
601
602
 
602
603
  return clusters
603
604
 
604
- def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[Cell]:
605
+ def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
605
606
  """Find cells not assigned to any cluster."""
606
- assigned = {cell.id for cluster in clusters for cell in cluster.cells}
607
+ assigned = {cell.index for cluster in clusters for cell in cluster.cells}
607
608
  return [
608
- cell for cell in self.cells if cell.id not in assigned and cell.text.strip()
609
+ cell
610
+ for cell in self.cells
611
+ if cell.index not in assigned and cell.text.strip()
609
612
  ]
610
613
 
611
614
  def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
@@ -615,10 +618,10 @@ class LayoutPostprocessor:
615
618
  continue
616
619
 
617
620
  cells_bbox = BoundingBox(
618
- l=min(cell.bbox.l for cell in cluster.cells),
619
- t=min(cell.bbox.t for cell in cluster.cells),
620
- r=max(cell.bbox.r for cell in cluster.cells),
621
- b=max(cell.bbox.b for cell in cluster.cells),
621
+ l=min(cell.rect.to_bounding_box().l for cell in cluster.cells),
622
+ t=min(cell.rect.to_bounding_box().t for cell in cluster.cells),
623
+ r=max(cell.rect.to_bounding_box().r for cell in cluster.cells),
624
+ b=max(cell.rect.to_bounding_box().b for cell in cluster.cells),
622
625
  )
623
626
 
624
627
  if cluster.label == DocItemLabel.TABLE:
@@ -634,9 +637,9 @@ class LayoutPostprocessor:
634
637
 
635
638
  return clusters
636
639
 
637
- def _sort_cells(self, cells: List[Cell]) -> List[Cell]:
640
+ def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
638
641
  """Sort cells in native reading order."""
639
- return sorted(cells, key=lambda c: (c.id))
642
+ return sorted(cells, key=lambda c: (c.index))
640
643
 
641
644
  def _sort_clusters(
642
645
  self, clusters: List[Cluster], mode: str = "id"
@@ -647,7 +650,7 @@ class LayoutPostprocessor:
647
650
  clusters,
648
651
  key=lambda cluster: (
649
652
  (
650
- min(cell.id for cell in cluster.cells)
653
+ min(cell.index for cell in cluster.cells)
651
654
  if cluster.cells
652
655
  else sys.maxsize
653
656
  ),
@@ -25,7 +25,7 @@ def draw_clusters(
25
25
  # Draw cells first (underneath)
26
26
  cell_color = (0, 0, 0, 40) # Transparent black for cells
27
27
  for tc in c.cells:
28
- cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
28
+ cx0, cy0, cx1, cy1 = tc.rect.to_bounding_box().as_tuple()
29
29
  cx0 *= scale_x
30
30
  cx1 *= scale_x
31
31
  cy0 *= scale_x
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.26.0
3
+ Version: 2.28.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
- Home-page: https://github.com/DS4SD/docling
5
+ Home-page: https://github.com/docling-project/docling
6
6
  License: MIT
7
7
  Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
8
8
  Author: Christoph Auer
@@ -28,9 +28,9 @@ Provides-Extra: vlm
28
28
  Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
29
29
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
30
30
  Requires-Dist: certifi (>=2024.7.4)
31
- Requires-Dist: docling-core[chunking] (>=2.19.0,<3.0.0)
31
+ Requires-Dist: docling-core[chunking] (>=2.23.1,<3.0.0)
32
32
  Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
33
- Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
33
+ Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
34
34
  Requires-Dist: easyocr (>=1.7,<2.0)
35
35
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
36
36
  Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -42,8 +42,10 @@ Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (ex
42
42
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
43
43
  Requires-Dist: pandas (>=2.1.4,<3.0.0)
44
44
  Requires-Dist: pillow (>=10.0.0,<12.0.0)
45
+ Requires-Dist: pluggy (>=1.0.0,<2.0.0)
45
46
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
46
47
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
48
+ Requires-Dist: pylatexenc (>=2.10,<3.0)
47
49
  Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
48
50
  Requires-Dist: python-docx (>=1.1.2,<2.0.0)
49
51
  Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
@@ -57,12 +59,12 @@ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
57
59
  Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
58
60
  Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
59
61
  Requires-Dist: typer (>=0.12.5,<0.13.0)
60
- Project-URL: Repository, https://github.com/DS4SD/docling
62
+ Project-URL: Repository, https://github.com/docling-project/docling
61
63
  Description-Content-Type: text/markdown
62
64
 
63
65
  <p align="center">
64
- <a href="https://github.com/ds4sd/docling">
65
- <img loading="lazy" alt="Docling" src="https://github.com/DS4SD/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
66
+ <a href="https://github.com/docling-project/docling">
67
+ <img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
66
68
  </a>
67
69
  </p>
68
70
 
@@ -73,7 +75,7 @@ Description-Content-Type: text/markdown
73
75
  </p>
74
76
 
75
77
  [![arXiv](https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg)](https://arxiv.org/abs/2408.09869)
76
- [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://ds4sd.github.io/docling/)
78
+ [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
77
79
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
78
80
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
79
81
  [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
@@ -81,8 +83,10 @@ Description-Content-Type: text/markdown
81
83
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
82
84
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
83
85
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
84
- [![License MIT](https://img.shields.io/github/license/DS4SD/docling)](https://opensource.org/licenses/MIT)
86
+ [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
85
87
  [![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
88
+ [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
89
+ [![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&logoColor=fff&color=0094ff&labelColor=003778)](https://lfaidata.foundation/projects/)
86
90
 
87
91
  Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
88
92
 
@@ -95,12 +99,12 @@ Docling simplifies document processing, parsing diverse formats — including ad
95
99
  * 🔒 Local execution capabilities for sensitive data and air-gapped environments
96
100
  * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
97
101
  * 🔍 Extensive OCR support for scanned PDFs and images
102
+ * 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
98
103
  * 💻 Simple and convenient CLI
99
104
 
100
105
  ### Coming soon
101
106
 
102
107
  * 📝 Metadata extraction, including title, authors, references & language
103
- * 📝 Inclusion of Visual Language Models ([SmolDocling](https://huggingface.co/blog/smolervlm#smoldocling))
104
108
  * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
105
109
  * 📝 Complex chemistry understanding (Molecular structures)
106
110
 
@@ -113,11 +117,11 @@ pip install docling
113
117
 
114
118
  Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
115
119
 
116
- More [detailed installation instructions](https://ds4sd.github.io/docling/installation/) are available in the docs.
120
+ More [detailed installation instructions](https://docling-project.github.io/docling/installation/) are available in the docs.
117
121
 
118
122
  ## Getting started
119
123
 
120
- To convert individual documents, use `convert()`, for example:
124
+ To convert individual documents with python, use `convert()`, for example:
121
125
 
122
126
  ```python
123
127
  from docling.document_converter import DocumentConverter
@@ -128,28 +132,44 @@ result = converter.convert(source)
128
132
  print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
129
133
  ```
130
134
 
131
- More [advanced usage options](https://ds4sd.github.io/docling/usage/) are available in
135
+ More [advanced usage options](https://docling-project.github.io/docling/usage/) are available in
132
136
  the docs.
133
137
 
138
+ ## CLI
139
+
140
+ Docling has a built-in CLI to run conversions.
141
+
142
+ ```bash
143
+ docling https://arxiv.org/pdf/2206.01062
144
+ ```
145
+
146
+ You can also use 🥚[SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview) and other VLMs via Docling CLI:
147
+ ```bash
148
+ docling --pipeline vlm --vlm-model smoldocling https://arxiv.org/pdf/2206.01062
149
+ ```
150
+ This will use MLX acceleration on supported Apple Silicon hardware.
151
+
152
+ Read more [here](https://docling-project.github.io/docling/usage/)
153
+
134
154
  ## Documentation
135
155
 
136
- Check out Docling's [documentation](https://ds4sd.github.io/docling/), for details on
156
+ Check out Docling's [documentation](https://docling-project.github.io/docling/), for details on
137
157
  installation, usage, concepts, recipes, extensions, and more.
138
158
 
139
159
  ## Examples
140
160
 
141
- Go hands-on with our [examples](https://ds4sd.github.io/docling/examples/),
161
+ Go hands-on with our [examples](https://docling-project.github.io/docling/examples/),
142
162
  demonstrating how to address different application use cases with Docling.
143
163
 
144
164
  ## Integrations
145
165
 
146
166
  To further accelerate your AI application development, check out Docling's native
147
- [integrations](https://ds4sd.github.io/docling/integrations/) with popular frameworks
167
+ [integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
148
168
  and tools.
149
169
 
150
170
  ## Get help and support
151
171
 
152
- Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling/discussions).
172
+ Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
153
173
 
154
174
  ## Technical report
155
175
 
@@ -157,7 +177,7 @@ For more details on Docling's inner workings, check out the [Docling Technical R
157
177
 
158
178
  ## Contributing
159
179
 
160
- Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details.
180
+ Please read [Contributing to Docling](https://github.com/docling-project/docling/blob/main/CONTRIBUTING.md) for details.
161
181
 
162
182
  ## References
163
183
 
@@ -181,11 +201,15 @@ If you use Docling in your projects, please consider citing the following:
181
201
  The Docling codebase is under MIT license.
182
202
  For individual model usage, please refer to the model licenses found in the original packages.
183
203
 
184
- ## IBM ❤️ Open Source AI
204
+ ## LF AI & Data
205
+
206
+ Docling is hosted as a project in the [LF AI & Data Foundation](https://lfaidata.foundation/projects/).
207
+
208
+ ### IBM ❤️ Open Source AI
185
209
 
186
- Docling has been brought to you by IBM.
210
+ The project was started by the AI for knowledge team at IBM Research Zurich.
187
211
 
188
- [supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
189
- [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
190
- [integrations]: https://ds4sd.github.io/docling/integrations/
212
+ [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
213
+ [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
214
+ [integrations]: https://docling-project.github.io/docling/integrations/
191
215
 
@@ -0,0 +1,84 @@
1
+ docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
+ docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhXorvZ1k,14060
5
+ docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
6
+ docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
7
+ docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
8
+ docling/backend/docling_parse_v4_backend.py,sha256=IECMJQWEvYqQv043_1Ho6dLkCbuaK8cMUsqcxwqruXo,6287
9
+ docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ docling/backend/docx/latex/latex_dict.py,sha256=a0UC3VLmG1BLN-hGmEaQamzKbDB10fCz0U8qRU--aBw,6613
12
+ docling/backend/docx/latex/omml.py,sha256=U-mQXNCI9ObUyHDxv6ItvaHlObIEu77PiXS1Vaaah6U,12012
13
+ docling/backend/html_backend.py,sha256=i9a5ucsIuf-sn6M8tmKt9Kg_qWqc5OJxhARb6ZNS3wI,19448
14
+ docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
16
+ docling/backend/md_backend.py,sha256=v230PXShYJo2QaabwUHiBpE-EGScHIerjL78zPaJpZM,16837
17
+ docling/backend/msexcel_backend.py,sha256=_ZVZFKRRijpg-Xz10xNxu2m-NpDaYvoiBqEZP6GbrgE,11095
18
+ docling/backend/mspowerpoint_backend.py,sha256=zXdXr8nGJJbPGTgR5_dqq5WmNL1wDCaK0RqFqtuHPqs,17213
19
+ docling/backend/msword_backend.py,sha256=VjTvJe249FjHJDBpK0RC4iyosMzmpJLTuFIAPNEdReU,23259
20
+ docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
21
+ docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
22
+ docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
24
+ docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
25
+ docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
26
+ docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ docling/cli/main.py,sha256=zr36i-itYkX013g_DK6aNiNe8UPaD27_A7UtG5qwLUo,20174
28
+ docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
29
+ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
30
+ docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ docling/datamodel/base_models.py,sha256=MAHr8LlffZ2uIXZ3AXOsikh_-oQIEYTiwwjsz-dQW9U,7287
32
+ docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
33
+ docling/datamodel/pipeline_options.py,sha256=TpRf_-7UuCjjaytFWA0nL2m-KP4no9jeAjaXRjBLMLE,12593
34
+ docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
35
+ docling/document_converter.py,sha256=LwbnfGzma937EmSrNWMzM-dldI9Cbu4DUgY8gL1OVHo,13184
36
+ docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
37
+ docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
39
+ docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
40
+ docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
41
+ docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
42
+ docling/models/easyocr_model.py,sha256=ezq3yv5lORe7T1bbSoTZALck2oHqyEHq57cRfhMYCCQ,7401
43
+ docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq85Q2K_aM,872
44
+ docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
45
+ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
46
+ docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
47
+ docling/models/hf_mlx_model.py,sha256=2eSHphJm5LAfiSA24blVMc2znJlKMYrtmmzq8ffc-rU,4924
48
+ docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
49
+ docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
50
+ docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
51
+ docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
52
+ docling/models/page_preprocessing_model.py,sha256=wAN2WlW7YnpqyETq6MpEWgUAokUwqGaX_g59sPUQsXo,2903
53
+ docling/models/picture_description_api_model.py,sha256=SRjOkCTBYa1pTIaQffDLUPabljjYrLOQ916MywESEXk,3715
54
+ docling/models/picture_description_base_model.py,sha256=uRpjBXC2qjpPyWFUt600N1GvmvF-vWwB8f-OTQ7PfDg,2305
55
+ docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
56
+ docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
+ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
58
+ docling/models/rapid_ocr_model.py,sha256=C_I0Ek9mAPIyTFRHuNbqtXg1c15rLNDE1tJ6_hPIi4c,5869
59
+ docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
60
+ docling/models/table_structure_model.py,sha256=_b6-2alzhzI19-thDGpM3mww54mxbHLkEiTYMU84d30,11773
61
+ docling/models/tesseract_ocr_cli_model.py,sha256=S-rCisPrVa3ASvOWycqQoria0PtmNqgdg8YxrLbG1ww,10067
62
+ docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q-WgZN1Eg,9256
63
+ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
65
+ docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
66
+ docling/pipeline/standard_pdf_pipeline.py,sha256=tHOHFyJajX6IAhm4y3I27uqn5jfMTuCaSaFOKT5JM2M,10593
67
+ docling/pipeline/vlm_pipeline.py,sha256=1eKt3gqWf6PxGvYZuqhKi2BFljJGJWIyHemzOAwa39Y,9065
68
+ docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
69
+ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
+ docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
71
+ docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
72
+ docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
73
+ docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
74
+ docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
75
+ docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
76
+ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
77
+ docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
78
+ docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
79
+ docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
80
+ docling-2.28.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
81
+ docling-2.28.0.dist-info/METADATA,sha256=miIkWRX5hgrOeGbyYDAiQaymAR6PxK6Qdlss5DR1YhM,9982
82
+ docling-2.28.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
83
+ docling-2.28.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
84
+ docling-2.28.0.dist-info/RECORD,,
@@ -2,3 +2,6 @@
2
2
  docling=docling.cli.main:app
3
3
  docling-tools=docling.cli.tools:app
4
4
 
5
+ [docling]
6
+ docling_defaults=docling.models.plugins.defaults
7
+
@@ -1,72 +0,0 @@
1
- docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
- docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQQknNXk,14050
5
- docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
6
- docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
7
- docling/backend/docling_parse_v2_backend.py,sha256=oF8W-zuvEfpmyXp7Itt6-ot_feeMneMmSG7CpKclMhc,9005
8
- docling/backend/html_backend.py,sha256=qLzNpMpfmllwpp-5uARrmaVyN5D1YOpmsbS3-RyL2p0,19370
9
- docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
11
- docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
12
- docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
13
- docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
14
- docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
15
- docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
16
- docling/backend/pypdfium2_backend.py,sha256=l6YfoiIibw-Z4wrRwQTPP96IGOMAf1SIT_TPVBIuZRs,9663
17
- docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
19
- docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
20
- docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
21
- docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- docling/cli/main.py,sha256=unokSvmqZqFE_yLUQGBIo7q9QjdFrrE8EqnHxnqpGtM,16863
23
- docling/cli/models.py,sha256=DDnz-boX2MexPxC8OnOMPgSPG0iwseT3xkkCfgPrZis,3969
24
- docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
25
- docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- docling/datamodel/base_models.py,sha256=kMDT-rFhtJUFOOOry4wd2PzCMTLFixFklgSgmRDMS64,7201
27
- docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
28
- docling/datamodel/pipeline_options.py,sha256=L5ZmMZOkE0T2419uk_butX3ZoY8GhLJcmuGm2Gf1OHU,11991
29
- docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
30
- docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
31
- docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
32
- docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
34
- docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
35
- docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
36
- docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
37
- docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
38
- docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
39
- docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
40
- docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
41
- docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
42
- docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
43
- docling/models/picture_description_api_model.py,sha256=SKNoHpqzbfM8iO-DJJ4ccyNVqO0B2d9neLBnXqt50FY,3186
44
- docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
45
- docling/models/picture_description_vlm_model.py,sha256=EvKn4zWgTsQnbMFEoDhU3Ox4Pu5DkPqd2QewsGoXULU,3641
46
- docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
47
- docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
48
- docling/models/table_structure_model.py,sha256=gEXHRtHlLFUsP_Gs2EPaBJL-3KlMHa5HLUwzr3kN4_Y,11225
49
- docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
50
- docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
51
- docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
53
- docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
54
- docling/pipeline/standard_pdf_pipeline.py,sha256=IQHktVYvueTrYnIgLonaMvfYKKsU3L-hC9dqrR-Lw8g,12904
55
- docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
56
- docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
57
- docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
59
- docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
60
- docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
61
- docling/utils/layout_postprocessor.py,sha256=kdIk5TpAEXvsQUvkdALBDnAbjc4I_j8s8w6GEvbu4f0,24304
62
- docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
63
- docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
64
- docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
65
- docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
66
- docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
67
- docling/utils/visualization.py,sha256=cmbIroPQXPmJdFrNIfpC26WpijBwx05qmpu3QhiG1EI,2850
68
- docling-2.26.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
69
- docling-2.26.0.dist-info/METADATA,sha256=IPh-vv9mpl1sHnl4pkEsLGrdYeBlaJ-mfN28sn_zito,8803
70
- docling-2.26.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
71
- docling-2.26.0.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
72
- docling-2.26.0.dist-info/RECORD,,