docling-2.25.2-py3-none-any.whl → docling-2.27.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52):
  1. docling/backend/asciidoc_backend.py +1 -1
  2. docling/backend/csv_backend.py +1 -1
  3. docling/backend/docling_parse_backend.py +21 -13
  4. docling/backend/docling_parse_v2_backend.py +20 -12
  5. docling/backend/docling_parse_v4_backend.py +185 -0
  6. docling/backend/docx/__init__.py +0 -0
  7. docling/backend/docx/latex/__init__.py +0 -0
  8. docling/backend/docx/latex/latex_dict.py +271 -0
  9. docling/backend/docx/latex/omml.py +453 -0
  10. docling/backend/html_backend.py +7 -7
  11. docling/backend/md_backend.py +1 -1
  12. docling/backend/msexcel_backend.py +2 -45
  13. docling/backend/mspowerpoint_backend.py +1 -1
  14. docling/backend/msword_backend.py +65 -3
  15. docling/backend/pdf_backend.py +7 -2
  16. docling/backend/pypdfium2_backend.py +52 -30
  17. docling/backend/xml/uspto_backend.py +1 -1
  18. docling/cli/main.py +62 -23
  19. docling/cli/models.py +1 -1
  20. docling/datamodel/base_models.py +8 -10
  21. docling/datamodel/pipeline_options.py +27 -31
  22. docling/document_converter.py +5 -5
  23. docling/models/base_model.py +9 -1
  24. docling/models/base_ocr_model.py +27 -16
  25. docling/models/code_formula_model.py +84 -5
  26. docling/models/document_picture_classifier.py +1 -1
  27. docling/models/easyocr_model.py +28 -13
  28. docling/models/factories/__init__.py +27 -0
  29. docling/models/factories/base_factory.py +122 -0
  30. docling/models/factories/ocr_factory.py +11 -0
  31. docling/models/factories/picture_description_factory.py +11 -0
  32. docling/models/ocr_mac_model.py +39 -11
  33. docling/models/page_preprocessing_model.py +4 -0
  34. docling/models/picture_description_api_model.py +20 -3
  35. docling/models/picture_description_base_model.py +19 -3
  36. docling/models/picture_description_vlm_model.py +14 -2
  37. docling/models/plugins/__init__.py +0 -0
  38. docling/models/plugins/defaults.py +28 -0
  39. docling/models/rapid_ocr_model.py +34 -13
  40. docling/models/table_structure_model.py +14 -5
  41. docling/models/tesseract_ocr_cli_model.py +40 -15
  42. docling/models/tesseract_ocr_model.py +37 -12
  43. docling/pipeline/standard_pdf_pipeline.py +25 -78
  44. docling/utils/export.py +8 -6
  45. docling/utils/layout_postprocessor.py +26 -23
  46. docling/utils/visualization.py +1 -1
  47. {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/METADATA +48 -19
  48. docling-2.27.0.dist-info/RECORD +83 -0
  49. {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/entry_points.txt +3 -0
  50. docling-2.25.2.dist-info/RECORD +0 -72
  51. {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/LICENSE +0 -0
  52. {docling-2.25.2.dist-info → docling-2.27.0.dist-info}/WHEEL +0 -0
@@ -5,9 +5,10 @@ from collections import defaultdict
5
5
  from typing import Dict, List, Set, Tuple
6
6
 
7
7
  from docling_core.types.doc import DocItemLabel, Size
8
+ from docling_core.types.doc.page import TextCell
8
9
  from rtree import index
9
10
 
10
- from docling.datamodel.base_models import BoundingBox, Cell, Cluster, OcrCell
11
+ from docling.datamodel.base_models import BoundingBox, Cluster
11
12
 
12
13
  _log = logging.getLogger(__name__)
13
14
 
@@ -198,7 +199,7 @@ class LayoutPostprocessor:
198
199
  DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
199
200
  }
200
201
 
201
- def __init__(self, cells: List[Cell], clusters: List[Cluster], page_size: Size):
202
+ def __init__(self, cells: List[TextCell], clusters: List[Cluster], page_size: Size):
202
203
  """Initialize processor with cells and clusters."""
203
204
  """Initialize processor with cells and spatial indices."""
204
205
  self.cells = cells
@@ -218,7 +219,7 @@ class LayoutPostprocessor:
218
219
  [c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
219
220
  )
220
221
 
221
- def postprocess(self) -> Tuple[List[Cluster], List[Cell]]:
222
+ def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
222
223
  """Main processing pipeline."""
223
224
  self.regular_clusters = self._process_regular_clusters()
224
225
  self.special_clusters = self._process_special_clusters()
@@ -271,15 +272,13 @@ class LayoutPostprocessor:
271
272
  next_id = max((c.id for c in self.all_clusters), default=0) + 1
272
273
  orphan_clusters = []
273
274
  for i, cell in enumerate(unassigned):
274
- conf = 1.0
275
- if isinstance(cell, OcrCell):
276
- conf = cell.confidence
275
+ conf = cell.confidence
277
276
 
278
277
  orphan_clusters.append(
279
278
  Cluster(
280
279
  id=next_id + i,
281
280
  label=DocItemLabel.TEXT,
282
- bbox=cell.bbox,
281
+ bbox=cell.to_bounding_box(),
283
282
  confidence=conf,
284
283
  cells=[cell],
285
284
  )
@@ -557,13 +556,13 @@ class LayoutPostprocessor:
557
556
 
558
557
  return current_best if current_best else clusters[0]
559
558
 
560
- def _deduplicate_cells(self, cells: List[Cell]) -> List[Cell]:
559
+ def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
561
560
  """Ensure each cell appears only once, maintaining order of first appearance."""
562
561
  seen_ids = set()
563
562
  unique_cells = []
564
563
  for cell in cells:
565
- if cell.id not in seen_ids:
566
- seen_ids.add(cell.id)
564
+ if cell.index not in seen_ids:
565
+ seen_ids.add(cell.index)
567
566
  unique_cells.append(cell)
568
567
  return unique_cells
569
568
 
@@ -582,11 +581,13 @@ class LayoutPostprocessor:
582
581
  best_cluster = None
583
582
 
584
583
  for cluster in clusters:
585
- if cell.bbox.area() <= 0:
584
+ if cell.rect.to_bounding_box().area() <= 0:
586
585
  continue
587
586
 
588
- overlap = cell.bbox.intersection_area_with(cluster.bbox)
589
- overlap_ratio = overlap / cell.bbox.area()
587
+ overlap = cell.rect.to_bounding_box().intersection_area_with(
588
+ cluster.bbox
589
+ )
590
+ overlap_ratio = overlap / cell.rect.to_bounding_box().area()
590
591
 
591
592
  if overlap_ratio > best_overlap:
592
593
  best_overlap = overlap_ratio
@@ -601,11 +602,13 @@ class LayoutPostprocessor:
601
602
 
602
603
  return clusters
603
604
 
604
- def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[Cell]:
605
+ def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
605
606
  """Find cells not assigned to any cluster."""
606
- assigned = {cell.id for cluster in clusters for cell in cluster.cells}
607
+ assigned = {cell.index for cluster in clusters for cell in cluster.cells}
607
608
  return [
608
- cell for cell in self.cells if cell.id not in assigned and cell.text.strip()
609
+ cell
610
+ for cell in self.cells
611
+ if cell.index not in assigned and cell.text.strip()
609
612
  ]
610
613
 
611
614
  def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
@@ -615,10 +618,10 @@ class LayoutPostprocessor:
615
618
  continue
616
619
 
617
620
  cells_bbox = BoundingBox(
618
- l=min(cell.bbox.l for cell in cluster.cells),
619
- t=min(cell.bbox.t for cell in cluster.cells),
620
- r=max(cell.bbox.r for cell in cluster.cells),
621
- b=max(cell.bbox.b for cell in cluster.cells),
621
+ l=min(cell.rect.to_bounding_box().l for cell in cluster.cells),
622
+ t=min(cell.rect.to_bounding_box().t for cell in cluster.cells),
623
+ r=max(cell.rect.to_bounding_box().r for cell in cluster.cells),
624
+ b=max(cell.rect.to_bounding_box().b for cell in cluster.cells),
622
625
  )
623
626
 
624
627
  if cluster.label == DocItemLabel.TABLE:
@@ -634,9 +637,9 @@ class LayoutPostprocessor:
634
637
 
635
638
  return clusters
636
639
 
637
- def _sort_cells(self, cells: List[Cell]) -> List[Cell]:
640
+ def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
638
641
  """Sort cells in native reading order."""
639
- return sorted(cells, key=lambda c: (c.id))
642
+ return sorted(cells, key=lambda c: (c.index))
640
643
 
641
644
  def _sort_clusters(
642
645
  self, clusters: List[Cluster], mode: str = "id"
@@ -647,7 +650,7 @@ class LayoutPostprocessor:
647
650
  clusters,
648
651
  key=lambda cluster: (
649
652
  (
650
- min(cell.id for cell in cluster.cells)
653
+ min(cell.index for cell in cluster.cells)
651
654
  if cluster.cells
652
655
  else sys.maxsize
653
656
  ),
@@ -25,7 +25,7 @@ def draw_clusters(
25
25
  # Draw cells first (underneath)
26
26
  cell_color = (0, 0, 0, 40) # Transparent black for cells
27
27
  for tc in c.cells:
28
- cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
28
+ cx0, cy0, cx1, cy1 = tc.rect.to_bounding_box().as_tuple()
29
29
  cx0 *= scale_x
30
30
  cx1 *= scale_x
31
31
  cy0 *= scale_x
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.25.2
3
+ Version: 2.27.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
- Home-page: https://github.com/DS4SD/docling
5
+ Home-page: https://github.com/docling-project/docling
6
6
  License: MIT
7
7
  Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
8
8
  Author: Christoph Auer
@@ -28,9 +28,9 @@ Provides-Extra: vlm
28
28
  Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
29
29
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
30
30
  Requires-Dist: certifi (>=2024.7.4)
31
- Requires-Dist: docling-core[chunking] (>=2.19.0,<3.0.0)
31
+ Requires-Dist: docling-core[chunking] (>=2.23.0,<3.0.0)
32
32
  Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
33
- Requires-Dist: docling-parse (>=3.3.0,<4.0.0)
33
+ Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
34
34
  Requires-Dist: easyocr (>=1.7,<2.0)
35
35
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
36
36
  Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -42,8 +42,10 @@ Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (ex
42
42
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
43
43
  Requires-Dist: pandas (>=2.1.4,<3.0.0)
44
44
  Requires-Dist: pillow (>=10.0.0,<12.0.0)
45
+ Requires-Dist: pluggy (>=1.0.0,<2.0.0)
45
46
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
46
47
  Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
48
+ Requires-Dist: pylatexenc (>=2.10,<3.0)
47
49
  Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
48
50
  Requires-Dist: python-docx (>=1.1.2,<2.0.0)
49
51
  Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
@@ -57,12 +59,12 @@ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
57
59
  Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
58
60
  Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
59
61
  Requires-Dist: typer (>=0.12.5,<0.13.0)
60
- Project-URL: Repository, https://github.com/DS4SD/docling
62
+ Project-URL: Repository, https://github.com/docling-project/docling
61
63
  Description-Content-Type: text/markdown
62
64
 
63
65
  <p align="center">
64
- <a href="https://github.com/ds4sd/docling">
65
- <img loading="lazy" alt="Docling" src="https://github.com/DS4SD/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
66
+ <a href="https://github.com/docling-project/docling">
67
+ <img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling/raw/main/docs/assets/docling_processing.png" width="100%"/>
66
68
  </a>
67
69
  </p>
68
70
 
@@ -73,7 +75,7 @@ Description-Content-Type: text/markdown
73
75
  </p>
74
76
 
75
77
  [![arXiv](https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg)](https://arxiv.org/abs/2408.09869)
76
- [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://ds4sd.github.io/docling/)
78
+ [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
77
79
  [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
78
80
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
79
81
  [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
@@ -81,8 +83,9 @@ Description-Content-Type: text/markdown
81
83
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
82
84
  [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
83
85
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
84
- [![License MIT](https://img.shields.io/github/license/DS4SD/docling)](https://opensource.org/licenses/MIT)
86
+ [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
85
87
  [![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
88
+ [![Docling Actor](https://apify.com/actor-badge?actor=vancura/docling?fpr=docling)](https://apify.com/vancura/docling)
86
89
 
87
90
  Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
88
91
 
@@ -113,7 +116,7 @@ pip install docling
113
116
 
114
117
  Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectures.
115
118
 
116
- More [detailed installation instructions](https://ds4sd.github.io/docling/installation/) are available in the docs.
119
+ More [detailed installation instructions](https://docling-project.github.io/docling/installation/) are available in the docs.
117
120
 
118
121
  ## Getting started
119
122
 
@@ -128,28 +131,54 @@ result = converter.convert(source)
128
131
  print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
129
132
  ```
130
133
 
131
- More [advanced usage options](https://ds4sd.github.io/docling/usage/) are available in
134
+ More [advanced usage options](https://docling-project.github.io/docling/usage/) are available in
132
135
  the docs.
133
136
 
134
137
  ## Documentation
135
138
 
136
- Check out Docling's [documentation](https://ds4sd.github.io/docling/), for details on
139
+ Check out Docling's [documentation](https://docling-project.github.io/docling/), for details on
137
140
  installation, usage, concepts, recipes, extensions, and more.
138
141
 
139
142
  ## Examples
140
143
 
141
- Go hands-on with our [examples](https://ds4sd.github.io/docling/examples/),
144
+ Go hands-on with our [examples](https://docling-project.github.io/docling/examples/),
142
145
  demonstrating how to address different application use cases with Docling.
143
146
 
144
147
  ## Integrations
145
148
 
146
149
  To further accelerate your AI application development, check out Docling's native
147
- [integrations](https://ds4sd.github.io/docling/integrations/) with popular frameworks
150
+ [integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
148
151
  and tools.
149
152
 
153
+ ## Apify Actor
154
+
155
+ <a href="https://apify.com/vancura/docling?fpr=docling"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Docling Actor on Apify" width="176" height="39" /></a>
156
+
157
+ You can run Docling in the cloud without installation using the [Docling Actor](https://apify.com/vancura/docling?fpr=docling) on Apify platform. Simply provide a document URL and get the processed result:
158
+
159
+ ```bash
160
+ apify call vancura/docling -i '{
161
+ "options": {
162
+ "to_formats": ["md", "json", "html", "text", "doctags"]
163
+ },
164
+ "http_sources": [
165
+ {"url": "https://vancura.dev/assets/actor-test/facial-hairstyles-and-filtering-facepiece-respirators.pdf"},
166
+ {"url": "https://arxiv.org/pdf/2408.09869"}
167
+ ]
168
+ }'
169
+ ```
170
+
171
+ The Actor stores results in:
172
+
173
+ * Processed document in key-value store (`OUTPUT_RESULT`)
174
+ * Processing logs (`DOCLING_LOG`)
175
+ * Dataset record with result URL and status
176
+
177
+ Read more about the [Docling Actor](.actor/README.md), including how to use it via the Apify API and CLI.
178
+
150
179
  ## Get help and support
151
180
 
152
- Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling/discussions).
181
+ Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
153
182
 
154
183
  ## Technical report
155
184
 
@@ -157,7 +186,7 @@ For more details on Docling's inner workings, check out the [Docling Technical R
157
186
 
158
187
  ## Contributing
159
188
 
160
- Please read [Contributing to Docling](https://github.com/DS4SD/docling/blob/main/CONTRIBUTING.md) for details.
189
+ Please read [Contributing to Docling](https://github.com/docling-project/docling/blob/main/CONTRIBUTING.md) for details.
161
190
 
162
191
  ## References
163
192
 
@@ -185,7 +214,7 @@ For individual model usage, please refer to the model licenses found in the orig
185
214
 
186
215
  Docling has been brought to you by IBM.
187
216
 
188
- [supported_formats]: https://ds4sd.github.io/docling/usage/supported_formats/
189
- [docling_document]: https://ds4sd.github.io/docling/concepts/docling_document/
190
- [integrations]: https://ds4sd.github.io/docling/integrations/
217
+ [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
218
+ [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
219
+ [integrations]: https://docling-project.github.io/docling/integrations/
191
220
 
@@ -0,0 +1,83 @@
1
+ docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
+ docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhXorvZ1k,14060
5
+ docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
6
+ docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
7
+ docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
8
+ docling/backend/docling_parse_v4_backend.py,sha256=sUjcgD62n2Z15gOYhLNAnwkzqSAnlQ8eKkDuVrlK_rk,6002
9
+ docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ docling/backend/docx/latex/latex_dict.py,sha256=a0UC3VLmG1BLN-hGmEaQamzKbDB10fCz0U8qRU--aBw,6613
12
+ docling/backend/docx/latex/omml.py,sha256=U-mQXNCI9ObUyHDxv6ItvaHlObIEu77PiXS1Vaaah6U,12012
13
+ docling/backend/html_backend.py,sha256=i9a5ucsIuf-sn6M8tmKt9Kg_qWqc5OJxhARb6ZNS3wI,19448
14
+ docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
16
+ docling/backend/md_backend.py,sha256=v230PXShYJo2QaabwUHiBpE-EGScHIerjL78zPaJpZM,16837
17
+ docling/backend/msexcel_backend.py,sha256=_ZVZFKRRijpg-Xz10xNxu2m-NpDaYvoiBqEZP6GbrgE,11095
18
+ docling/backend/mspowerpoint_backend.py,sha256=wUriELF9wHwThITXxSyseVASe6W6Sw0E7Qg_U-Q3JNU,16434
19
+ docling/backend/msword_backend.py,sha256=uSQJ5PHoTIlw2bcAe8NGWutjgceNYWfg4N1ze17F4D0,23101
20
+ docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
21
+ docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
22
+ docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
24
+ docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
25
+ docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
26
+ docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ docling/cli/main.py,sha256=1N4h1HrNCWEymkqb4_mXyplcdVgVNAR7lRAZFXTiRKk,18310
28
+ docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
29
+ docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
30
+ docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ docling/datamodel/base_models.py,sha256=MAHr8LlffZ2uIXZ3AXOsikh_-oQIEYTiwwjsz-dQW9U,7287
32
+ docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
33
+ docling/datamodel/pipeline_options.py,sha256=n45Xgl1qnrHZxztd4CyhdDPYa8FygADJ8EpfbUuIlmc,11963
34
+ docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
35
+ docling/document_converter.py,sha256=LwbnfGzma937EmSrNWMzM-dldI9Cbu4DUgY8gL1OVHo,13184
36
+ docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
37
+ docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
39
+ docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
40
+ docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
41
+ docling/models/document_picture_classifier.py,sha256=fz77RsTdlnA_yC47O-KUq2xVWMKX0_9jm_EGcHliw-E,6235
42
+ docling/models/easyocr_model.py,sha256=ezq3yv5lORe7T1bbSoTZALck2oHqyEHq57cRfhMYCCQ,7401
43
+ docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq85Q2K_aM,872
44
+ docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
45
+ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
46
+ docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
47
+ docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
48
+ docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
49
+ docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
50
+ docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
51
+ docling/models/page_preprocessing_model.py,sha256=wAN2WlW7YnpqyETq6MpEWgUAokUwqGaX_g59sPUQsXo,2903
52
+ docling/models/picture_description_api_model.py,sha256=SRjOkCTBYa1pTIaQffDLUPabljjYrLOQ916MywESEXk,3715
53
+ docling/models/picture_description_base_model.py,sha256=uRpjBXC2qjpPyWFUt600N1GvmvF-vWwB8f-OTQ7PfDg,2305
54
+ docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
55
+ docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
57
+ docling/models/rapid_ocr_model.py,sha256=C_I0Ek9mAPIyTFRHuNbqtXg1c15rLNDE1tJ6_hPIi4c,5869
58
+ docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
59
+ docling/models/table_structure_model.py,sha256=_b6-2alzhzI19-thDGpM3mww54mxbHLkEiTYMU84d30,11773
60
+ docling/models/tesseract_ocr_cli_model.py,sha256=S-rCisPrVa3ASvOWycqQoria0PtmNqgdg8YxrLbG1ww,10067
61
+ docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q-WgZN1Eg,9256
62
+ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
+ docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
64
+ docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
65
+ docling/pipeline/standard_pdf_pipeline.py,sha256=tHOHFyJajX6IAhm4y3I27uqn5jfMTuCaSaFOKT5JM2M,10593
66
+ docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
67
+ docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
+ docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
+ docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
70
+ docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
71
+ docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
72
+ docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
73
+ docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
74
+ docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
75
+ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
76
+ docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
77
+ docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
78
+ docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
79
+ docling-2.27.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
80
+ docling-2.27.0.dist-info/METADATA,sha256=bjSjck82ddDda67NwQaZwW_s9T_jTHw9lE3RhhXf1Y4,10142
81
+ docling-2.27.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
82
+ docling-2.27.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
83
+ docling-2.27.0.dist-info/RECORD,,
@@ -2,3 +2,6 @@
2
2
  docling=docling.cli.main:app
3
3
  docling-tools=docling.cli.tools:app
4
4
 
5
+ [docling]
6
+ docling_defaults=docling.models.plugins.defaults
7
+
@@ -1,72 +0,0 @@
1
- docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
- docling/backend/asciidoc_backend.py,sha256=zyHxlG_BvlLwvpdNca3P6aopxOJZw8wbDFkJQQknNXk,14050
5
- docling/backend/csv_backend.py,sha256=xuId4JGEXjoyPgO9Fy9hQ5C-ezXvJwv0TGB8fyFHgWM,4533
6
- docling/backend/docling_parse_backend.py,sha256=hEEJibI1oJS0LAnFoIs6gMshS3bCqGtVxHnDNvBGZuA,7649
7
- docling/backend/docling_parse_v2_backend.py,sha256=oF8W-zuvEfpmyXp7Itt6-ot_feeMneMmSG7CpKclMhc,9005
8
- docling/backend/html_backend.py,sha256=qLzNpMpfmllwpp-5uARrmaVyN5D1YOpmsbS3-RyL2p0,19370
9
- docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
11
- docling/backend/md_backend.py,sha256=NaVfcnEH-5bwVovjn76EobF6B6Wm8AhaTZ4E8k0TUPo,16826
12
- docling/backend/msexcel_backend.py,sha256=lyJc4ShJGAN2ZfNTTuhdYTF-44cZsGyn_8Djstp3IEU,12700
13
- docling/backend/mspowerpoint_backend.py,sha256=esAyaaQe17BQFweGAGJHvImKETefY0BpvfpUSECC49w,16424
14
- docling/backend/msword_backend.py,sha256=V4miLIcOH8DDlSCm25F_DALBW60Uf9JoSS0TB4yrQBw,20591
15
- docling/backend/pdf_backend.py,sha256=17Pr8dWsD1C4FYUprrwMM9trDGW-JYLjrcScx1Ul4io,2048
16
- docling/backend/pypdfium2_backend.py,sha256=l6YfoiIibw-Z4wrRwQTPP96IGOMAf1SIT_TPVBIuZRs,9663
17
- docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqafCvdr7s,24945
19
- docling/backend/xml/uspto_backend.py,sha256=IGUNeF2xpLeaVrX6nKb-jXgtSYD2ozULsrDPcrI1IbQ,71040
20
- docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
21
- docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- docling/cli/main.py,sha256=pCJ_GFgxsgZ0soz32OhMl-CWi7YXIrvax_m9Qw4UhMs,16839
23
- docling/cli/models.py,sha256=DDnz-boX2MexPxC8OnOMPgSPG0iwseT3xkkCfgPrZis,3969
24
- docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
25
- docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- docling/datamodel/base_models.py,sha256=kMDT-rFhtJUFOOOry4wd2PzCMTLFixFklgSgmRDMS64,7201
27
- docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
28
- docling/datamodel/pipeline_options.py,sha256=YpWqCqkA44YUFPhiBg_LYcfOAXxNhv10vZKrkfLtJ_I,11987
29
- docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
30
- docling/document_converter.py,sha256=AeiSmKzWcnOkZm8O-KIBG72g3l4W2CAsq3yEbfC1tiE,13184
31
- docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
32
- docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- docling/models/base_model.py,sha256=q_lKeQ0FT70idXlZ3JgyAv8dA8J3bZWBSDBkqTzy0lo,2679
34
- docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
35
- docling/models/code_formula_model.py,sha256=6grbRPWaLljadheT5s4omdT6hmXfin4gJU17csWvhjY,8611
36
- docling/models/document_picture_classifier.py,sha256=6I_j6fG5fnhIV6rqN31LYikNTZyg5isXrVs0GIqHDaY,6235
37
- docling/models/easyocr_model.py,sha256=ePg1exAXeOzkBRBT-6PBSmqKFmnNFkCEd4HNDsGVgLM,6860
38
- docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
39
- docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
40
- docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
41
- docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
42
- docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
43
- docling/models/picture_description_api_model.py,sha256=SKNoHpqzbfM8iO-DJJ4ccyNVqO0B2d9neLBnXqt50FY,3186
44
- docling/models/picture_description_base_model.py,sha256=rZLIW1_CaRAw_EP3zuI8ktC0ZxwO7yubhh2RkaC_8e8,1910
45
- docling/models/picture_description_vlm_model.py,sha256=EvKn4zWgTsQnbMFEoDhU3Ox4Pu5DkPqd2QewsGoXULU,3641
46
- docling/models/rapid_ocr_model.py,sha256=2HXmurNRPP6qyqn7U5h9NQIs8zi0TMHf56CpcKQk0fU,5038
47
- docling/models/readingorder_model.py,sha256=hNWbBX3uZv1FxMwKNKn2JFQuQqTspBLsJBVEidXr6Wk,14869
48
- docling/models/table_structure_model.py,sha256=UIqWlw_9JNfGsO86c00rPb4GCg-yNliKEwyhCqlsZbM,11225
49
- docling/models/tesseract_ocr_cli_model.py,sha256=F5EhS4NDEmLkPq-a0P7o2LrzjmJgACzlYXTDvtD3NtY,9343
50
- docling/models/tesseract_ocr_model.py,sha256=ikGu6QNknLG64c9yYIb0Ix6MGhBzOoa1ODbNc8MT5r8,8508
51
- docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
53
- docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
54
- docling/pipeline/standard_pdf_pipeline.py,sha256=IQHktVYvueTrYnIgLonaMvfYKKsU3L-hC9dqrR-Lw8g,12904
55
- docling/pipeline/vlm_pipeline.py,sha256=glPwNH1QEuHj35L3tdPyuCX0CGlJn81ZDFrj3WwLa7o,22265
56
- docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
57
- docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
59
- docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
60
- docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
61
- docling/utils/layout_postprocessor.py,sha256=kdIk5TpAEXvsQUvkdALBDnAbjc4I_j8s8w6GEvbu4f0,24304
62
- docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
63
- docling/utils/model_downloader.py,sha256=sxAQvjiIu9m2Ur5Ot5C5SATmgWJAHi0xSjzxj8QXYJk,3213
64
- docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
65
- docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
66
- docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
67
- docling/utils/visualization.py,sha256=cmbIroPQXPmJdFrNIfpC26WpijBwx05qmpu3QhiG1EI,2850
68
- docling-2.25.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
69
- docling-2.25.2.dist-info/METADATA,sha256=NsR1pyqk-Q5G5pHrpaLf6TCQEE-r-hGrEB9Hpqdgykk,8803
70
- docling-2.25.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
71
- docling-2.25.2.dist-info/entry_points.txt,sha256=cFrINXsORijdm2EWJzf1m9_rDxH9G9W1fP385-9atY4,84
72
- docling-2.25.2.dist-info/RECORD,,